From a6dafb9808dbf14b59d73e9557e97e35965e91ae Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Sun, 10 Nov 2024 17:57:09 +0300 Subject: [PATCH 1/3] add more stat --- mteb/abstasks/AbsTaskBitextMining.py | 24 +- mteb/abstasks/AbsTaskClassification.py | 9 +- mteb/abstasks/AbsTaskClustering.py | 16 +- mteb/abstasks/AbsTaskClusteringFast.py | 15 +- mteb/abstasks/AbsTaskInstructionRetrieval.py | 62 +- .../AbsTaskMultilabelClassification.py | 18 +- mteb/abstasks/AbsTaskPairClassification.py | 30 +- mteb/abstasks/AbsTaskReranking.py | 43 +- mteb/abstasks/AbsTaskRetrieval.py | 30 +- mteb/abstasks/AbsTaskSTS.py | 24 +- mteb/abstasks/AbsTaskSummarization.py | 51 +- .../BitextMining/BornholmBitextMining.json | 4 + .../BitextMining/IN22ConvBitextMining.json | 2028 +++++++ .../NusaTranslationBitextMining.json | 48 + .../BitextMining/PhincBitextMining.json | 24 + .../LanguageClassification.json | 2 + .../SlovakHateSpeechClassification.json | 2 + .../ArXivHierarchicalClusteringP2P.json | 4 + .../Clustering/BiorxivClusteringS2S.json | 4 + .../RuSciBenchGRNTIClusteringP2P.json | 4 + .../Clustering/WikiClusteringP2P.json | 60 + .../Core17InstructionRetrieval.json | 14 +- .../CEDRClassification.json | 4 + .../MultiEURLEXMultilabelClassification.json | 96 + .../PawsXPairClassification.json | 128 +- .../PairClassification/TwitterURLCorpus.json | 8 +- .../PairClassification/XNLI.json | 240 +- .../Reranking/AskUbuntuDupQuestions.json | 10 +- .../Reranking/ESCIReranking.json | 40 +- .../WikipediaRerankingMultilingual.json | 170 +- .../Retrieval/AppsRetrieval.json | 14 +- .../Retrieval/BelebeleRetrieval.json | 5276 ++++++++++++----- .../Retrieval/COIRCodeSearchNetRetrieval.json | 96 +- .../Retrieval/CodeEditSearchRetrieval.json | 194 +- .../Retrieval/CodeFeedbackMT.json | 14 +- .../Retrieval/CodeFeedbackST.json | 14 +- .../Retrieval/CodeSearchNetCCRetrieval.json | 96 +- .../Retrieval/CodeSearchNetRetrieval.json | 96 +- .../Retrieval/CodeTransOceanContest.json | 14 +- .../Retrieval/CodeTransOceanDL.json | 14 +- mteb/descriptive_stats/Retrieval/CosQA.json | 14 +- .../Retrieval/JaqketRetrieval.json | 14 +- .../Retrieval/StackOverflowQA.json | 14 +- .../Retrieval/SyntheticText2SQL.json | 14 +- .../Retrieval/Touche2020.json | 14 +- .../Retrieval/Touche2020Retrieval.v3.json | 14 +- ...lowIRCrossLingualInstructionRetrieval.json | 54 +- .../mFollowIRInstructionRetrieval.json | 54 +- mteb/descriptive_stats/STS/STS12.json | 8 +- mteb/descriptive_stats/STS/STS17.json | 94 +- .../Summarization/SummEval.json | 50 +- 51 files changed, 7456 insertions(+), 1928 deletions(-) create mode 100644 mteb/descriptive_stats/BitextMining/PhincBitextMining.json diff --git a/mteb/abstasks/AbsTaskBitextMining.py b/mteb/abstasks/AbsTaskBitextMining.py index 00a9160b9..68d20aea7 100644 --- a/mteb/abstasks/AbsTaskBitextMining.py +++ b/mteb/abstasks/AbsTaskBitextMining.py @@ -21,14 +21,26 @@ class BitextDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + + min_sentence1_length: Minimum length of sentence1 average_sentence1_length: Average length of sentence1 + max_sentence1_length: Maximum length of sentence1 + + min_sentence2_length: Minimum length of sentence2 average_sentence2_length: Average length of sentence2 + max_sentence2_length: Maximum length of sentence2 """ num_samples: int number_of_characters: int + + min_sentence1_length: int average_sentence1_length: float + max_sentence1_length: int + + min_sentence2_length: int average_sentence2_length: float + max_sentence2_length: int class AbsTaskBitextMining(AbsTask): @@ -153,12 +165,18 @@ def _calculate_metrics_from_split( sent_1, sent_2 = pairs_cols[0] sentence1 = self.dataset[split][sent_1] sentence2 = self.dataset[split][sent_2] - total_s1_len = sum([len(s1) for s1 in sentence1]) - total_s2_len = sum([len(s2) for s2 in sentence2]) + s1_len = [len(s1) for s1 in sentence1] + s2_len = [len(s2) for s2 in sentence2] + total_s1_len = sum(s1_len) + total_s2_len = sum(s2_len) return BitextDescriptiveStatistics( - average_sentence1_length=total_s1_len / len(sentence1), + min_sentence1_length=min(s1_len), + average_sentence1_length=sum(s1_len) / len(sentence1), + max_sentence1_length=max(s1_len), + min_sentence2_length=min(s2_len), average_sentence2_length=total_s2_len / len(sentence2), + max_sentence2_length=max(s2_len), num_samples=len(sentence1), number_of_characters=total_s1_len + total_s2_len, ) diff --git a/mteb/abstasks/AbsTaskClassification.py b/mteb/abstasks/AbsTaskClassification.py index 1367fd3ed..7c8556004 100644 --- a/mteb/abstasks/AbsTaskClassification.py +++ b/mteb/abstasks/AbsTaskClassification.py @@ -26,14 +26,18 @@ class ClassificationDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + min_text_length: Minimum length of text average_text_length: Average length of text + max_text_length: Maximum length of text unique_labels: Number of unique labels labels: dict of label frequencies """ num_samples: int number_of_characters: int + min_text_length: int average_text_length: float + max_text_length: int unique_labels: int labels: dict[str, dict[str, int]] @@ -219,12 +223,15 @@ def _calculate_metrics_from_split( text = self.dataset[split]["text"] label = self.dataset[split]["label"] - total_text_len = sum([len(t) for t in text]) + text_len = [len(t) for t in text] + total_text_len = sum(text_len) label_count = Counter(label) return ClassificationDescriptiveStatistics( num_samples=len(text), number_of_characters=total_text_len, + min_text_length=min(text_len), average_text_length=total_text_len / len(text), + max_text_length=max(text_len), unique_labels=len(label_count), labels={ str(label): {"count": count} for label, count in label_count.items() diff --git a/mteb/abstasks/AbsTaskClustering.py b/mteb/abstasks/AbsTaskClustering.py index 7f2c94e14..bc91081a2 100644 --- a/mteb/abstasks/AbsTaskClustering.py +++ b/mteb/abstasks/AbsTaskClustering.py @@ -24,7 +24,9 @@ class ClusteringDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + min_text_length: Minimum length of text average_text_length: Average length of text + max_text_length: Maximum length of text average_labels_per_text: Average number of labels per text unique_labels: Number of unique labels labels: dict of label frequencies @@ -32,8 +34,15 @@ class ClusteringDescriptiveStatistics(DescriptiveStatistics): num_samples: int number_of_characters: int + + min_text_length: int average_text_length: float + max_text_length: int + + min_labels_per_text: int average_labels_per_text: float + max_labels_per_text: int + unique_labels: int labels: dict[str, dict[str, int]] @@ -96,7 +105,8 @@ def _calculate_metrics_from_split( sentences = self.dataset[split]["sentences"] labels = self.dataset[split]["labels"] - total_text_len = sum([len(t) for t in sentences]) + text_len = [len(t) for t in sentences] + total_text_len = sum(text_len) total_labels = [] for label in labels: if isinstance(label, list): @@ -107,8 +117,12 @@ def _calculate_metrics_from_split( return ClusteringDescriptiveStatistics( num_samples=len(sentences), number_of_characters=total_text_len, + min_text_length=min(text_len), average_text_length=total_text_len / len(sentences), + max_text_length=max(text_len), + min_labels_per_text=min(label_counter.values()), average_labels_per_text=len(total_labels) / len(sentences), + max_labels_per_text=max(label_counter.values()), unique_labels=len(label_counter), labels={ str(label): { diff --git a/mteb/abstasks/AbsTaskClusteringFast.py b/mteb/abstasks/AbsTaskClusteringFast.py index fedf392f7..9f462358f 100644 --- a/mteb/abstasks/AbsTaskClusteringFast.py +++ b/mteb/abstasks/AbsTaskClusteringFast.py @@ -85,16 +85,24 @@ class ClusteringFastDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + min_text_length: Minimum length of text average_text_length: Average length of text + max_text_length: Maximum length of text + min_labels_per_text: Minimum number of labels per text average_labels_per_text: Average number of labels per text + max_labels_per_text: Maximum number of labels per text unique_labels: Number of unique labels labels: dict of label frequencies """ num_samples: int number_of_characters: int + min_text_length: int average_text_length: float + max_text_length: int + min_labels_per_text: int average_labels_per_text: float + max_labels_per_text: int unique_labels: int labels: dict[str, dict[str, int]] @@ -226,7 +234,8 @@ def _calculate_metrics_from_split( sentences = self.dataset[split]["sentences"] labels = self.dataset[split]["labels"] - total_text_len = sum([len(t) for t in sentences]) + text_len = [len(t) for t in sentences] + total_text_len = sum(text_len) total_labels = [] for label in labels: if isinstance(label, list): @@ -237,8 +246,12 @@ def _calculate_metrics_from_split( return ClusteringFastDescriptiveStatistics( num_samples=len(sentences), number_of_characters=total_text_len, + min_text_length=min(text_len), average_text_length=total_text_len / len(sentences), + max_text_length=max(text_len), + min_labels_per_text=min(label_counter.values()), average_labels_per_text=len(total_labels) / len(sentences), + max_labels_per_text=max(label_counter.values()), unique_labels=len(label_counter), labels={ str(label): { diff --git a/mteb/abstasks/AbsTaskInstructionRetrieval.py b/mteb/abstasks/AbsTaskInstructionRetrieval.py index bdbe5cd6c..e3257974a 100644 --- a/mteb/abstasks/AbsTaskInstructionRetrieval.py +++ b/mteb/abstasks/AbsTaskInstructionRetrieval.py @@ -37,6 +37,7 @@ def __init__( qrels_file: str = "", streaming: bool = False, keep_in_memory: bool = False, + trust_remote_code: bool = False, ): self.corpus = {} self.queries = {} @@ -69,6 +70,8 @@ def __init__( self.qrels_file = qrels_file self.streaming = streaming self.keep_in_memory = keep_in_memory + self.trust_remote_code = trust_remote_code + def load( self, split="test" @@ -227,24 +230,48 @@ class InstructionRetrievalDescriptiveStatistics(DescriptiveStatistics): num_queries: Number of queries num_docs: Number of documents number_of_characters: Total number of symbols in the dataset + min_document_length: Minimum length of documents average_document_length: Average length of documents + max_document_length: Maximum length of documents + min_query_length: Minimum length of queries average_query_length: Average length of queries + max_query_length: Maximum length of queries + min_instruction_length: Minimum length of instructions average_instruction_length: Average length of instructions + max_instruction_length: Maximum length of instructions + min_changed_instruction_length: Minimum length of changed instructions average_changed_instruction_length: Average length of changed instructions + max_changed_instruction_length: Maximum length of changed instructions + min_average_relevant_docs_per_query: Minimum number of relevant docs per query average_relevant_docs_per_query: Average number of relevant docs per query + max_average_relevant_docs_per_query: Maximum number of relevant docs per query + min_average_top_ranked_per_query: Minimum number of top ranked docs per query average_top_ranked_per_query: Average number of top ranked docs per query + max_average_top_ranked_per_query: Maximum number of top ranked docs per query """ num_samples: int num_queries: int num_docs: int number_of_characters: int + min_document_length: int average_document_length: float + max_document_length: int + min_query_length: int average_query_length: float + max_query_length: int + min_instruction_length: int average_instruction_length: float + max_instruction_length: int + min_changed_instruction_length: int average_changed_instruction_length: float + max_changed_instruction_length: int + min_average_relevant_docs_per_query: float average_relevant_docs_per_query: float + max_average_relevant_docs_per_query: float + min_average_top_ranked_per_query: float average_top_ranked_per_query: float + max_average_top_ranked_per_query: float class AbsTaskInstructionRetrieval(AbsTask): @@ -665,23 +692,28 @@ def _calculate_metrics_from_split( changed_instructions = self.changed_instructions[split] top_ranked = self.top_ranked[split] + corpus_len = [len(doc.get("title", "")) + len(doc["text"]) for doc in corpus.values()] total_corpus_len = sum( - [len(doc.get("title", "")) + len(doc["text"]) for doc in corpus.values()] - ) - total_queries_len = sum([len(query) for query in queries.values()]) - total_instructions_len = sum( - [len(instruction) for instruction in og_instructions.values()] + corpus_len ) + queries_len = [len(query) for query in queries.values()] + total_queries_len = sum(queries_len) + instructions_len = [len(instruction) for instruction in og_instructions.values()] + total_instructions_len = sum(instructions_len) + changed_instructions_len = [len(instruction) for instruction in changed_instructions.values()] total_changed_instructions_len = sum( - [len(instruction) for instruction in changed_instructions.values()] + changed_instructions_len ) + qrels_non_zero = [ + sum(1 for doc_id in docs if docs[doc_id] != 0) for docs in relevant_docs.values() + ] num_qrels_non_zero = sum( - sum(1 for doc_id in docs if docs[doc_id] != 0) - for docs in relevant_docs.values() + qrels_non_zero ) qrels_per_doc = num_qrels_non_zero / len(relevant_docs) if len(queries) else 0 + ranked_per_query = [len(docs) for docs in top_ranked.values()] top_ranked_per_query = ( - sum(len(docs) for docs in top_ranked.values()) / len(queries) + sum(ranked_per_query) / len(queries) if len(queries) else 0 ) @@ -693,20 +725,32 @@ def _calculate_metrics_from_split( + total_queries_len + total_instructions_len + total_changed_instructions_len, + min_document_length=min(corpus_len), average_document_length=( total_corpus_len / len(corpus) if len(corpus) else 0 ), + max_document_length=max(corpus_len), + min_query_length=min(queries_len), average_query_length=( total_queries_len / len(queries) if len(queries) else 0 ), + max_query_length=max(queries_len), + min_instruction_length=min(instructions_len), average_instruction_length=( total_instructions_len / len(queries) if len(queries) else 0 ), + max_instruction_length=max(instructions_len), + min_changed_instruction_length=min(changed_instructions_len), average_changed_instruction_length=( total_changed_instructions_len / len(queries) if len(queries) else 0 ), + max_changed_instruction_length=max(changed_instructions_len), + min_average_relevant_docs_per_query=min(qrels_non_zero), average_relevant_docs_per_query=qrels_per_doc, + max_average_relevant_docs_per_query=max(qrels_non_zero), + min_average_top_ranked_per_query=min(ranked_per_query), average_top_ranked_per_query=top_ranked_per_query, + max_average_top_ranked_per_query=max(ranked_per_query), ) diff --git a/mteb/abstasks/AbsTaskMultilabelClassification.py b/mteb/abstasks/AbsTaskMultilabelClassification.py index afcd42737..b67c54474 100644 --- a/mteb/abstasks/AbsTaskMultilabelClassification.py +++ b/mteb/abstasks/AbsTaskMultilabelClassification.py @@ -47,16 +47,24 @@ class MultilabelClassificationDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + min_text_length: Minimum length of text average_text_length: Average length of text + max_text_length: Maximum length of text + min_labels_per_text: Minimum number of labels per text average_label_per_text: Average number of labels per text + max_labels_per_text: Maximum number of labels per text unique_labels: Number of unique labels labels: dict of label frequencies """ num_samples: int number_of_characters: int + min_text_length: int average_text_length: float + max_text_length: int + min_labels_per_text: int average_label_per_text: float + max_labels_per_text: int unique_labels: int labels: dict[str, dict[str, int]] @@ -242,16 +250,22 @@ def _calculate_metrics_from_split( text = self.dataset[split]["text"] label = self.dataset[split]["label"] - total_text_len = sum(len(t) for t in text) - total_label_len = sum(len(l) for l in label) + text_len = [len(t) for t in text] + total_text_len = sum(text_len) + label_len = [len(l) for l in label] + total_label_len = sum(label_len) total_labels = [] for l in label: total_labels.extend(l if len(l) > 0 else [None]) label_count = Counter(total_labels) return MultilabelClassificationDescriptiveStatistics( + min_text_length=min(text_len), average_text_length=total_text_len / len(text), + max_text_length=max(text_len), number_of_characters=total_text_len, + min_labels_per_text=min(label_len), average_label_per_text=total_label_len / len(label), + max_labels_per_text=max(label_len), num_samples=len(text), unique_labels=len(label_count), labels={ diff --git a/mteb/abstasks/AbsTaskPairClassification.py b/mteb/abstasks/AbsTaskPairClassification.py index 0cbdafda8..7f140d5ee 100644 --- a/mteb/abstasks/AbsTaskPairClassification.py +++ b/mteb/abstasks/AbsTaskPairClassification.py @@ -20,16 +20,24 @@ class PairClassificationDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. - avg_sentence1_len: Average length of sentence1 - avg_sentence2_len: Average length of sentence2 + min_sentence1_length: Minimum length of sentence1 + avg_sentence1_length: Average length of sentence1 + max_sentence1_length: Maximum length of sentence1 + min_sentence2_length: Minimum length of sentence2 + avg_sentence2_length: Average length of sentence2 + max_sentence2_length: Maximum length of sentence2 unique_labels: Number of unique labels labels: dict of label frequencies """ num_samples: int number_of_characters: int - avg_sentence1_len: float - avg_sentence2_len: float + min_sentence1_length: int + avg_sentence1_length: float + max_sentence1_length: int + min_sentence2_length: int + avg_sentence2_length: float + max_sentence2_length: int unique_labels: int labels: dict[str, dict[str, int]] @@ -109,14 +117,20 @@ def _calculate_metrics_from_split( dataset["labels"][0] if len(dataset["labels"]) == 1 else dataset["labels"] ) - total_sentence1_len = sum([len(sentence) for sentence in sentence1]) - total_sentence2_len = sum([len(sentence) for sentence in sentence2]) + sentence1_len = [len(sentence) for sentence in sentence1] + total_sentence1_len = sum(sentence1_len) + sentence2_len = [len(sentence) for sentence in sentence2] + total_sentence2_len = sum(sentence2_len) label_count = Counter(labels) return PairClassificationDescriptiveStatistics( num_samples=len(sentence1), number_of_characters=total_sentence1_len + total_sentence2_len, - avg_sentence1_len=total_sentence1_len / len(sentence1), - avg_sentence2_len=total_sentence2_len / len(sentence2), + min_sentence1_length=min(sentence1_len), + avg_sentence1_length=total_sentence1_len / len(sentence1), + max_sentence1_length=max(sentence1_len), + min_sentence2_length=min(sentence2_len), + avg_sentence2_length=total_sentence2_len / len(sentence2), + max_sentence2_length=max(sentence2_len), unique_labels=len(set(labels)), labels={ str(label): {"count": count} for label, count in label_count.items() diff --git a/mteb/abstasks/AbsTaskReranking.py b/mteb/abstasks/AbsTaskReranking.py index 3703b5a3c..9f2b40b3c 100644 --- a/mteb/abstasks/AbsTaskReranking.py +++ b/mteb/abstasks/AbsTaskReranking.py @@ -20,18 +20,30 @@ class RerankingDescriptiveStatistics(DescriptiveStatistics): number_of_characters: Total number of symbols in the dataset. num_positive: Number of positive examples num_negative: Number of negative examples - avg_query_len: Average length of queries - avg_positive_len: Average length of positive examples - avg_negative_len: Average length of negative examples + min_query_length: Minimum length of queries + avg_query_length: Average length of queries + max_query_length: Maximum length of queries + min_positive_length: Minimum length of positive examples + avg_positive_length: Average length of positive examples + max_positive_length: Maximum length of positive examples + min_negative_length: Minimum length of negative examples + avg_negative_length: Average length of negative examples + max_negative_length: Maximum length of negative examples """ num_samples: int number_of_characters: int num_positive: int num_negative: int - avg_query_len: float - avg_positive_len: float - avg_negative_len: float + min_query_length: int + avg_query_length: float + max_query_length: int + min_positive_length: int + avg_positive_length: float + max_positive_length: int + min_negative_length: int + avg_negative_length: float + max_negative_length: int class AbsTaskReranking(AbsTask): @@ -98,9 +110,12 @@ def _calculate_metrics_from_split( positive = transform_reranking_data(self.dataset[split]["positive"]) negative = transform_reranking_data(self.dataset[split]["negative"]) - total_len_query = sum([len(q) for q in query]) - total_len_positive = sum([len(p) for p in positive]) - total_len_negative = sum([len(n) for n in negative]) + len_query = [len(q) for q in query] + total_len_query = sum(len_query) + len_positive = [len(p) for p in positive] + total_len_positive = sum(len_positive) + len_negative = [len(n) for n in negative] + total_len_negative = sum(len_negative) return RerankingDescriptiveStatistics( num_samples=len(query), number_of_characters=total_len_query @@ -108,9 +123,13 @@ def _calculate_metrics_from_split( + total_len_negative, num_positive=len(positive), num_negative=len(negative), - avg_query_len=total_len_query / len(query), - avg_positive_len=total_len_positive / len(positive), - avg_negative_len=total_len_negative / len(negative), + min_query_length=min(len_query), + avg_query_length=total_len_query / len(query), + max_query_length=max(len_query), + min_positive_length=min(len_positive), + avg_positive_length=total_len_positive / len(positive), + max_positive_length=max(len_positive), + avg_negative_length=total_len_negative / len(negative), ) diff --git a/mteb/abstasks/AbsTaskRetrieval.py b/mteb/abstasks/AbsTaskRetrieval.py index 078979b6b..18300b0c4 100644 --- a/mteb/abstasks/AbsTaskRetrieval.py +++ b/mteb/abstasks/AbsTaskRetrieval.py @@ -206,18 +206,30 @@ class RetrievalDescriptiveStatistics(DescriptiveStatistics): num_queries: number of queries in the dataset num_documents: Number of documents number_of_characters: Total number of symbols in the dataset + min_document_length: Minimum length of documents average_document_length: Average length of documents + max_document_length: Maximum length of documents + min_query_length: Minimum length of queries average_query_length: Average length of queries + max_query_length: Maximum length of queries + min_relevant_docs_per_query: Minimum number of relevant documents per query average_relevant_docs_per_query: Average number of relevant documents per query + max_relevant_docs_per_query: Maximum number of relevant documents per query """ num_samples: int num_queries: int num_documents: int number_of_characters: int + min_document_length: int average_document_length: float + max_document_length: int + min_query_length: int average_query_length: float + max_query_length: int + min_relevant_docs_per_query: int average_relevant_docs_per_query: float + max_relevant_docs_per_query: int class AbsTaskRetrieval(AbsTask): @@ -443,19 +455,25 @@ def _calculate_metrics_from_split( ) qrels_per_doc = num_qrels_non_zero / len(relevant_docs) if num_queries else 0 return RetrievalDescriptiveStatistics( - number_of_characters=query_len + doc_len, + number_of_characters=sum(query_len) + sum(doc_len), num_samples=num_documents + num_queries, num_queries=num_queries, num_documents=num_documents, - average_document_length=doc_len / num_documents, - average_query_length=query_len / num_queries, + min_document_length=min(doc_len), + average_document_length=sum(doc_len) / num_documents, + max_document_length=max(doc_len), + min_query_length=min(query_len), + average_query_length=sum(query_len) / num_queries, + max_query_length=max(query_len), + min_relevant_docs_per_query=qrels_per_doc, average_relevant_docs_per_query=qrels_per_doc, + max_relevant_docs_per_query=qrels_per_doc, ) def calculate_length( queries: dict[str, str], corpus: dict[str, str] -) -> tuple[int, int]: +) -> tuple[list[int], list[int]]: queries_lens = [] doc_lens = [] for query in queries.values(): @@ -467,9 +485,7 @@ def calculate_length( for doc in corpus.values(): doc_lens.append(len(doc)) - doc_len = sum(doc_lens) / len(doc_lens) if doc_lens else 0 - query_len = sum(queries_lens) / len(queries_lens) if queries_lens else 0 - return query_len, doc_len + return doc_lens, queries_lens def process_docs( diff --git a/mteb/abstasks/AbsTaskSTS.py b/mteb/abstasks/AbsTaskSTS.py index c9fa896b6..88a0df87e 100644 --- a/mteb/abstasks/AbsTaskSTS.py +++ b/mteb/abstasks/AbsTaskSTS.py @@ -17,16 +17,28 @@ class STSDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + min_sentence1_length: Minimum length of sentence1 average_sentence1_len: Average length of sentence1 + max_sentence1_length: Maximum length of sentence1 + min_sentence2_length: Minimum length of sentence2 average_sentence2_len: Average length of sentence2 + max_sentence2_length: Maximum length of sentence2 + min_score: Minimum score avg_score: Average score + max_score: Maximum score """ num_samples: int number_of_characters: int + min_sentence1_length: int average_sentence1_len: float + max_sentence1_length: int + min_sentence2_length: int average_sentence2_len: float + max_sentence2_length: int + min_score: float avg_score: float + max_score: float class AbsTaskSTS(AbsTask): @@ -93,13 +105,21 @@ def _calculate_metrics_from_split( sentence2 = self.dataset[split]["sentence2"] score = self.dataset[split]["score"] - total_sentence1_len = sum([len(s) for s in sentence1]) - total_sentence2_len = sum([len(s) for s in sentence2]) + sentence1_len = [len(s) for s in sentence1] + sentence2_len = [len(s) for s in sentence2] + total_sentence1_len = sum(sentence1_len) + total_sentence2_len = sum(sentence2_len) avg_score = sum(score) / len(score) return STSDescriptiveStatistics( num_samples=len(sentence1), number_of_characters=total_sentence1_len + total_sentence2_len, + min_sentence1_length=min(sentence1_len), average_sentence1_len=total_sentence1_len / len(sentence1), + max_sentence1_length=max(sentence1_len), + min_sentence2_length=min(sentence2_len), average_sentence2_len=total_sentence2_len / len(sentence2), + max_sentence2_length=max(sentence2_len), + min_score=min(score), avg_score=avg_score, + max_score=max(score), ) diff --git a/mteb/abstasks/AbsTaskSummarization.py b/mteb/abstasks/AbsTaskSummarization.py index 6d792c319..9cc705f13 100644 --- a/mteb/abstasks/AbsTaskSummarization.py +++ b/mteb/abstasks/AbsTaskSummarization.py @@ -21,18 +21,34 @@ class SummarizationDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. - avg_text_len: Average length of text - avg_human_summaries_len: Average length of human summaries - avg_machine_summaries_len: Average length of machine summaries + min_text_length: Minimum length of text + avg_text_length: Average length of text + max_text_length: Maximum length of text + min_human_summaries_length: Minimum length of human summaries + avg_human_summaries_length: Average length of human summaries + max_human_summaries_length: Maximum length of human summaries + min_machine_summaries_length: Minimum length of machine summaries + avg_machine_summaries_length: Average length of machine summaries + max_machine_summaries_length: Maximum length of machine + min_relevance: Minimum relevance score avg_relevance: Average relevance score + max_relevance: Maximum relevance score """ num_samples: int number_of_characters: int - avg_text_len: float - avg_human_summaries_len: float - avg_machine_summaries_len: float + min_text_length: int + avg_text_length: float + max_text_length: int + min_human_summaries_length: int + avg_human_summaries_length: float + max_human_summaries_length: int + min_machine_summaries_length: int + avg_machine_summaries_length: float + max_machine_summaries_length: int + min_relevance: float avg_relevance: float + max_relevance: float class AbsTaskSummarization(AbsTask): @@ -112,17 +128,28 @@ def _calculate_metrics_from_split( machine_summaries = self.dataset[split]["machine_summaries"] relevance = self.dataset[split]["relevance"] - total_text_len = sum(len(x) for x in text) - total_human_summaries_len = sum(len(x) for x in human_summaries) - total_machine_summaries_len = sum(len(x) for x in machine_summaries) + text_len = [len(t) for t in text] + total_text_len = sum(text_len) + human_summaries_len = [len(s) for s in human_summaries] + total_human_summaries_len = sum(human_summaries_len) + machine_summaries_len = [len(s) for s in machine_summaries] + total_machine_summaries_len = sum(machine_summaries_len) total_relevance = sum(sum(x) / len(x) for x in relevance) return SummarizationDescriptiveStatistics( num_samples=len(text), number_of_characters=total_text_len + total_human_summaries_len + total_machine_summaries_len, - avg_text_len=total_text_len / len(text), - avg_human_summaries_len=total_human_summaries_len / len(text), - avg_machine_summaries_len=total_machine_summaries_len / len(text), + min_text_length=min(text_len), + avg_text_length=total_text_len / len(text), + max_text_length=max(text_len), + min_human_summaries_length=min(human_summaries_len), + avg_human_summaries_length=total_human_summaries_len / len(text), + max_human_summaries_length=max(human_summaries_len), + min_machine_summaries_length=min(machine_summaries_len), + avg_machine_summaries_length=total_machine_summaries_len / len(text), + max_machine_summaries_length=max(machine_summaries_len), + min_relevance=min(relevance), avg_relevance=total_relevance / len(relevance), + max_relevance=max(relevance), ) diff --git a/mteb/descriptive_stats/BitextMining/BornholmBitextMining.json b/mteb/descriptive_stats/BitextMining/BornholmBitextMining.json index 131c9966a..a6fcf5600 100644 --- a/mteb/descriptive_stats/BitextMining/BornholmBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/BornholmBitextMining.json @@ -1,7 +1,11 @@ { "test": { + "min_sentence1_length": 1, "average_sentence1_length": 49.834, + "max_sentence1_length": 555, + "min_sentence2_length": 5, "average_sentence2_length": 38.888, + "max_sentence2_length": 453, "num_samples": 500, "number_of_characters": 44361 } diff --git a/mteb/descriptive_stats/BitextMining/IN22ConvBitextMining.json b/mteb/descriptive_stats/BitextMining/IN22ConvBitextMining.json index 507d9ad7b..1435fa3f1 100644 --- a/mteb/descriptive_stats/BitextMining/IN22ConvBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/IN22ConvBitextMining.json @@ -1,3043 +1,5071 @@ { "test": { + "min_sentence1_length": 3, "average_sentence1_length": 54.32948595562498, + "max_sentence1_length": 239, + "min_sentence2_length": 3, "average_sentence2_length": 54.32948595562498, + "max_sentence2_length": 239, "num_samples": 760518, "number_of_characters": 82637104, "hf_subset_descriptive_stats": { "asm_Beng-ben_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 155988 }, "asm_Beng-brx_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 162044 }, "asm_Beng-doi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 167032 }, "asm_Beng-eng_Latn": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 160716 }, "asm_Beng-gom_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 156282 }, "asm_Beng-guj_Gujr": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 158269 }, "asm_Beng-hin_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 159964 }, "asm_Beng-kan_Knda": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 165177 }, "asm_Beng-kas_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 164681 }, "asm_Beng-mai_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 162408 }, "asm_Beng-mal_Mlym": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 172838 }, "asm_Beng-mar_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 162747 }, "asm_Beng-mni_Mtei": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 157316 }, "asm_Beng-npi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 160906 }, "asm_Beng-ory_Orya": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 164223 }, "asm_Beng-pan_Guru": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 160201 }, "asm_Beng-san_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 158093 }, "asm_Beng-sat_Olck": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 169379 }, "asm_Beng-snd_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 162623 }, "asm_Beng-tam_Taml": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 174866 }, "asm_Beng-tel_Telu": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 157690 }, "asm_Beng-urd_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 161305 }, "ben_Beng-asm_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 155988 }, "ben_Beng-brx_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 156448 }, "ben_Beng-doi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 161436 }, "ben_Beng-eng_Latn": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 155120 }, "ben_Beng-gom_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 150686 }, "ben_Beng-guj_Gujr": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 152673 }, "ben_Beng-hin_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 154368 }, "ben_Beng-kan_Knda": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 159581 }, "ben_Beng-kas_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 159085 }, "ben_Beng-mai_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 156812 }, "ben_Beng-mal_Mlym": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 167242 }, "ben_Beng-mar_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 157151 }, "ben_Beng-mni_Mtei": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 151720 }, "ben_Beng-npi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 155310 }, "ben_Beng-ory_Orya": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 158627 }, "ben_Beng-pan_Guru": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 154605 }, "ben_Beng-san_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 152497 }, "ben_Beng-sat_Olck": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 163783 }, "ben_Beng-snd_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 157027 }, "ben_Beng-tam_Taml": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 169270 }, "ben_Beng-tel_Telu": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 152094 }, "ben_Beng-urd_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 155709 }, "brx_Deva-asm_Beng": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 162044 }, "brx_Deva-ben_Beng": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 156448 }, "brx_Deva-doi_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 167492 }, "brx_Deva-eng_Latn": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 161176 }, "brx_Deva-gom_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 156742 }, "brx_Deva-guj_Gujr": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 158729 }, "brx_Deva-hin_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 160424 }, "brx_Deva-kan_Knda": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 165637 }, "brx_Deva-kas_Arab": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 165141 }, "brx_Deva-mai_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 162868 }, "brx_Deva-mal_Mlym": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 173298 }, "brx_Deva-mar_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 163207 }, "brx_Deva-mni_Mtei": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 157776 }, "brx_Deva-npi_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 161366 }, "brx_Deva-ory_Orya": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 164683 }, "brx_Deva-pan_Guru": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 160661 }, "brx_Deva-san_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 158553 }, "brx_Deva-sat_Olck": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 169839 }, "brx_Deva-snd_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 163083 }, "brx_Deva-tam_Taml": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 175326 }, "brx_Deva-tel_Telu": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 158150 }, "brx_Deva-urd_Arab": { + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 161765 }, "doi_Deva-asm_Beng": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 167032 }, "doi_Deva-ben_Beng": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 161436 }, "doi_Deva-brx_Deva": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 167492 }, "doi_Deva-eng_Latn": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 166164 }, "doi_Deva-gom_Deva": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 161730 }, "doi_Deva-guj_Gujr": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 163717 }, "doi_Deva-hin_Deva": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 165412 }, "doi_Deva-kan_Knda": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 170625 }, "doi_Deva-kas_Arab": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 170129 }, "doi_Deva-mai_Deva": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 167856 }, "doi_Deva-mal_Mlym": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 178286 }, "doi_Deva-mar_Deva": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 168195 }, "doi_Deva-mni_Mtei": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 162764 }, "doi_Deva-npi_Deva": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 166354 }, "doi_Deva-ory_Orya": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 169671 }, "doi_Deva-pan_Guru": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 165649 }, "doi_Deva-san_Deva": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 163541 }, "doi_Deva-sat_Olck": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 174827 }, "doi_Deva-snd_Deva": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 168071 }, "doi_Deva-tam_Taml": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 180314 }, "doi_Deva-tel_Telu": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 163138 }, "doi_Deva-urd_Arab": { + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 166753 }, "eng_Latn-asm_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 160716 }, "eng_Latn-ben_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 155120 }, "eng_Latn-brx_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 161176 }, "eng_Latn-doi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 166164 }, "eng_Latn-gom_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 155414 }, "eng_Latn-guj_Gujr": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 157401 }, "eng_Latn-hin_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 159096 }, "eng_Latn-kan_Knda": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 164309 }, "eng_Latn-kas_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 163813 }, "eng_Latn-mai_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 161540 }, "eng_Latn-mal_Mlym": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 171970 }, "eng_Latn-mar_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 161879 }, "eng_Latn-mni_Mtei": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 156448 }, "eng_Latn-npi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 160038 }, "eng_Latn-ory_Orya": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 163355 }, "eng_Latn-pan_Guru": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 159333 }, "eng_Latn-san_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 157225 }, "eng_Latn-sat_Olck": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 168511 }, "eng_Latn-snd_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 161755 }, "eng_Latn-tam_Taml": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 173998 }, "eng_Latn-tel_Telu": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 156822 }, "eng_Latn-urd_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 160437 }, "gom_Deva-asm_Beng": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 156282 }, "gom_Deva-ben_Beng": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 150686 }, "gom_Deva-brx_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 156742 }, "gom_Deva-doi_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 161730 }, "gom_Deva-eng_Latn": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 155414 }, "gom_Deva-guj_Gujr": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 152967 }, "gom_Deva-hin_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 154662 }, "gom_Deva-kan_Knda": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 159875 }, "gom_Deva-kas_Arab": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 159379 }, "gom_Deva-mai_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 157106 }, "gom_Deva-mal_Mlym": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 167536 }, "gom_Deva-mar_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 157445 }, "gom_Deva-mni_Mtei": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 152014 }, "gom_Deva-npi_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 155604 }, "gom_Deva-ory_Orya": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 158921 }, "gom_Deva-pan_Guru": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 154899 }, "gom_Deva-san_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 152791 }, "gom_Deva-sat_Olck": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 164077 }, "gom_Deva-snd_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 157321 }, "gom_Deva-tam_Taml": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 169564 }, "gom_Deva-tel_Telu": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 152388 }, "gom_Deva-urd_Arab": { + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 156003 }, "guj_Gujr-asm_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 158269 }, "guj_Gujr-ben_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 152673 }, "guj_Gujr-brx_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 158729 }, "guj_Gujr-doi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 163717 }, "guj_Gujr-eng_Latn": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 157401 }, "guj_Gujr-gom_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 152967 }, "guj_Gujr-hin_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 156649 }, "guj_Gujr-kan_Knda": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 161862 }, "guj_Gujr-kas_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 161366 }, "guj_Gujr-mai_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 159093 }, "guj_Gujr-mal_Mlym": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 169523 }, "guj_Gujr-mar_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 159432 }, "guj_Gujr-mni_Mtei": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 154001 }, "guj_Gujr-npi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 157591 }, "guj_Gujr-ory_Orya": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 160908 }, "guj_Gujr-pan_Guru": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 156886 }, "guj_Gujr-san_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 154778 }, "guj_Gujr-sat_Olck": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 166064 }, "guj_Gujr-snd_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 159308 }, "guj_Gujr-tam_Taml": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 171551 }, "guj_Gujr-tel_Telu": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 154375 }, "guj_Gujr-urd_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 157990 }, "hin_Deva-asm_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 159964 }, "hin_Deva-ben_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 154368 }, "hin_Deva-brx_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 160424 }, "hin_Deva-doi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 165412 }, "hin_Deva-eng_Latn": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 159096 }, "hin_Deva-gom_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 154662 }, "hin_Deva-guj_Gujr": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 156649 }, "hin_Deva-kan_Knda": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 163557 }, "hin_Deva-kas_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 163061 }, "hin_Deva-mai_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 160788 }, "hin_Deva-mal_Mlym": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 171218 }, "hin_Deva-mar_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 161127 }, "hin_Deva-mni_Mtei": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 155696 }, "hin_Deva-npi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 159286 }, "hin_Deva-ory_Orya": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 162603 }, "hin_Deva-pan_Guru": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 158581 }, "hin_Deva-san_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 156473 }, "hin_Deva-sat_Olck": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 167759 }, "hin_Deva-snd_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 161003 }, "hin_Deva-tam_Taml": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 173246 }, "hin_Deva-tel_Telu": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 156070 }, "hin_Deva-urd_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 159685 }, "kan_Knda-asm_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 165177 }, "kan_Knda-ben_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 159581 }, "kan_Knda-brx_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 165637 }, "kan_Knda-doi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 170625 }, "kan_Knda-eng_Latn": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 164309 }, "kan_Knda-gom_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 159875 }, "kan_Knda-guj_Gujr": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 161862 }, "kan_Knda-hin_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 163557 }, "kan_Knda-kas_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 168274 }, "kan_Knda-mai_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 166001 }, "kan_Knda-mal_Mlym": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 176431 }, "kan_Knda-mar_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 166340 }, "kan_Knda-mni_Mtei": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 160909 }, "kan_Knda-npi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 164499 }, "kan_Knda-ory_Orya": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 167816 }, "kan_Knda-pan_Guru": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 163794 }, "kan_Knda-san_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 161686 }, "kan_Knda-sat_Olck": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 172972 }, "kan_Knda-snd_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 166216 }, "kan_Knda-tam_Taml": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 178459 }, "kan_Knda-tel_Telu": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 161283 }, "kan_Knda-urd_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 164898 }, "kas_Arab-asm_Beng": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 164681 }, "kas_Arab-ben_Beng": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 159085 }, "kas_Arab-brx_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 165141 }, "kas_Arab-doi_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 170129 }, "kas_Arab-eng_Latn": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 163813 }, "kas_Arab-gom_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 159379 }, "kas_Arab-guj_Gujr": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 161366 }, "kas_Arab-hin_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 163061 }, "kas_Arab-kan_Knda": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 168274 }, "kas_Arab-mai_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 165505 }, "kas_Arab-mal_Mlym": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 175935 }, "kas_Arab-mar_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 165844 }, "kas_Arab-mni_Mtei": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 160413 }, "kas_Arab-npi_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 164003 }, "kas_Arab-ory_Orya": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 167320 }, "kas_Arab-pan_Guru": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 163298 }, "kas_Arab-san_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 161190 }, "kas_Arab-sat_Olck": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 172476 }, "kas_Arab-snd_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 165720 }, "kas_Arab-tam_Taml": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 177963 }, "kas_Arab-tel_Telu": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 160787 }, "kas_Arab-urd_Arab": { + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 164402 }, "mai_Deva-asm_Beng": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 162408 }, "mai_Deva-ben_Beng": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 156812 }, "mai_Deva-brx_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 162868 }, "mai_Deva-doi_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 167856 }, "mai_Deva-eng_Latn": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 161540 }, "mai_Deva-gom_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 157106 }, "mai_Deva-guj_Gujr": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 159093 }, "mai_Deva-hin_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 160788 }, "mai_Deva-kan_Knda": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 166001 }, "mai_Deva-kas_Arab": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 165505 }, "mai_Deva-mal_Mlym": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 173662 }, "mai_Deva-mar_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 163571 }, "mai_Deva-mni_Mtei": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 158140 }, "mai_Deva-npi_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 161730 }, "mai_Deva-ory_Orya": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 165047 }, "mai_Deva-pan_Guru": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 161025 }, "mai_Deva-san_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 158917 }, "mai_Deva-sat_Olck": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 170203 }, "mai_Deva-snd_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 163447 }, "mai_Deva-tam_Taml": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 175690 }, "mai_Deva-tel_Telu": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 158514 }, "mai_Deva-urd_Arab": { + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 162129 }, "mal_Mlym-asm_Beng": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 172838 }, "mal_Mlym-ben_Beng": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 167242 }, "mal_Mlym-brx_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 173298 }, "mal_Mlym-doi_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 178286 }, "mal_Mlym-eng_Latn": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 171970 }, "mal_Mlym-gom_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 167536 }, "mal_Mlym-guj_Gujr": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 169523 }, "mal_Mlym-hin_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 171218 }, "mal_Mlym-kan_Knda": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 176431 }, "mal_Mlym-kas_Arab": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 175935 }, "mal_Mlym-mai_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 173662 }, "mal_Mlym-mar_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 174001 }, "mal_Mlym-mni_Mtei": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 168570 }, "mal_Mlym-npi_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 172160 }, "mal_Mlym-ory_Orya": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 175477 }, "mal_Mlym-pan_Guru": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 171455 }, "mal_Mlym-san_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 169347 }, "mal_Mlym-sat_Olck": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 180633 }, "mal_Mlym-snd_Deva": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 173877 }, "mal_Mlym-tam_Taml": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 186120 }, "mal_Mlym-tel_Telu": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 168944 }, "mal_Mlym-urd_Arab": { + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 172559 }, "mar_Deva-asm_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 162747 }, "mar_Deva-ben_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 157151 }, "mar_Deva-brx_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 163207 }, "mar_Deva-doi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 168195 }, "mar_Deva-eng_Latn": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 161879 }, "mar_Deva-gom_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 157445 }, "mar_Deva-guj_Gujr": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 159432 }, "mar_Deva-hin_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 161127 }, "mar_Deva-kan_Knda": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 166340 }, "mar_Deva-kas_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 165844 }, "mar_Deva-mai_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 163571 }, "mar_Deva-mal_Mlym": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 174001 }, "mar_Deva-mni_Mtei": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 158479 }, "mar_Deva-npi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 162069 }, "mar_Deva-ory_Orya": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 165386 }, "mar_Deva-pan_Guru": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 161364 }, "mar_Deva-san_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 159256 }, "mar_Deva-sat_Olck": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 170542 }, "mar_Deva-snd_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 163786 }, "mar_Deva-tam_Taml": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 176029 }, "mar_Deva-tel_Telu": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 158853 }, "mar_Deva-urd_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 162468 }, "mni_Mtei-asm_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 157316 }, "mni_Mtei-ben_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 151720 }, "mni_Mtei-brx_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 157776 }, "mni_Mtei-doi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 162764 }, "mni_Mtei-eng_Latn": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 156448 }, "mni_Mtei-gom_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 152014 }, "mni_Mtei-guj_Gujr": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 154001 }, "mni_Mtei-hin_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 155696 }, "mni_Mtei-kan_Knda": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 160909 }, "mni_Mtei-kas_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 160413 }, "mni_Mtei-mai_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 158140 }, "mni_Mtei-mal_Mlym": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 168570 }, "mni_Mtei-mar_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 158479 }, "mni_Mtei-npi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 156638 }, "mni_Mtei-ory_Orya": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 159955 }, "mni_Mtei-pan_Guru": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 155933 }, "mni_Mtei-san_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 153825 }, "mni_Mtei-sat_Olck": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 165111 }, "mni_Mtei-snd_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 158355 }, "mni_Mtei-tam_Taml": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 170598 }, "mni_Mtei-tel_Telu": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 153422 }, "mni_Mtei-urd_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 157037 }, "npi_Deva-asm_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 160906 }, "npi_Deva-ben_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 155310 }, "npi_Deva-brx_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 161366 }, "npi_Deva-doi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 166354 }, "npi_Deva-eng_Latn": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 160038 }, "npi_Deva-gom_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 155604 }, "npi_Deva-guj_Gujr": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 157591 }, "npi_Deva-hin_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 159286 }, "npi_Deva-kan_Knda": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 164499 }, "npi_Deva-kas_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 164003 }, "npi_Deva-mai_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 161730 }, "npi_Deva-mal_Mlym": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 172160 }, "npi_Deva-mar_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 162069 }, "npi_Deva-mni_Mtei": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 156638 }, "npi_Deva-ory_Orya": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 163545 }, "npi_Deva-pan_Guru": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 159523 }, "npi_Deva-san_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 157415 }, "npi_Deva-sat_Olck": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 168701 }, "npi_Deva-snd_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 161945 }, "npi_Deva-tam_Taml": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 174188 }, "npi_Deva-tel_Telu": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 157012 }, "npi_Deva-urd_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 160627 }, "ory_Orya-asm_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 164223 }, "ory_Orya-ben_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 158627 }, "ory_Orya-brx_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 164683 }, "ory_Orya-doi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 169671 }, "ory_Orya-eng_Latn": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 163355 }, "ory_Orya-gom_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 158921 }, "ory_Orya-guj_Gujr": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 160908 }, "ory_Orya-hin_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 162603 }, "ory_Orya-kan_Knda": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 167816 }, "ory_Orya-kas_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 167320 }, "ory_Orya-mai_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 165047 }, "ory_Orya-mal_Mlym": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 175477 }, "ory_Orya-mar_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 165386 }, "ory_Orya-mni_Mtei": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 159955 }, "ory_Orya-npi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 163545 }, "ory_Orya-pan_Guru": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 162840 }, "ory_Orya-san_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 160732 }, "ory_Orya-sat_Olck": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 172018 }, "ory_Orya-snd_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 165262 }, "ory_Orya-tam_Taml": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 177505 }, "ory_Orya-tel_Telu": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 160329 }, "ory_Orya-urd_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 163944 }, "pan_Guru-asm_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 160201 }, "pan_Guru-ben_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 154605 }, "pan_Guru-brx_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 160661 }, "pan_Guru-doi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 165649 }, "pan_Guru-eng_Latn": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 159333 }, "pan_Guru-gom_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 154899 }, "pan_Guru-guj_Gujr": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 156886 }, "pan_Guru-hin_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 158581 }, "pan_Guru-kan_Knda": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 163794 }, "pan_Guru-kas_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 163298 }, "pan_Guru-mai_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 161025 }, "pan_Guru-mal_Mlym": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 171455 }, "pan_Guru-mar_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 161364 }, "pan_Guru-mni_Mtei": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 155933 }, "pan_Guru-npi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 159523 }, "pan_Guru-ory_Orya": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 162840 }, "pan_Guru-san_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 156710 }, "pan_Guru-sat_Olck": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 167996 }, "pan_Guru-snd_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 161240 }, "pan_Guru-tam_Taml": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 173483 }, "pan_Guru-tel_Telu": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 156307 }, "pan_Guru-urd_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 159922 }, "san_Deva-asm_Beng": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 158093 }, "san_Deva-ben_Beng": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 152497 }, "san_Deva-brx_Deva": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 158553 }, "san_Deva-doi_Deva": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 163541 }, "san_Deva-eng_Latn": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 157225 }, "san_Deva-gom_Deva": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 152791 }, "san_Deva-guj_Gujr": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 154778 }, "san_Deva-hin_Deva": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 156473 }, "san_Deva-kan_Knda": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 161686 }, "san_Deva-kas_Arab": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 161190 }, "san_Deva-mai_Deva": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 158917 }, "san_Deva-mal_Mlym": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 169347 }, "san_Deva-mar_Deva": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 159256 }, "san_Deva-mni_Mtei": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 153825 }, "san_Deva-npi_Deva": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 157415 }, "san_Deva-ory_Orya": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 160732 }, "san_Deva-pan_Guru": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 156710 }, "san_Deva-sat_Olck": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 165888 }, "san_Deva-snd_Deva": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 159132 }, "san_Deva-tam_Taml": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 171375 }, "san_Deva-tel_Telu": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 154199 }, "san_Deva-urd_Arab": { + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 157814 }, "sat_Olck-asm_Beng": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 169379 }, "sat_Olck-ben_Beng": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 163783 }, "sat_Olck-brx_Deva": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 169839 }, "sat_Olck-doi_Deva": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 174827 }, "sat_Olck-eng_Latn": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 168511 }, "sat_Olck-gom_Deva": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 164077 }, "sat_Olck-guj_Gujr": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 166064 }, "sat_Olck-hin_Deva": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 167759 }, "sat_Olck-kan_Knda": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 172972 }, "sat_Olck-kas_Arab": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 172476 }, "sat_Olck-mai_Deva": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 170203 }, "sat_Olck-mal_Mlym": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 180633 }, "sat_Olck-mar_Deva": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 170542 }, "sat_Olck-mni_Mtei": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 165111 }, "sat_Olck-npi_Deva": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 168701 }, "sat_Olck-ory_Orya": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 172018 }, "sat_Olck-pan_Guru": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 167996 }, "sat_Olck-san_Deva": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 165888 }, "sat_Olck-snd_Deva": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 170418 }, "sat_Olck-tam_Taml": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 182661 }, "sat_Olck-tel_Telu": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 165485 }, "sat_Olck-urd_Arab": { + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 169100 }, "snd_Deva-asm_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 162623 }, "snd_Deva-ben_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 157027 }, "snd_Deva-brx_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 163083 }, "snd_Deva-doi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 168071 }, "snd_Deva-eng_Latn": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 161755 }, "snd_Deva-gom_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 157321 }, "snd_Deva-guj_Gujr": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 159308 }, "snd_Deva-hin_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 161003 }, "snd_Deva-kan_Knda": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 166216 }, "snd_Deva-kas_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 165720 }, "snd_Deva-mai_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 163447 }, "snd_Deva-mal_Mlym": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 173877 }, "snd_Deva-mar_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 163786 }, "snd_Deva-mni_Mtei": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 158355 }, "snd_Deva-npi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 161945 }, "snd_Deva-ory_Orya": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 165262 }, "snd_Deva-pan_Guru": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 161240 }, "snd_Deva-san_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 159132 }, "snd_Deva-sat_Olck": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 170418 }, "snd_Deva-tam_Taml": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 175905 }, "snd_Deva-tel_Telu": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 158729 }, "snd_Deva-urd_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 162344 }, "tam_Taml-asm_Beng": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 174866 }, "tam_Taml-ben_Beng": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 169270 }, "tam_Taml-brx_Deva": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 175326 }, "tam_Taml-doi_Deva": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 180314 }, "tam_Taml-eng_Latn": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 173998 }, "tam_Taml-gom_Deva": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 169564 }, "tam_Taml-guj_Gujr": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 171551 }, "tam_Taml-hin_Deva": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 173246 }, "tam_Taml-kan_Knda": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 178459 }, "tam_Taml-kas_Arab": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 177963 }, "tam_Taml-mai_Deva": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 175690 }, "tam_Taml-mal_Mlym": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 186120 }, "tam_Taml-mar_Deva": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 176029 }, "tam_Taml-mni_Mtei": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 170598 }, "tam_Taml-npi_Deva": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 174188 }, "tam_Taml-ory_Orya": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 177505 }, "tam_Taml-pan_Guru": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 173483 }, "tam_Taml-san_Deva": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 171375 }, "tam_Taml-sat_Olck": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 182661 }, "tam_Taml-snd_Deva": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 175905 }, "tam_Taml-tel_Telu": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 170972 }, "tam_Taml-urd_Arab": { + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 174587 }, "tel_Telu-asm_Beng": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 157690 }, "tel_Telu-ben_Beng": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 152094 }, "tel_Telu-brx_Deva": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 158150 }, "tel_Telu-doi_Deva": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 163138 }, "tel_Telu-eng_Latn": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 156822 }, "tel_Telu-gom_Deva": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 152388 }, "tel_Telu-guj_Gujr": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 154375 }, "tel_Telu-hin_Deva": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 156070 }, "tel_Telu-kan_Knda": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 161283 }, "tel_Telu-kas_Arab": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 160787 }, "tel_Telu-mai_Deva": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 158514 }, "tel_Telu-mal_Mlym": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 168944 }, "tel_Telu-mar_Deva": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 158853 }, "tel_Telu-mni_Mtei": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 153422 }, "tel_Telu-npi_Deva": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 157012 }, "tel_Telu-ory_Orya": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 160329 }, "tel_Telu-pan_Guru": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 156307 }, "tel_Telu-san_Deva": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 154199 }, "tel_Telu-sat_Olck": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 165485 }, "tel_Telu-snd_Deva": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 158729 }, "tel_Telu-tam_Taml": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 170972 }, "tel_Telu-urd_Arab": { + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, + "max_sentence2_length": 206, "num_samples": 1503, "number_of_characters": 157411 }, "urd_Arab-asm_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, + "max_sentence2_length": 208, "num_samples": 1503, "number_of_characters": 161305 }, "urd_Arab-ben_Beng": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, + "max_sentence2_length": 178, "num_samples": 1503, "number_of_characters": 155709 }, "urd_Arab-brx_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, + "max_sentence2_length": 210, "num_samples": 1503, "number_of_characters": 161765 }, "urd_Arab-doi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, + "max_sentence2_length": 209, "num_samples": 1503, "number_of_characters": 166753 }, "urd_Arab-eng_Latn": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 160437 }, "urd_Arab-gom_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 156003 }, "urd_Arab-guj_Gujr": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, + "max_sentence2_length": 205, "num_samples": 1503, "number_of_characters": 157990 }, "urd_Arab-hin_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, + "max_sentence2_length": 192, "num_samples": 1503, "number_of_characters": 159685 }, "urd_Arab-kan_Knda": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, + "max_sentence2_length": 201, "num_samples": 1503, "number_of_characters": 164898 }, "urd_Arab-kas_Arab": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, + "max_sentence2_length": 203, "num_samples": 1503, "number_of_characters": 164402 }, "urd_Arab-mai_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, + "max_sentence2_length": 230, "num_samples": 1503, "number_of_characters": 162129 }, "urd_Arab-mal_Mlym": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, + "max_sentence2_length": 219, "num_samples": 1503, "number_of_characters": 172559 }, "urd_Arab-mar_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 162468 }, "urd_Arab-mni_Mtei": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, + "max_sentence2_length": 239, "num_samples": 1503, "number_of_characters": 157037 }, "urd_Arab-npi_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, + "max_sentence2_length": 223, "num_samples": 1503, "number_of_characters": 160627 }, "urd_Arab-ory_Orya": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 163944 }, "urd_Arab-pan_Guru": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, + "max_sentence2_length": 221, "num_samples": 1503, "number_of_characters": 159922 }, "urd_Arab-san_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, + "max_sentence2_length": 181, "num_samples": 1503, "number_of_characters": 157814 }, "urd_Arab-sat_Olck": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, + "max_sentence2_length": 225, "num_samples": 1503, "number_of_characters": 169100 }, "urd_Arab-snd_Deva": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, + "max_sentence2_length": 195, "num_samples": 1503, "number_of_characters": 162344 }, "urd_Arab-tam_Taml": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, + "max_sentence2_length": 224, "num_samples": 1503, "number_of_characters": 174587 }, "urd_Arab-tel_Telu": { + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, + "max_sentence2_length": 182, "num_samples": 1503, "number_of_characters": 157411 } diff --git a/mteb/descriptive_stats/BitextMining/NusaTranslationBitextMining.json b/mteb/descriptive_stats/BitextMining/NusaTranslationBitextMining.json index 60a8e055c..833e9f17d 100644 --- a/mteb/descriptive_stats/BitextMining/NusaTranslationBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/NusaTranslationBitextMining.json @@ -1,73 +1,121 @@ { "train": { + "min_sentence1_length": 5, "average_sentence1_length": 145.4552390438247, + "max_sentence1_length": 873, + "min_sentence2_length": 5, "average_sentence2_length": 148.56607569721115, + "max_sentence2_length": 980, "num_samples": 50200, "number_of_characters": 14759870, "hf_subset_descriptive_stats": { "ind-abs": { + "min_sentence1_length": 5, "average_sentence1_length": 148.366, + "max_sentence1_length": 727, + "min_sentence2_length": 6, "average_sentence2_length": 147.314, + "max_sentence2_length": 629, "num_samples": 1000, "number_of_characters": 295680 }, "ind-btk": { + "min_sentence1_length": 5, "average_sentence1_length": 145.36666666666667, + "max_sentence1_length": 873, + "min_sentence2_length": 5, "average_sentence2_length": 146.74045454545455, + "max_sentence2_length": 980, "num_samples": 6600, "number_of_characters": 1927907 }, "ind-bew": { + "min_sentence1_length": 5, "average_sentence1_length": 145.4280303030303, + "max_sentence1_length": 873, + "min_sentence2_length": 6, "average_sentence2_length": 148.40530303030303, + "max_sentence2_length": 840, "num_samples": 6600, "number_of_characters": 1939300 }, "ind-bhp": { + "min_sentence1_length": 11, "average_sentence1_length": 133.528, + "max_sentence1_length": 468, + "min_sentence2_length": 10, "average_sentence2_length": 128.138, + "max_sentence2_length": 459, "num_samples": 1000, "number_of_characters": 261666 }, "ind-jav": { + "min_sentence1_length": 5, "average_sentence1_length": 145.42772727272728, + "max_sentence1_length": 873, + "min_sentence2_length": 5, "average_sentence2_length": 145.8089393939394, + "max_sentence2_length": 854, "num_samples": 6600, "number_of_characters": 1922162 }, "ind-mad": { + "min_sentence1_length": 5, "average_sentence1_length": 145.35545454545453, + "max_sentence1_length": 873, + "min_sentence2_length": 5, "average_sentence2_length": 153.6228787878788, + "max_sentence2_length": 827, "num_samples": 6600, "number_of_characters": 1973257 }, "ind-mak": { + "min_sentence1_length": 5, "average_sentence1_length": 145.42772727272728, + "max_sentence1_length": 873, + "min_sentence2_length": 6, "average_sentence2_length": 150.6128787878788, + "max_sentence2_length": 888, "num_samples": 6600, "number_of_characters": 1953868 }, "ind-min": { + "min_sentence1_length": 5, "average_sentence1_length": 145.42772727272728, + "max_sentence1_length": 873, + "min_sentence2_length": 6, "average_sentence2_length": 148.0621212121212, + "max_sentence2_length": 837, "num_samples": 6600, "number_of_characters": 1937033 }, "ind-mui": { + "min_sentence1_length": 11, "average_sentence1_length": 150.454, + "max_sentence1_length": 451, + "min_sentence2_length": 11, "average_sentence2_length": 150.994, + "max_sentence2_length": 450, "num_samples": 1000, "number_of_characters": 301448 }, "ind-rej": { + "min_sentence1_length": 9, "average_sentence1_length": 151.622, + "max_sentence1_length": 873, + "min_sentence2_length": 8, "average_sentence2_length": 139.583, + "max_sentence2_length": 784, "num_samples": 1000, "number_of_characters": 291205 }, "ind-sun": { + "min_sentence1_length": 5, "average_sentence1_length": 145.42772727272728, + "max_sentence1_length": 873, + "min_sentence2_length": 5, "average_sentence2_length": 150.9880303030303, + "max_sentence2_length": 881, "num_samples": 6600, "number_of_characters": 1956344 } diff --git a/mteb/descriptive_stats/BitextMining/PhincBitextMining.json b/mteb/descriptive_stats/BitextMining/PhincBitextMining.json new file mode 100644 index 000000000..73080b250 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/PhincBitextMining.json @@ -0,0 +1,24 @@ +{ + "train": { + "min_sentence1_length": 1, + "average_sentence1_length": 74.02300189256079, + "max_sentence1_length": 278, + "min_sentence2_length": 3, + "average_sentence2_length": 76.61442713640996, + "max_sentence2_length": 274, + "num_samples": 13738, + "number_of_characters": 2069457, + "hf_subset_descriptive_stats": { + "eng-eng_hin": { + "min_sentence1_length": 1, + "average_sentence1_length": 74.02300189256079, + "max_sentence1_length": 278, + "min_sentence2_length": 3, + "average_sentence2_length": 76.61442713640996, + "max_sentence2_length": 274, + "num_samples": 13738, + "number_of_characters": 2069457 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LanguageClassification.json b/mteb/descriptive_stats/Classification/LanguageClassification.json index cf8b83d5d..3c446bd33 100644 --- a/mteb/descriptive_stats/Classification/LanguageClassification.json +++ b/mteb/descriptive_stats/Classification/LanguageClassification.json @@ -2,7 +2,9 @@ "test": { "num_samples": 2048, "number_of_characters": 224352, + "min_text_length": 14, "average_text_length": 109.546875, + "max_text_length": 1270, "unique_labels": 20, "labels": { "17": { diff --git a/mteb/descriptive_stats/Classification/SlovakHateSpeechClassification.json b/mteb/descriptive_stats/Classification/SlovakHateSpeechClassification.json index 23225ae22..67b65bdc7 100644 --- a/mteb/descriptive_stats/Classification/SlovakHateSpeechClassification.json +++ b/mteb/descriptive_stats/Classification/SlovakHateSpeechClassification.json @@ -2,7 +2,9 @@ "test": { "num_samples": 1319, "number_of_characters": 122279, + "min_text_length": 8, "average_text_length": 92.70583775587566, + "max_text_length": 1584, "unique_labels": 2, "labels": { "1": { diff --git a/mteb/descriptive_stats/Clustering/ArXivHierarchicalClusteringP2P.json b/mteb/descriptive_stats/Clustering/ArXivHierarchicalClusteringP2P.json index 8a5118e0c..e6066a83c 100644 --- a/mteb/descriptive_stats/Clustering/ArXivHierarchicalClusteringP2P.json +++ b/mteb/descriptive_stats/Clustering/ArXivHierarchicalClusteringP2P.json @@ -2,8 +2,12 @@ "test": { "num_samples": 2048, "number_of_characters": 2065284, + "min_text_length": 103, "average_text_length": 1008.439453125, + "max_text_length": 2103, + "min_labels_per_text": 1, "average_labels_per_text": 1.46337890625, + "max_labels_per_text": 381, "unique_labels": 129, "labels": { "cs": { diff --git a/mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.json b/mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.json index f1dda7920..062713177 100644 --- a/mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.json +++ b/mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.json @@ -2,8 +2,12 @@ "test": { "num_samples": 10, "number_of_characters": 75000, + "min_text_length": 5000, "average_text_length": 7500.0, + "max_text_length": 10000, + "min_labels_per_text": 1, "average_labels_per_text": 7500.0, + "max_labels_per_text": 14251, "unique_labels": 26, "labels": { "neuroscience": { diff --git a/mteb/descriptive_stats/Clustering/RuSciBenchGRNTIClusteringP2P.json b/mteb/descriptive_stats/Clustering/RuSciBenchGRNTIClusteringP2P.json index 9eff1b40d..126cd893b 100644 --- a/mteb/descriptive_stats/Clustering/RuSciBenchGRNTIClusteringP2P.json +++ b/mteb/descriptive_stats/Clustering/RuSciBenchGRNTIClusteringP2P.json @@ -2,8 +2,12 @@ "test": { "num_samples": 2048, "number_of_characters": 1822339, + "min_text_length": 84, "average_text_length": 889.81396484375, + "max_text_length": 3143, + "min_labels_per_text": 73, "average_labels_per_text": 1.0, + "max_labels_per_text": 74, "unique_labels": 28, "labels": { "3": { diff --git a/mteb/descriptive_stats/Clustering/WikiClusteringP2P.json b/mteb/descriptive_stats/Clustering/WikiClusteringP2P.json index 99b033bce..700dbeed0 100644 --- a/mteb/descriptive_stats/Clustering/WikiClusteringP2P.json +++ b/mteb/descriptive_stats/Clustering/WikiClusteringP2P.json @@ -2,8 +2,12 @@ "test": { "num_samples": 140, "number_of_characters": 71680, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "min_labels_per_text": 1, "average_labels_per_text": 512.0, + "max_labels_per_text": 3986, "unique_labels": 282, "labels": { "Nauke": { @@ -857,8 +861,12 @@ "bs": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "min_labels_per_text": 6, "average_labels_per_text": 512.0, + "max_labels_per_text": 1492, "unique_labels": 17, "labels": { "Nauke": { @@ -917,8 +925,12 @@ "ca": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "min_labels_per_text": 20, "average_labels_per_text": 512.0, + "max_labels_per_text": 1844, "unique_labels": 8, "labels": { "Llocs": { @@ -950,8 +962,12 @@ "cs": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "min_labels_per_text": 21, "average_labels_per_text": 512.0, + "max_labels_per_text": 1559, "unique_labels": 21, "labels": { "Lid\u00c3\u00a9": { @@ -1022,8 +1038,12 @@ "da": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "min_labels_per_text": 35, "average_labels_per_text": 512.0, + "max_labels_per_text": 911, "unique_labels": 20, "labels": { "Natur": { @@ -1091,8 +1111,12 @@ "eu": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "min_labels_per_text": 110, "average_labels_per_text": 512.0, + "max_labels_per_text": 2486, "unique_labels": 5, "labels": { "Entitateak": { @@ -1115,8 +1139,12 @@ "gv": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "min_labels_per_text": 2, "average_labels_per_text": 512.0, + "max_labels_per_text": 1334, "unique_labels": 28, "labels": { "Chron-oaylleeaght": { @@ -1208,8 +1236,12 @@ "ilo": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "min_labels_per_text": 1, "average_labels_per_text": 512.0, + "max_labels_per_text": 1405, "unique_labels": 34, "labels": { "Katutubo": { @@ -1319,8 +1351,12 @@ "ku": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "min_labels_per_text": 5, "average_labels_per_text": 512.0, + "max_labels_per_text": 1078, "unique_labels": 39, "labels": { "Kes": { @@ -1445,8 +1481,12 @@ "lv": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "min_labels_per_text": 13, "average_labels_per_text": 512.0, + "max_labels_per_text": 878, "unique_labels": 16, "labels": { "Kult\u00c5\u00abra": { @@ -1502,8 +1542,12 @@ "min": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "min_labels_per_text": 1, "average_labels_per_text": 512.0, + "max_labels_per_text": 3986, "unique_labels": 16, "labels": { "Makaluak_iduik": { @@ -1559,8 +1603,12 @@ "mt": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "min_labels_per_text": 2, "average_labels_per_text": 512.0, + "max_labels_per_text": 1634, "unique_labels": 27, "labels": { "\u00c4\u00a0eografija": { @@ -1649,8 +1697,12 @@ "sco": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "min_labels_per_text": 3, "average_labels_per_text": 512.0, + "max_labels_per_text": 1081, "unique_labels": 23, "labels": { "Life": { @@ -1727,8 +1779,12 @@ "sq": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "min_labels_per_text": 2, "average_labels_per_text": 512.0, + "max_labels_per_text": 1109, "unique_labels": 36, "labels": { "Gjeografi": { @@ -1844,8 +1900,12 @@ "wa": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "min_labels_per_text": 2, "average_labels_per_text": 512.0, + "max_labels_per_text": 3653, "unique_labels": 6, "labels": { "Economeye": { diff --git a/mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json b/mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json index 8a912bee4..a05f101ff 100644 --- a/mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json +++ b/mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json @@ -4,11 +4,23 @@ "num_docs": 19899, "num_queries": 20, "number_of_characters": 44450333, + "min_document_length": 7, "average_document_length": 2233.0329664807277, + "max_document_length": 2959, + "min_query_length": 55, "average_query_length": 109.75, + "max_query_length": 278, + "min_instruction_length": 102, "average_instruction_length": 295.55, + "max_instruction_length": 811, + "min_changed_instruction_length": 151, "average_changed_instruction_length": 355.2, + "max_changed_instruction_length": 837, + "min_average_relevant_docs_per_query": 4, "average_relevant_docs_per_query": 32.7, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 55, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json b/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json index 2120a1113..cc8313a80 100644 --- a/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json +++ b/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json @@ -1,8 +1,12 @@ { "test": { + "min_text_length": 6, "average_text_length": 91.20563230605738, + "max_text_length": 220, "number_of_characters": 171649, + "min_labels_per_text": 0, "average_label_per_text": 0.620616365568544, + "max_labels_per_text": 2, "num_samples": 1882, "unique_labels": 6, "labels": { diff --git a/mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json b/mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json index 2f4f979d0..37fe86909 100644 --- a/mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json +++ b/mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json @@ -1,8 +1,12 @@ { "test": { + "min_text_length": 563, "average_text_length": 12014.408930434782, + "max_text_length": 1458188, "number_of_characters": 1381657027, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 115000, "unique_labels": 21, "labels": { @@ -72,9 +76,13 @@ }, "hf_subset_descriptive_stats": { "en": { + "min_text_length": 700, "average_text_length": 11720.2926, + "max_text_length": 1269363, "number_of_characters": 58601463, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -144,9 +152,13 @@ } }, "de": { + "min_text_length": 688, "average_text_length": 12865.4162, + "max_text_length": 1361562, "number_of_characters": 64327081, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -216,9 +228,13 @@ } }, "fr": { + "min_text_length": 676, "average_text_length": 13081.1098, + "max_text_length": 1440461, "number_of_characters": 65405549, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -288,9 +304,13 @@ } }, "it": { + "min_text_length": 696, "average_text_length": 12763.4786, + "max_text_length": 1404333, "number_of_characters": 63817393, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -360,9 +380,13 @@ } }, "es": { + "min_text_length": 683, "average_text_length": 13080.29, + "max_text_length": 1458188, "number_of_characters": 65401450, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -432,9 +456,13 @@ } }, "pl": { + "min_text_length": 697, "average_text_length": 12282.5926, + "max_text_length": 1381409, "number_of_characters": 61412963, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -504,9 +532,13 @@ } }, "ro": { + "min_text_length": 645, "average_text_length": 12836.9322, + "max_text_length": 1450509, "number_of_characters": 64184661, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -576,9 +608,13 @@ } }, "nl": { + "min_text_length": 721, "average_text_length": 12857.9742, + "max_text_length": 1442428, "number_of_characters": 64289871, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -648,9 +684,13 @@ } }, "el": { + "min_text_length": 695, "average_text_length": 12998.143, + "max_text_length": 1436873, "number_of_characters": 64990715, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -720,9 +760,13 @@ } }, "hu": { + "min_text_length": 635, "average_text_length": 12424.641, + "max_text_length": 1405731, "number_of_characters": 62123205, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -792,9 +836,13 @@ } }, "pt": { + "min_text_length": 662, "average_text_length": 12482.4616, + "max_text_length": 1400357, "number_of_characters": 62412308, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -864,9 +912,13 @@ } }, "cs": { + "min_text_length": 563, "average_text_length": 10783.4676, + "max_text_length": 1183634, "number_of_characters": 53917338, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -936,9 +988,13 @@ } }, "sv": { + "min_text_length": 660, "average_text_length": 11612.4774, + "max_text_length": 1257482, "number_of_characters": 58062387, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -1008,9 +1064,13 @@ } }, "bg": { + "min_text_length": 661, "average_text_length": 12235.4268, + "max_text_length": 1309869, "number_of_characters": 61177134, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -1080,9 +1140,13 @@ } }, "da": { + "min_text_length": 680, "average_text_length": 11773.958, + "max_text_length": 1297978, "number_of_characters": 58869790, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -1152,9 +1216,13 @@ } }, "fi": { + "min_text_length": 707, "average_text_length": 12087.6862, + "max_text_length": 1330363, "number_of_characters": 60438431, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -1224,9 +1292,13 @@ } }, "sk": { + "min_text_length": 595, "average_text_length": 11130.814, + "max_text_length": 1229063, "number_of_characters": 55654070, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -1296,9 +1368,13 @@ } }, "lt": { + "min_text_length": 597, "average_text_length": 11245.3566, + "max_text_length": 1274867, "number_of_characters": 56226783, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -1368,9 +1444,13 @@ } }, "hr": { + "min_text_length": 610, "average_text_length": 11022.142, + "max_text_length": 1252581, "number_of_characters": 55110710, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -1440,9 +1520,13 @@ } }, "sl": { + "min_text_length": 573, "average_text_length": 10620.0594, + "max_text_length": 1208117, "number_of_characters": 53100297, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -1512,9 +1596,13 @@ } }, "et": { + "min_text_length": 599, "average_text_length": 10898.4312, + "max_text_length": 1370495, "number_of_characters": 54492156, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -1584,9 +1672,13 @@ } }, "lv": { + "min_text_length": 614, "average_text_length": 10938.5102, + "max_text_length": 1230284, "number_of_characters": 54692551, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { @@ -1656,9 +1748,13 @@ } }, "mt": { + "min_text_length": 703, "average_text_length": 12589.7442, + "max_text_length": 1403346, "number_of_characters": 62948721, + "min_labels_per_text": 1, "average_label_per_text": 3.5938, + "max_labels_per_text": 9, "num_samples": 5000, "unique_labels": 21, "labels": { diff --git a/mteb/descriptive_stats/PairClassification/PawsXPairClassification.json b/mteb/descriptive_stats/PairClassification/PawsXPairClassification.json index 63180983c..35e88be06 100644 --- a/mteb/descriptive_stats/PairClassification/PawsXPairClassification.json +++ b/mteb/descriptive_stats/PairClassification/PawsXPairClassification.json @@ -2,8 +2,12 @@ "test": { "num_samples": 14000, "number_of_characters": 2551922, - "avg_sentence1_len": 91.17892857142857, - "avg_sentence2_len": 91.10121428571429, + "min_sentence1_length": 2, + "avg_sentence1_length": 91.17892857142857, + "max_sentence1_length": 268, + "min_sentence2_length": 2, + "avg_sentence2_length": 91.10121428571429, + "max_sentence2_length": 247, "unique_labels": 2, "labels": { "1": { @@ -17,8 +21,12 @@ "de": { "num_samples": 2000, "number_of_characters": 478034, - "avg_sentence1_len": 119.7815, - "avg_sentence2_len": 119.2355, + "min_sentence1_length": 2, + "avg_sentence1_length": 119.7815, + "max_sentence1_length": 268, + "min_sentence2_length": 2, + "avg_sentence2_length": 119.2355, + "max_sentence2_length": 235, "unique_labels": 2, "labels": { "1": { @@ -32,8 +40,12 @@ "en": { "num_samples": 2000, "number_of_characters": 454362, - "avg_sentence1_len": 113.7575, - "avg_sentence2_len": 113.4235, + "min_sentence1_length": 25, + "avg_sentence1_length": 113.7575, + "max_sentence1_length": 209, + "min_sentence2_length": 25, + "avg_sentence2_length": 113.4235, + "max_sentence2_length": 209, "unique_labels": 2, "labels": { "1": { @@ -47,8 +59,12 @@ "es": { "num_samples": 2000, "number_of_characters": 471226, - "avg_sentence1_len": 117.815, - "avg_sentence2_len": 117.798, + "min_sentence1_length": 2, + "avg_sentence1_length": 117.815, + "max_sentence1_length": 226, + "min_sentence2_length": 22, + "avg_sentence2_length": 117.798, + "max_sentence2_length": 233, "unique_labels": 2, "labels": { "1": { @@ -62,8 +78,12 @@ "fr": { "num_samples": 2000, "number_of_characters": 480033, - "avg_sentence1_len": 120.028, - "avg_sentence2_len": 119.9885, + "min_sentence1_length": 2, + "avg_sentence1_length": 120.028, + "max_sentence1_length": 238, + "min_sentence2_length": 2, + "avg_sentence2_length": 119.9885, + "max_sentence2_length": 247, "unique_labels": 2, "labels": { "1": { @@ -77,8 +97,12 @@ "ja": { "num_samples": 2000, "number_of_characters": 235106, - "avg_sentence1_len": 58.678, - "avg_sentence2_len": 58.875, + "min_sentence1_length": 2, + "avg_sentence1_length": 58.678, + "max_sentence1_length": 192, + "min_sentence2_length": 2, + "avg_sentence2_length": 58.875, + "max_sentence2_length": 198, "unique_labels": 2, "labels": { "1": { @@ -92,8 +116,12 @@ "ko": { "num_samples": 2000, "number_of_characters": 260149, - "avg_sentence1_len": 64.9605, - "avg_sentence2_len": 65.114, + "min_sentence1_length": 2, + "avg_sentence1_length": 64.9605, + "max_sentence1_length": 153, + "min_sentence2_length": 2, + "avg_sentence2_length": 65.114, + "max_sentence2_length": 159, "unique_labels": 2, "labels": { "1": { @@ -107,8 +135,12 @@ "zh": { "num_samples": 2000, "number_of_characters": 173012, - "avg_sentence1_len": 43.232, - "avg_sentence2_len": 43.274, + "min_sentence1_length": 2, + "avg_sentence1_length": 43.232, + "max_sentence1_length": 120, + "min_sentence2_length": 2, + "avg_sentence2_length": 43.274, + "max_sentence2_length": 113, "unique_labels": 2, "labels": { "1": { @@ -124,8 +156,12 @@ "validation": { "num_samples": 14000, "number_of_characters": 2524625, - "avg_sentence1_len": 90.12585714285714, - "avg_sentence2_len": 90.2045, + "min_sentence1_length": 2, + "avg_sentence1_length": 90.12585714285714, + "max_sentence1_length": 248, + "min_sentence2_length": 2, + "avg_sentence2_length": 90.2045, + "max_sentence2_length": 275, "unique_labels": 2, "labels": { "1": { @@ -139,8 +175,12 @@ "de": { "num_samples": 2000, "number_of_characters": 467643, - "avg_sentence1_len": 116.82, - "avg_sentence2_len": 117.0015, + "min_sentence1_length": 2, + "avg_sentence1_length": 116.82, + "max_sentence1_length": 248, + "min_sentence2_length": 2, + "avg_sentence2_length": 117.0015, + "max_sentence2_length": 275, "unique_labels": 2, "labels": { "1": { @@ -154,8 +194,12 @@ "en": { "num_samples": 2000, "number_of_characters": 451931, - "avg_sentence1_len": 113.1075, - "avg_sentence2_len": 112.858, + "min_sentence1_length": 25, + "avg_sentence1_length": 113.1075, + "max_sentence1_length": 213, + "min_sentence2_length": 25, + "avg_sentence2_length": 112.858, + "max_sentence2_length": 213, "unique_labels": 2, "labels": { "1": { @@ -169,8 +213,12 @@ "es": { "num_samples": 2000, "number_of_characters": 466112, - "avg_sentence1_len": 116.3285, - "avg_sentence2_len": 116.7275, + "min_sentence1_length": 2, + "avg_sentence1_length": 116.3285, + "max_sentence1_length": 240, + "min_sentence2_length": 2, + "avg_sentence2_length": 116.7275, + "max_sentence2_length": 241, "unique_labels": 2, "labels": { "1": { @@ -184,8 +232,12 @@ "fr": { "num_samples": 2000, "number_of_characters": 478510, - "avg_sentence1_len": 119.5045, - "avg_sentence2_len": 119.7505, + "min_sentence1_length": 2, + "avg_sentence1_length": 119.5045, + "max_sentence1_length": 233, + "min_sentence2_length": 2, + "avg_sentence2_length": 119.7505, + "max_sentence2_length": 246, "unique_labels": 2, "labels": { "1": { @@ -199,8 +251,12 @@ "ja": { "num_samples": 2000, "number_of_characters": 229655, - "avg_sentence1_len": 57.5105, - "avg_sentence2_len": 57.317, + "min_sentence1_length": 2, + "avg_sentence1_length": 57.5105, + "max_sentence1_length": 126, + "min_sentence2_length": 2, + "avg_sentence2_length": 57.317, + "max_sentence2_length": 121, "unique_labels": 2, "labels": { "1": { @@ -214,8 +270,12 @@ "ko": { "num_samples": 2000, "number_of_characters": 261355, - "avg_sentence1_len": 65.162, - "avg_sentence2_len": 65.5155, + "min_sentence1_length": 2, + "avg_sentence1_length": 65.162, + "max_sentence1_length": 178, + "min_sentence2_length": 2, + "avg_sentence2_length": 65.5155, + "max_sentence2_length": 174, "unique_labels": 2, "labels": { "1": { @@ -229,8 +289,12 @@ "zh": { "num_samples": 2000, "number_of_characters": 169419, - "avg_sentence1_len": 42.448, - "avg_sentence2_len": 42.2615, + "min_sentence1_length": 2, + "avg_sentence1_length": 42.448, + "max_sentence1_length": 101, + "min_sentence2_length": 2, + "avg_sentence2_length": 42.2615, + "max_sentence2_length": 120, "unique_labels": 2, "labels": { "1": { diff --git a/mteb/descriptive_stats/PairClassification/TwitterURLCorpus.json b/mteb/descriptive_stats/PairClassification/TwitterURLCorpus.json index 6ca4a5616..06edb67b3 100644 --- a/mteb/descriptive_stats/PairClassification/TwitterURLCorpus.json +++ b/mteb/descriptive_stats/PairClassification/TwitterURLCorpus.json @@ -2,8 +2,12 @@ "test": { "num_samples": 51534, "number_of_characters": 8659940, - "avg_sentence1_len": 79.48919160166103, - "avg_sentence2_len": 88.5540419916948, + "min_sentence1_length": 24, + "avg_sentence1_length": 79.48919160166103, + "max_sentence1_length": 126, + "min_sentence2_length": 6, + "avg_sentence2_length": 88.5540419916948, + "max_sentence2_length": 608, "unique_labels": 2, "labels": { "0": { diff --git a/mteb/descriptive_stats/PairClassification/XNLI.json b/mteb/descriptive_stats/PairClassification/XNLI.json index 91ef22435..62ce2048a 100644 --- a/mteb/descriptive_stats/PairClassification/XNLI.json +++ b/mteb/descriptive_stats/PairClassification/XNLI.json @@ -2,8 +2,12 @@ "test": { "num_samples": 19110, "number_of_characters": 2907145, - "avg_sentence1_len": 103.23793825222397, - "avg_sentence2_len": 48.88895866038723, + "min_sentence1_length": 3, + "avg_sentence1_length": 103.23793825222397, + "max_sentence1_length": 401, + "min_sentence2_length": 2, + "avg_sentence2_length": 48.88895866038723, + "max_sentence2_length": 187, "unique_labels": 2, "labels": { "0": { @@ -17,8 +21,12 @@ "ar": { "num_samples": 1365, "number_of_characters": 179591, - "avg_sentence1_len": 89.57362637362637, - "avg_sentence2_len": 41.99487179487179, + "min_sentence1_length": 11, + "avg_sentence1_length": 89.57362637362637, + "max_sentence1_length": 242, + "min_sentence2_length": 8, + "avg_sentence2_length": 41.99487179487179, + "max_sentence2_length": 115, "unique_labels": 2, "labels": { "0": { @@ -32,8 +40,12 @@ "bg": { "num_samples": 1365, "number_of_characters": 220646, - "avg_sentence1_len": 110.01611721611722, - "avg_sentence2_len": 51.62930402930403, + "min_sentence1_length": 14, + "avg_sentence1_length": 110.01611721611722, + "max_sentence1_length": 303, + "min_sentence2_length": 8, + "avg_sentence2_length": 51.62930402930403, + "max_sentence2_length": 150, "unique_labels": 2, "labels": { "0": { @@ -47,8 +59,12 @@ "de": { "num_samples": 1365, "number_of_characters": 241224, - "avg_sentence1_len": 119.92600732600732, - "avg_sentence2_len": 56.794871794871796, + "min_sentence1_length": 3, + "avg_sentence1_length": 119.92600732600732, + "max_sentence1_length": 301, + "min_sentence2_length": 9, + "avg_sentence2_length": 56.794871794871796, + "max_sentence2_length": 187, "unique_labels": 2, "labels": { "0": { @@ -62,8 +78,12 @@ "el": { "num_samples": 1365, "number_of_characters": 240222, - "avg_sentence1_len": 119.05421245421246, - "avg_sentence2_len": 56.93260073260073, + "min_sentence1_length": 13, + "avg_sentence1_length": 119.05421245421246, + "max_sentence1_length": 344, + "min_sentence2_length": 13, + "avg_sentence2_length": 56.93260073260073, + "max_sentence2_length": 172, "unique_labels": 2, "labels": { "0": { @@ -77,8 +97,12 @@ "en": { "num_samples": 1365, "number_of_characters": 212223, - "avg_sentence1_len": 105.67032967032966, - "avg_sentence2_len": 49.8043956043956, + "min_sentence1_length": 19, + "avg_sentence1_length": 105.67032967032966, + "max_sentence1_length": 268, + "min_sentence2_length": 9, + "avg_sentence2_length": 49.8043956043956, + "max_sentence2_length": 137, "unique_labels": 2, "labels": { "0": { @@ -92,8 +116,12 @@ "es": { "num_samples": 1365, "number_of_characters": 232207, - "avg_sentence1_len": 115.43296703296703, - "avg_sentence2_len": 54.68205128205128, + "min_sentence1_length": 11, + "avg_sentence1_length": 115.43296703296703, + "max_sentence1_length": 385, + "min_sentence2_length": 8, + "avg_sentence2_length": 54.68205128205128, + "max_sentence2_length": 163, "unique_labels": 2, "labels": { "0": { @@ -107,8 +135,12 @@ "fr": { "num_samples": 1365, "number_of_characters": 245259, - "avg_sentence1_len": 121.0967032967033, - "avg_sentence2_len": 58.58021978021978, + "min_sentence1_length": 9, + "avg_sentence1_length": 121.0967032967033, + "max_sentence1_length": 327, + "min_sentence2_length": 10, + "avg_sentence2_length": 58.58021978021978, + "max_sentence2_length": 169, "unique_labels": 2, "labels": { "0": { @@ -122,8 +154,12 @@ "hi": { "num_samples": 1365, "number_of_characters": 211312, - "avg_sentence1_len": 104.63443223443224, - "avg_sentence2_len": 50.17289377289377, + "min_sentence1_length": 16, + "avg_sentence1_length": 104.63443223443224, + "max_sentence1_length": 401, + "min_sentence2_length": 9, + "avg_sentence2_length": 50.17289377289377, + "max_sentence2_length": 162, "unique_labels": 2, "labels": { "0": { @@ -137,8 +173,12 @@ "ru": { "num_samples": 1365, "number_of_characters": 222797, - "avg_sentence1_len": 110.76923076923077, - "avg_sentence2_len": 52.452014652014654, + "min_sentence1_length": 11, + "avg_sentence1_length": 110.76923076923077, + "max_sentence1_length": 306, + "min_sentence2_length": 8, + "avg_sentence2_length": 52.452014652014654, + "max_sentence2_length": 167, "unique_labels": 2, "labels": { "0": { @@ -152,8 +192,12 @@ "sw": { "num_samples": 1365, "number_of_characters": 210103, - "avg_sentence1_len": 104.43956043956044, - "avg_sentence2_len": 49.48205128205128, + "min_sentence1_length": 10, + "avg_sentence1_length": 104.43956043956044, + "max_sentence1_length": 266, + "min_sentence2_length": 2, + "avg_sentence2_length": 49.48205128205128, + "max_sentence2_length": 146, "unique_labels": 2, "labels": { "0": { @@ -167,8 +211,12 @@ "th": { "num_samples": 1365, "number_of_characters": 192788, - "avg_sentence1_len": 96.6923076923077, - "avg_sentence2_len": 44.544322344322346, + "min_sentence1_length": 12, + "avg_sentence1_length": 96.6923076923077, + "max_sentence1_length": 262, + "min_sentence2_length": 6, + "avg_sentence2_length": 44.544322344322346, + "max_sentence2_length": 129, "unique_labels": 2, "labels": { "0": { @@ -182,8 +230,12 @@ "tr": { "num_samples": 1365, "number_of_characters": 208658, - "avg_sentence1_len": 103.67765567765568, - "avg_sentence2_len": 49.18534798534799, + "min_sentence1_length": 15, + "avg_sentence1_length": 103.67765567765568, + "max_sentence1_length": 255, + "min_sentence2_length": 6, + "avg_sentence2_length": 49.18534798534799, + "max_sentence2_length": 140, "unique_labels": 2, "labels": { "0": { @@ -197,8 +249,12 @@ "vi": { "num_samples": 1365, "number_of_characters": 223549, - "avg_sentence1_len": 111.31208791208792, - "avg_sentence2_len": 52.46007326007326, + "min_sentence1_length": 14, + "avg_sentence1_length": 111.31208791208792, + "max_sentence1_length": 265, + "min_sentence2_length": 9, + "avg_sentence2_length": 52.46007326007326, + "max_sentence2_length": 143, "unique_labels": 2, "labels": { "0": { @@ -212,8 +268,12 @@ "zh": { "num_samples": 1365, "number_of_characters": 66566, - "avg_sentence1_len": 33.03589743589744, - "avg_sentence2_len": 15.73040293040293, + "min_sentence1_length": 4, + "avg_sentence1_length": 33.03589743589744, + "max_sentence1_length": 112, + "min_sentence2_length": 3, + "avg_sentence2_length": 15.73040293040293, + "max_sentence2_length": 59, "unique_labels": 2, "labels": { "0": { @@ -229,8 +289,12 @@ "validation": { "num_samples": 19110, "number_of_characters": 2909058, - "avg_sentence1_len": 103.20790162218734, - "avg_sentence2_len": 49.01909994767138, + "min_sentence1_length": 5, + "avg_sentence1_length": 103.20790162218734, + "max_sentence1_length": 323, + "min_sentence2_length": 3, + "avg_sentence2_length": 49.01909994767138, + "max_sentence2_length": 172, "unique_labels": 2, "labels": { "0": { @@ -244,8 +308,12 @@ "ar": { "num_samples": 1365, "number_of_characters": 177355, - "avg_sentence1_len": 88.31868131868131, - "avg_sentence2_len": 41.61172161172161, + "min_sentence1_length": 13, + "avg_sentence1_length": 88.31868131868131, + "max_sentence1_length": 214, + "min_sentence2_length": 6, + "avg_sentence2_length": 41.61172161172161, + "max_sentence2_length": 137, "unique_labels": 2, "labels": { "0": { @@ -259,8 +327,12 @@ "bg": { "num_samples": 1365, "number_of_characters": 219988, - "avg_sentence1_len": 109.196336996337, - "avg_sentence2_len": 51.967032967032964, + "min_sentence1_length": 16, + "avg_sentence1_length": 109.196336996337, + "max_sentence1_length": 316, + "min_sentence2_length": 10, + "avg_sentence2_length": 51.967032967032964, + "max_sentence2_length": 151, "unique_labels": 2, "labels": { "0": { @@ -274,8 +346,12 @@ "de": { "num_samples": 1365, "number_of_characters": 241852, - "avg_sentence1_len": 119.81172161172161, - "avg_sentence2_len": 57.36923076923077, + "min_sentence1_length": 20, + "avg_sentence1_length": 119.81172161172161, + "max_sentence1_length": 298, + "min_sentence2_length": 12, + "avg_sentence2_length": 57.36923076923077, + "max_sentence2_length": 162, "unique_labels": 2, "labels": { "0": { @@ -289,8 +365,12 @@ "el": { "num_samples": 1365, "number_of_characters": 241275, - "avg_sentence1_len": 119.87545787545787, - "avg_sentence2_len": 56.88278388278388, + "min_sentence1_length": 16, + "avg_sentence1_length": 119.87545787545787, + "max_sentence1_length": 302, + "min_sentence2_length": 6, + "avg_sentence2_length": 56.88278388278388, + "max_sentence2_length": 171, "unique_labels": 2, "labels": { "0": { @@ -304,8 +384,12 @@ "en": { "num_samples": 1365, "number_of_characters": 212384, - "avg_sentence1_len": 105.71648351648352, - "avg_sentence2_len": 49.87619047619047, + "min_sentence1_length": 20, + "avg_sentence1_length": 105.71648351648352, + "max_sentence1_length": 271, + "min_sentence2_length": 8, + "avg_sentence2_length": 49.87619047619047, + "max_sentence2_length": 139, "unique_labels": 2, "labels": { "0": { @@ -319,8 +403,12 @@ "es": { "num_samples": 1365, "number_of_characters": 232451, - "avg_sentence1_len": 115.17289377289377, - "avg_sentence2_len": 55.120879120879124, + "min_sentence1_length": 14, + "avg_sentence1_length": 115.17289377289377, + "max_sentence1_length": 265, + "min_sentence2_length": 7, + "avg_sentence2_length": 55.120879120879124, + "max_sentence2_length": 148, "unique_labels": 2, "labels": { "0": { @@ -334,8 +422,12 @@ "fr": { "num_samples": 1365, "number_of_characters": 246857, - "avg_sentence1_len": 121.75897435897436, - "avg_sentence2_len": 59.08864468864469, + "min_sentence1_length": 19, + "avg_sentence1_length": 121.75897435897436, + "max_sentence1_length": 323, + "min_sentence2_length": 11, + "avg_sentence2_length": 59.08864468864469, + "max_sentence2_length": 172, "unique_labels": 2, "labels": { "0": { @@ -349,8 +441,12 @@ "hi": { "num_samples": 1365, "number_of_characters": 212269, - "avg_sentence1_len": 105.06446886446886, - "avg_sentence2_len": 50.44395604395604, + "min_sentence1_length": 18, + "avg_sentence1_length": 105.06446886446886, + "max_sentence1_length": 277, + "min_sentence2_length": 7, + "avg_sentence2_length": 50.44395604395604, + "max_sentence2_length": 152, "unique_labels": 2, "labels": { "0": { @@ -364,8 +460,12 @@ "ru": { "num_samples": 1365, "number_of_characters": 221152, - "avg_sentence1_len": 109.74725274725274, - "avg_sentence2_len": 52.26886446886447, + "min_sentence1_length": 15, + "avg_sentence1_length": 109.74725274725274, + "max_sentence1_length": 310, + "min_sentence2_length": 8, + "avg_sentence2_length": 52.26886446886447, + "max_sentence2_length": 140, "unique_labels": 2, "labels": { "0": { @@ -379,8 +479,12 @@ "sw": { "num_samples": 1365, "number_of_characters": 210482, - "avg_sentence1_len": 104.32234432234432, - "avg_sentence2_len": 49.87692307692308, + "min_sentence1_length": 13, + "avg_sentence1_length": 104.32234432234432, + "max_sentence1_length": 264, + "min_sentence2_length": 8, + "avg_sentence2_length": 49.87692307692308, + "max_sentence2_length": 153, "unique_labels": 2, "labels": { "0": { @@ -394,8 +498,12 @@ "th": { "num_samples": 1365, "number_of_characters": 192640, - "avg_sentence1_len": 97.28498168498169, - "avg_sentence2_len": 43.843223443223444, + "min_sentence1_length": 7, + "avg_sentence1_length": 97.28498168498169, + "max_sentence1_length": 255, + "min_sentence2_length": 3, + "avg_sentence2_length": 43.843223443223444, + "max_sentence2_length": 140, "unique_labels": 2, "labels": { "0": { @@ -409,8 +517,12 @@ "tr": { "num_samples": 1365, "number_of_characters": 208305, - "avg_sentence1_len": 102.96630036630036, - "avg_sentence2_len": 49.63809523809524, + "min_sentence1_length": 15, + "avg_sentence1_length": 102.96630036630036, + "max_sentence1_length": 269, + "min_sentence2_length": 10, + "avg_sentence2_length": 49.63809523809524, + "max_sentence2_length": 139, "unique_labels": 2, "labels": { "0": { @@ -424,8 +536,12 @@ "vi": { "num_samples": 1365, "number_of_characters": 224811, - "avg_sentence1_len": 112.26373626373626, - "avg_sentence2_len": 52.432967032967035, + "min_sentence1_length": 18, + "avg_sentence1_length": 112.26373626373626, + "max_sentence1_length": 323, + "min_sentence2_length": 9, + "avg_sentence2_length": 52.432967032967035, + "max_sentence2_length": 159, "unique_labels": 2, "labels": { "0": { @@ -439,8 +555,12 @@ "zh": { "num_samples": 1365, "number_of_characters": 67237, - "avg_sentence1_len": 33.41098901098901, - "avg_sentence2_len": 15.846886446886447, + "min_sentence1_length": 5, + "avg_sentence1_length": 33.41098901098901, + "max_sentence1_length": 135, + "min_sentence2_length": 3, + "avg_sentence2_length": 15.846886446886447, + "max_sentence2_length": 66, "unique_labels": 2, "labels": { "0": { diff --git a/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json b/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json index c12f4f292..deee90477 100644 --- a/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json +++ b/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json @@ -4,8 +4,12 @@ "number_of_characters": 413674, "num_positive": 2255, "num_negative": 5245, - "avg_query_len": 50.205333333333336, - "avg_positive_len": 52.54013303769401, - "avg_negative_len": 52.69189704480458 + "min_query_length": 17, + "avg_query_length": 50.205333333333336, + "max_query_length": 148, + "min_positive_length": 15, + "avg_positive_length": 52.54013303769401, + "max_positive_length": 152, + "avg_negative_length": 52.69189704480458 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/ESCIReranking.json b/mteb/descriptive_stats/Reranking/ESCIReranking.json index 419b228eb..e17fa884e 100644 --- a/mteb/descriptive_stats/Reranking/ESCIReranking.json +++ b/mteb/descriptive_stats/Reranking/ESCIReranking.json @@ -4,36 +4,52 @@ "number_of_characters": 254538331, "num_positive": 271416, "num_negative": 44235, - "avg_query_len": 19.691890046098685, - "avg_positive_len": 803.9230995961918, - "avg_negative_len": 808.501458121397, + "min_query_length": 1, + "avg_query_length": 19.691890046098685, + "max_query_length": 151, + "min_positive_length": 1, + "avg_positive_length": 803.9230995961918, + "max_positive_length": 8640, + "avg_negative_length": 808.501458121397, "hf_subset_descriptive_stats": { "us": { "num_samples": 21296, "number_of_characters": 186915609, "num_positive": 189375, "num_negative": 25463, - "avg_query_len": 21.440833959429, - "avg_positive_len": 868.3698006600661, - "avg_negative_len": 864.4493578918431 + "min_query_length": 1, + "avg_query_length": 21.440833959429, + "max_query_length": 151, + "min_positive_length": 1, + "avg_positive_length": 868.3698006600661, + "max_positive_length": 5545, + "avg_negative_length": 864.4493578918431 }, "es": { "num_samples": 3703, "number_of_characters": 48861389, "num_positive": 39110, "num_negative": 10183, - "avg_query_len": 20.681609505806104, - "avg_positive_len": 980.9613142418818, - "avg_negative_len": 1023.2159481488756 + "min_query_length": 3, + "avg_query_length": 20.681609505806104, + "max_query_length": 59, + "min_positive_length": 1, + "avg_positive_length": 980.9613142418818, + "max_positive_length": 8640, + "avg_negative_length": 1023.2159481488756 }, "jp": { "num_samples": 4286, "number_of_characters": 18761333, "num_positive": 42931, "num_negative": 8589, - "avg_query_len": 10.146756882874476, - "avg_positive_len": 358.35792317905475, - "avg_negative_len": 388.075445337059 + "min_query_length": 1, + "avg_query_length": 10.146756882874476, + "max_query_length": 60, + "min_positive_length": 1, + "avg_positive_length": 358.35792317905475, + "max_positive_length": 3488, + "avg_negative_length": 388.075445337059 } } } diff --git a/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json b/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json index 1c5fe0f03..c14c0a0f8 100644 --- a/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json +++ b/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json @@ -4,153 +4,221 @@ "number_of_characters": 83866932, "num_positive": 24000, "num_negative": 192000, - "avg_query_len": 59.091208333333334, - "avg_positive_len": 385.45120833333334, - "avg_negative_len": 381.23913541666667, + "min_query_length": 7, + "avg_query_length": 59.091208333333334, + "max_query_length": 180, + "min_positive_length": 100, + "avg_positive_length": 385.45120833333334, + "max_positive_length": 3515, + "avg_negative_length": 381.23913541666667, "hf_subset_descriptive_stats": { "bg": { "num_samples": 1500, "number_of_characters": 5145316, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 60.82666666666667, - "avg_positive_len": 375.88866666666667, - "avg_negative_len": 374.18691666666666 + "min_query_length": 18, + "avg_query_length": 60.82666666666667, + "max_query_length": 166, + "min_positive_length": 100, + "avg_positive_length": 375.88866666666667, + "max_positive_length": 2241, + "avg_negative_length": 374.18691666666666 }, "bn": { "num_samples": 1500, "number_of_characters": 5390581, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 47.266666666666666, - "avg_positive_len": 394.5946666666667, - "avg_negative_len": 393.98241666666667 + "min_query_length": 7, + "avg_query_length": 47.266666666666666, + "max_query_length": 123, + "min_positive_length": 100, + "avg_positive_length": 394.5946666666667, + "max_positive_length": 2338, + "avg_negative_length": 393.98241666666667 }, "cs": { "num_samples": 1500, "number_of_characters": 5079180, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 56.272, - "avg_positive_len": 383.8446666666667, - "avg_negative_len": 368.2504166666667 + "min_query_length": 17, + "avg_query_length": 56.272, + "max_query_length": 137, + "min_positive_length": 100, + "avg_positive_length": 383.8446666666667, + "max_positive_length": 2300, + "avg_negative_length": 368.2504166666667 }, "da": { "num_samples": 1500, "number_of_characters": 4746132, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 56.75066666666667, - "avg_positive_len": 351.6813333333333, - "avg_negative_len": 344.457 + "min_query_length": 17, + "avg_query_length": 56.75066666666667, + "max_query_length": 137, + "min_positive_length": 100, + "avg_positive_length": 351.6813333333333, + "max_positive_length": 2159, + "avg_negative_length": 344.457 }, "de": { "num_samples": 1500, "number_of_characters": 5483592, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 70.004, - "avg_positive_len": 391.5366666666667, - "avg_negative_len": 399.27341666666666 + "min_query_length": 20, + "avg_query_length": 70.004, + "max_query_length": 180, + "min_positive_length": 100, + "avg_positive_length": 391.5366666666667, + "max_positive_length": 2674, + "avg_negative_length": 399.27341666666666 }, "en": { "num_samples": 1500, "number_of_characters": 6217884, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 68.372, - "avg_positive_len": 451.72866666666664, - "avg_negative_len": 453.14441666666664 + "min_query_length": 18, + "avg_query_length": 68.372, + "max_query_length": 162, + "min_positive_length": 100, + "avg_positive_length": 451.72866666666664, + "max_positive_length": 3515, + "avg_negative_length": 453.14441666666664 }, "fa": { "num_samples": 1500, "number_of_characters": 4732619, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 48.66733333333333, - "avg_positive_len": 347.704, - "avg_negative_len": 344.8385 + "min_query_length": 12, + "avg_query_length": 48.66733333333333, + "max_query_length": 119, + "min_positive_length": 100, + "avg_positive_length": 347.704, + "max_positive_length": 2571, + "avg_negative_length": 344.8385 }, "fi": { "num_samples": 1500, "number_of_characters": 5209132, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 55.343333333333334, - "avg_positive_len": 394.7126666666667, - "avg_negative_len": 377.83733333333333 + "min_query_length": 14, + "avg_query_length": 55.343333333333334, + "max_query_length": 132, + "min_positive_length": 100, + "avg_positive_length": 394.7126666666667, + "max_positive_length": 2129, + "avg_negative_length": 377.83733333333333 }, "hi": { "num_samples": 1500, "number_of_characters": 5620959, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 50.77733333333333, - "avg_positive_len": 420.3786666666667, - "avg_negative_len": 409.51875 + "min_query_length": 13, + "avg_query_length": 50.77733333333333, + "max_query_length": 125, + "min_positive_length": 100, + "avg_positive_length": 420.3786666666667, + "max_positive_length": 2361, + "avg_negative_length": 409.51875 }, "it": { "num_samples": 1500, "number_of_characters": 5420496, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 70.05466666666666, - "avg_positive_len": 396.97333333333336, - "avg_negative_len": 393.3295 + "min_query_length": 23, + "avg_query_length": 70.05466666666666, + "max_query_length": 156, + "min_positive_length": 100, + "avg_positive_length": 396.97333333333336, + "max_positive_length": 2082, + "avg_negative_length": 393.3295 }, "nl": { "num_samples": 1500, "number_of_characters": 5169556, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 65.34466666666667, - "avg_positive_len": 380.79133333333334, - "avg_negative_len": 375.02933333333334 + "min_query_length": 18, + "avg_query_length": 65.34466666666667, + "max_query_length": 136, + "min_positive_length": 100, + "avg_positive_length": 380.79133333333334, + "max_positive_length": 1864, + "avg_negative_length": 375.02933333333334 }, "pt": { "num_samples": 1500, "number_of_characters": 5474356, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 65.11933333333333, - "avg_positive_len": 404.01933333333335, - "avg_negative_len": 397.554 + "min_query_length": 18, + "avg_query_length": 65.11933333333333, + "max_query_length": 176, + "min_positive_length": 100, + "avg_positive_length": 404.01933333333335, + "max_positive_length": 3057, + "avg_negative_length": 397.554 }, "ro": { "num_samples": 1500, "number_of_characters": 4796113, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 61.973333333333336, - "avg_positive_len": 346.70866666666666, - "avg_negative_len": 348.5908333333333 + "min_query_length": 14, + "avg_query_length": 61.973333333333336, + "max_query_length": 169, + "min_positive_length": 100, + "avg_positive_length": 346.70866666666666, + "max_positive_length": 1917, + "avg_negative_length": 348.5908333333333 }, "sr": { "num_samples": 1500, "number_of_characters": 5271732, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 55.669333333333334, - "avg_positive_len": 386.34933333333333, - "avg_negative_len": 384.0586666666667 + "min_query_length": 15, + "avg_query_length": 55.669333333333334, + "max_query_length": 146, + "min_positive_length": 100, + "avg_positive_length": 386.34933333333333, + "max_positive_length": 2421, + "avg_negative_length": 384.0586666666667 }, "no": { "num_samples": 1500, "number_of_characters": 5036586, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 55.288, - "avg_positive_len": 367.72, - "avg_negative_len": 366.8395 + "min_query_length": 14, + "avg_query_length": 55.288, + "max_query_length": 129, + "min_positive_length": 100, + "avg_positive_length": 367.72, + "max_positive_length": 1450, + "avg_negative_length": 366.8395 }, "sv": { "num_samples": 1500, "number_of_characters": 5072698, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 57.73, - "avg_positive_len": 372.58733333333333, - "avg_negative_len": 368.93516666666665 + "min_query_length": 17, + "avg_query_length": 57.73, + "max_query_length": 133, + "min_positive_length": 100, + "avg_positive_length": 372.58733333333333, + "max_positive_length": 2493, + "avg_negative_length": 368.93516666666665 } } } diff --git a/mteb/descriptive_stats/Retrieval/AppsRetrieval.json b/mteb/descriptive_stats/Retrieval/AppsRetrieval.json index 8a71a1ad1..a1c1011bf 100644 --- a/mteb/descriptive_stats/Retrieval/AppsRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/AppsRetrieval.json @@ -1,11 +1,17 @@ { "test": { - "number_of_characters": 2245.837090504686, + "number_of_characters": 11335620, "num_samples": 12530, "num_queries": 3765, "num_documents": 8765, - "average_document_length": 0.0657169048317138, - "average_query_length": 0.4435135244766838, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 152, + "average_document_length": 717.2737022247576, + "max_document_length": 5742, + "min_query_length": 6, + "average_query_length": 1340.9604249667996, + "max_query_length": 289049, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json b/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json index fe213d96d..858d560f9 100644 --- a/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json @@ -1,3396 +1,5658 @@ { "test": { - "number_of_characters": 76.49551684802204, + "number_of_characters": 25574620, "num_samples": 521866, "num_queries": 338378, "num_documents": 183488, - "average_document_length": 1.0899895361004534e-05, - "average_query_length": 0.000220154728877238, + "min_document_length": 4, + "average_document_length": 137.38034094872688, + "max_document_length": 237, + "min_query_length": 2, + "average_query_length": 1.0845149507355678, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0000413738481817, "average_relevant_docs_per_query": 1.0000413738481817, + "max_relevant_docs_per_query": 1.0000413738481817, "hf_subset_descriptive_stats": { "acm_Arab-acm_Arab": { - "number_of_characters": 57.84, + "number_of_characters": 51232, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06204444444444445, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 102.98360655737704, + "max_document_length": 129, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "acm_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-acm_Arab": { - "number_of_characters": 57.84, + "number_of_characters": 51232, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06204444444444445, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 102.98360655737704, + "max_document_length": 129, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "afr_Latn-afr_Latn": { - "number_of_characters": 80.04555555555555, + "number_of_characters": 71217, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08671728395061729, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 143.93647540983608, + "max_document_length": 159, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "afr_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-afr_Latn": { - "number_of_characters": 80.04555555555555, + "number_of_characters": 71217, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08671728395061729, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 143.93647540983608, + "max_document_length": 159, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "als_Latn-als_Latn": { - "number_of_characters": 78.13555555555556, + "number_of_characters": 69498, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08459506172839507, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 140.4139344262295, + "max_document_length": 175, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "als_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-als_Latn": { - "number_of_characters": 78.13555555555556, + "number_of_characters": 69498, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08459506172839507, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 140.4139344262295, + "max_document_length": 175, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "amh_Ethi-amh_Ethi": { - "number_of_characters": 51.16111111111111, + "number_of_characters": 45221, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.05462345679012346, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 90.66598360655738, + "max_document_length": 100, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "amh_Ethi-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-amh_Ethi": { - "number_of_characters": 51.16111111111111, + "number_of_characters": 45221, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.05462345679012346, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 90.66598360655738, + "max_document_length": 100, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "apc_Arab-apc_Arab": { - "number_of_characters": 57.85777777777778, + "number_of_characters": 51248, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.062064197530864194, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 103.01639344262296, + "max_document_length": 134, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "apc_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-apc_Arab": { - "number_of_characters": 57.85777777777778, + "number_of_characters": 51248, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.062064197530864194, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 103.01639344262296, + "max_document_length": 134, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "arb_Arab-arb_Arab": { - "number_of_characters": 60.55, + "number_of_characters": 53671, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06505555555555555, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 107.98155737704919, + "max_document_length": 134, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "arb_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-arb_Arab": { - "number_of_characters": 60.55, + "number_of_characters": 53671, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06505555555555555, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 107.98155737704919, + "max_document_length": 134, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "arb_Latn-arb_Latn": { - "number_of_characters": 69.02444444444444, + "number_of_characters": 61298, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0744716049382716, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 123.61065573770492, + "max_document_length": 160, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "arb_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-arb_Latn": { - "number_of_characters": 69.02444444444444, + "number_of_characters": 61298, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0744716049382716, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 123.61065573770492, + "max_document_length": 160, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ars_Arab-ars_Arab": { - "number_of_characters": 58.43222222222222, + "number_of_characters": 51765, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06270246913580246, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 104.07581967213115, + "max_document_length": 119, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ars_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-ars_Arab": { - "number_of_characters": 58.43222222222222, + "number_of_characters": 51765, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06270246913580246, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 104.07581967213115, + "max_document_length": 119, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ary_Arab-ary_Arab": { - "number_of_characters": 68.01893095768374, + "number_of_characters": 60261, "num_samples": 1386, "num_queries": 898, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07351774048739837, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 121.48565573770492, + "max_document_length": 138, + "min_query_length": 2, + "average_query_length": 1.086859688195991, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ary_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-ary_Arab": { - "number_of_characters": 68.01893095768374, + "number_of_characters": 60261, "num_samples": 1386, "num_queries": 898, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07351774048739837, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 121.48565573770492, + "max_document_length": 138, + "min_query_length": 2, + "average_query_length": 1.086859688195991, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "arz_Arab-arz_Arab": { - "number_of_characters": 59.14111111111111, + "number_of_characters": 52403, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06349012345679012, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 105.38319672131148, + "max_document_length": 115, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "arz_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-arz_Arab": { - "number_of_characters": 59.14111111111111, + "number_of_characters": 52403, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06349012345679012, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 105.38319672131148, + "max_document_length": 115, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "asm_Beng-asm_Beng": { - "number_of_characters": 70.26, + "number_of_characters": 62410, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07584444444444445, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 4, + "average_document_length": 125.88934426229508, + "max_document_length": 158, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "asm_Beng-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-asm_Beng": { - "number_of_characters": 70.26, + "number_of_characters": 62410, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07584444444444445, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 4, + "average_document_length": 125.88934426229508, + "max_document_length": 158, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "azj_Latn-azj_Latn": { - "number_of_characters": 75.51222222222222, + "number_of_characters": 67137, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08168024691358025, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 135.57581967213116, + "max_document_length": 156, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "azj_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-azj_Latn": { - "number_of_characters": 75.51222222222222, + "number_of_characters": 67137, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08168024691358025, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 135.57581967213116, + "max_document_length": 156, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "bam_Latn-bam_Latn": { - "number_of_characters": 74.34222222222222, + "number_of_characters": 66084, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08038024691358024, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 133.41803278688525, + "max_document_length": 166, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "bam_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-bam_Latn": { - "number_of_characters": 74.34222222222222, + "number_of_characters": 66084, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08038024691358024, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 133.41803278688525, + "max_document_length": 166, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ben_Beng-ben_Beng": { - "number_of_characters": 71.48444444444445, + "number_of_characters": 63512, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07720493827160495, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 9, + "average_document_length": 128.14754098360655, + "max_document_length": 175, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ben_Beng-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-ben_Beng": { - "number_of_characters": 71.48444444444445, + "number_of_characters": 63512, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07720493827160495, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 9, + "average_document_length": 128.14754098360655, + "max_document_length": 175, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ben_Latn-ben_Latn": { - "number_of_characters": 76.78777777777778, + "number_of_characters": 68285, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08309753086419754, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 9, + "average_document_length": 137.92827868852459, + "max_document_length": 185, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ben_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-ben_Latn": { - "number_of_characters": 76.78777777777778, + "number_of_characters": 68285, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08309753086419754, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 9, + "average_document_length": 137.92827868852459, + "max_document_length": 185, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "bod_Tibt-bod_Tibt": { - "number_of_characters": 88.90222222222222, + "number_of_characters": 79188, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09655802469135802, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 160.2704918032787, + "max_document_length": 213, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "bod_Tibt-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-bod_Tibt": { - "number_of_characters": 88.90222222222222, + "number_of_characters": 79188, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09655802469135802, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 160.2704918032787, + "max_document_length": 213, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "bul_Cyrl-bul_Cyrl": { - "number_of_characters": 74.89, + "number_of_characters": 66577, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08098888888888889, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 134.42827868852459, + "max_document_length": 177, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "bul_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-bul_Cyrl": { - "number_of_characters": 74.89, + "number_of_characters": 66577, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08098888888888889, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 134.42827868852459, + "max_document_length": 177, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "cat_Latn-cat_Latn": { - "number_of_characters": 77.40666666666667, + "number_of_characters": 68842, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08378518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 139.06967213114754, + "max_document_length": 163, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "cat_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-cat_Latn": { - "number_of_characters": 77.40666666666667, + "number_of_characters": 68842, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08378518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 139.06967213114754, + "max_document_length": 163, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ceb_Latn-ceb_Latn": { - "number_of_characters": 83.19666666666667, + "number_of_characters": 74053, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09021851851851853, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 149.74795081967213, + "max_document_length": 184, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ceb_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-ceb_Latn": { - "number_of_characters": 83.19666666666667, + "number_of_characters": 74053, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09021851851851853, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 149.74795081967213, + "max_document_length": 184, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ces_Latn-ces_Latn": { - "number_of_characters": 69.73333333333333, + "number_of_characters": 61936, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07525925925925926, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 124.91803278688525, + "max_document_length": 139, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ces_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-ces_Latn": { - "number_of_characters": 69.73333333333333, + "number_of_characters": 61936, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07525925925925926, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 124.91803278688525, + "max_document_length": 139, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ckb_Arab-ckb_Arab": { - "number_of_characters": 73.04555555555555, + "number_of_characters": 64917, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0789395061728395, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 131.0266393442623, + "max_document_length": 178, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ckb_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-ckb_Arab": { - "number_of_characters": 73.04555555555555, + "number_of_characters": 64917, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0789395061728395, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 131.0266393442623, + "max_document_length": 178, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "dan_Latn-dan_Latn": { - "number_of_characters": 74.96888888888888, + "number_of_characters": 66648, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08107654320987653, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 134.5737704918033, + "max_document_length": 159, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "dan_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-dan_Latn": { - "number_of_characters": 74.96888888888888, + "number_of_characters": 66648, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08107654320987653, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 134.5737704918033, + "max_document_length": 159, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "deu_Latn-deu_Latn": { - "number_of_characters": 77.32444444444444, + "number_of_characters": 68768, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08369382716049382, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 138.91803278688525, + "max_document_length": 182, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "deu_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-deu_Latn": { - "number_of_characters": 77.32444444444444, + "number_of_characters": 68768, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08369382716049382, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 138.91803278688525, + "max_document_length": 182, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ell_Grek-ell_Grek": { - "number_of_characters": 88.92666666666666, + "number_of_characters": 79210, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09658518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 160.3155737704918, + "max_document_length": 212, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ell_Grek-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-ell_Grek": { - "number_of_characters": 88.92666666666666, + "number_of_characters": 79210, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09658518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 160.3155737704918, + "max_document_length": 212, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "est_Latn-est_Latn": { - "number_of_characters": 69.55888888888889, + "number_of_characters": 61779, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07506543209876543, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 124.59631147540983, + "max_document_length": 164, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "est_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-est_Latn": { - "number_of_characters": 69.55888888888889, + "number_of_characters": 61779, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07506543209876543, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 124.59631147540983, + "max_document_length": 164, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eus_Latn-eus_Latn": { - "number_of_characters": 76.44777777777777, + "number_of_characters": 67979, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08271975308641975, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 137.3012295081967, + "max_document_length": 169, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eus_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-eus_Latn": { - "number_of_characters": 76.44777777777777, + "number_of_characters": 67979, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08271975308641975, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 137.3012295081967, + "max_document_length": 169, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "fin_Latn-fin_Latn": { - "number_of_characters": 74.50888888888889, + "number_of_characters": 66234, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08056543209876543, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 133.72540983606558, + "max_document_length": 161, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "fin_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-fin_Latn": { - "number_of_characters": 74.50888888888889, + "number_of_characters": 66234, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08056543209876543, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 133.72540983606558, + "max_document_length": 161, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "fra_Latn-fra_Latn": { - "number_of_characters": 92.54222222222222, + "number_of_characters": 82464, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10060246913580247, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 166.98360655737704, + "max_document_length": 204, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "fra_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-fra_Latn": { - "number_of_characters": 92.54222222222222, + "number_of_characters": 82464, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10060246913580247, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 166.98360655737704, + "max_document_length": 204, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "fuv_Latn-fuv_Latn": { - "number_of_characters": 60.42111111111111, + "number_of_characters": 53555, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06491234567901234, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 107.7438524590164, + "max_document_length": 122, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "fuv_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-fuv_Latn": { - "number_of_characters": 60.42111111111111, + "number_of_characters": 53555, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06491234567901234, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 107.7438524590164, + "max_document_length": 122, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "gaz_Latn-gaz_Latn": { - "number_of_characters": 87.93222222222222, + "number_of_characters": 78315, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09548024691358024, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 158.48155737704917, + "max_document_length": 191, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "gaz_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-gaz_Latn": { - "number_of_characters": 87.93222222222222, + "number_of_characters": 78315, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09548024691358024, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 158.48155737704917, + "max_document_length": 191, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "grn_Latn-grn_Latn": { - "number_of_characters": 77.10666666666667, + "number_of_characters": 68572, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08345185185185186, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 138.51639344262296, + "max_document_length": 161, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "grn_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-grn_Latn": { - "number_of_characters": 77.10666666666667, + "number_of_characters": 68572, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08345185185185186, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 138.51639344262296, + "max_document_length": 161, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "guj_Gujr-guj_Gujr": { - "number_of_characters": 64.25666666666666, + "number_of_characters": 57007, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06917407407407407, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 114.81762295081967, + "max_document_length": 138, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "guj_Gujr-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-guj_Gujr": { - "number_of_characters": 64.25666666666666, + "number_of_characters": 57007, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06917407407407407, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 114.81762295081967, + "max_document_length": 138, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "hat_Latn-hat_Latn": { - "number_of_characters": 72.64666666666666, + "number_of_characters": 64558, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07849629629629629, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 130.29098360655738, + "max_document_length": 179, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "hat_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-hat_Latn": { - "number_of_characters": 72.64666666666666, + "number_of_characters": 64558, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07849629629629629, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 130.29098360655738, + "max_document_length": 179, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "hau_Latn-hau_Latn": { - "number_of_characters": 87.8488888888889, + "number_of_characters": 78240, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09538765432098766, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 158.327868852459, + "max_document_length": 183, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "hau_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-hau_Latn": { - "number_of_characters": 87.8488888888889, + "number_of_characters": 78240, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09538765432098766, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 158.327868852459, + "max_document_length": 183, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "heb_Hebr-heb_Hebr": { - "number_of_characters": 57.135555555555555, + "number_of_characters": 50598, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06126172839506173, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 101.68442622950819, + "max_document_length": 134, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "heb_Hebr-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-heb_Hebr": { - "number_of_characters": 57.135555555555555, + "number_of_characters": 50598, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06126172839506173, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 101.68442622950819, + "max_document_length": 134, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "hin_Deva-hin_Deva": { - "number_of_characters": 74.61777777777777, + "number_of_characters": 66332, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08068641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 133.9262295081967, + "max_document_length": 165, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "hin_Deva-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-hin_Deva": { - "number_of_characters": 74.61777777777777, + "number_of_characters": 66332, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08068641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 133.9262295081967, + "max_document_length": 165, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "hin_Latn-hin_Latn": { - "number_of_characters": 76.81222222222222, + "number_of_characters": 68307, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08312469135802468, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 137.9733606557377, + "max_document_length": 170, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "hin_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-hin_Latn": { - "number_of_characters": 76.81222222222222, + "number_of_characters": 68307, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08312469135802468, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 137.9733606557377, + "max_document_length": 170, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "hrv_Latn-hrv_Latn": { - "number_of_characters": 70.83555555555556, + "number_of_characters": 62928, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07648395061728396, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 126.95081967213115, + "max_document_length": 175, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "hrv_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-hrv_Latn": { - "number_of_characters": 70.83555555555556, + "number_of_characters": 62928, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07648395061728396, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 126.95081967213115, + "max_document_length": 175, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "hun_Latn-hun_Latn": { - "number_of_characters": 76.40555555555555, + "number_of_characters": 67941, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08267283950617284, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 137.2233606557377, + "max_document_length": 176, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "hun_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-hun_Latn": { - "number_of_characters": 76.40555555555555, + "number_of_characters": 67941, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08267283950617284, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 137.2233606557377, + "max_document_length": 176, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "hye_Armn-hye_Armn": { - "number_of_characters": 77.42555555555556, + "number_of_characters": 68859, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08380617283950619, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 139.1045081967213, + "max_document_length": 193, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "hye_Armn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-hye_Armn": { - "number_of_characters": 77.42555555555556, + "number_of_characters": 68859, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08380617283950619, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 139.1045081967213, + "max_document_length": 193, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ibo_Latn-ibo_Latn": { - "number_of_characters": 74.51501668520578, + "number_of_characters": 66167, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08066186505584626, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 19, + "average_document_length": 133.58811475409837, + "max_document_length": 156, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0011123470522802, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 1.0011123470522802 }, "ibo_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-ibo_Latn": { - "number_of_characters": 74.51501668520578, + "number_of_characters": 66167, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08066186505584626, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 19, + "average_document_length": 133.58811475409837, + "max_document_length": 156, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0011123470522802, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 1.0011123470522802 }, "ilo_Latn-ilo_Latn": { - "number_of_characters": 87.7611111111111, + "number_of_characters": 78161, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09529012345679012, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 158.16598360655738, + "max_document_length": 187, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ilo_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-ilo_Latn": { - "number_of_characters": 87.7611111111111, + "number_of_characters": 78161, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09529012345679012, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 158.16598360655738, + "max_document_length": 187, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ind_Latn-ind_Latn": { - "number_of_characters": 84.10555555555555, + "number_of_characters": 74871, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09122839506172839, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 151.42418032786884, + "max_document_length": 207, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ind_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-ind_Latn": { - "number_of_characters": 84.10555555555555, + "number_of_characters": 74871, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09122839506172839, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 151.42418032786884, + "max_document_length": 207, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "isl_Latn-isl_Latn": { - "number_of_characters": 79.27333333333333, + "number_of_characters": 70522, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08585925925925925, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 142.5122950819672, + "max_document_length": 170, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "isl_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-isl_Latn": { - "number_of_characters": 79.27333333333333, + "number_of_characters": 70522, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08585925925925925, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 142.5122950819672, + "max_document_length": 170, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ita_Latn-ita_Latn": { - "number_of_characters": 85.49777777777778, + "number_of_characters": 76124, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09277530864197532, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 153.99180327868854, + "max_document_length": 185, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ita_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-ita_Latn": { - "number_of_characters": 85.49777777777778, + "number_of_characters": 76124, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09277530864197532, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 153.99180327868854, + "max_document_length": 185, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "jav_Latn-jav_Latn": { - "number_of_characters": 80.60666666666667, + "number_of_characters": 71722, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08734074074074075, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 144.97131147540983, + "max_document_length": 174, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "jav_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-jav_Latn": { - "number_of_characters": 80.60666666666667, + "number_of_characters": 71722, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08734074074074075, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 144.97131147540983, + "max_document_length": 174, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "jpn_Jpan-jpn_Jpan": { - "number_of_characters": 37.79, + "number_of_characters": 33187, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.039766666666666665, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 66.0061475409836, + "max_document_length": 76, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "jpn_Jpan-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-jpn_Jpan": { - "number_of_characters": 37.79, + "number_of_characters": 33187, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.039766666666666665, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 66.0061475409836, + "max_document_length": 76, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "kac_Latn-kac_Latn": { - "number_of_characters": 100.64182424916574, + "number_of_characters": 89655, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10972394243511205, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 181.71926229508196, + "max_document_length": 195, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "kac_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-kac_Latn": { - "number_of_characters": 100.64182424916574, + "number_of_characters": 89655, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10972394243511205, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 181.71926229508196, + "max_document_length": 195, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "kan_Knda-kan_Knda": { - "number_of_characters": 74.13666666666667, + "number_of_characters": 65899, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08015185185185185, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 133.0389344262295, + "max_document_length": 165, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "kan_Knda-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-kan_Knda": { - "number_of_characters": 74.13666666666667, + "number_of_characters": 65899, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08015185185185185, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 133.0389344262295, + "max_document_length": 165, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "kat_Geor-kat_Geor": { - "number_of_characters": 76.81444444444445, + "number_of_characters": 68309, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08312716049382717, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 137.97745901639345, + "max_document_length": 175, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "kat_Geor-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-kat_Geor": { - "number_of_characters": 76.81444444444445, + "number_of_characters": 68309, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08312716049382717, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 137.97745901639345, + "max_document_length": 175, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "kaz_Cyrl-kaz_Cyrl": { - "number_of_characters": 72.75666666666666, + "number_of_characters": 64657, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07861851851851852, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 130.49385245901638, + "max_document_length": 158, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "kaz_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-kaz_Cyrl": { - "number_of_characters": 72.75666666666666, + "number_of_characters": 64657, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07861851851851852, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 130.49385245901638, + "max_document_length": 158, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "kea_Latn-kea_Latn": { - "number_of_characters": 77.94111111111111, + "number_of_characters": 69323, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08437901234567902, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 140.05532786885246, + "max_document_length": 183, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "kea_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-kea_Latn": { - "number_of_characters": 77.94111111111111, + "number_of_characters": 69323, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08437901234567902, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 140.05532786885246, + "max_document_length": 183, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "khk_Cyrl-khk_Cyrl": { - "number_of_characters": 75.33444444444444, + "number_of_characters": 66977, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08148271604938272, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 135.24795081967213, + "max_document_length": 162, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "khk_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-khk_Cyrl": { - "number_of_characters": 75.33444444444444, + "number_of_characters": 66977, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08148271604938272, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 135.24795081967213, + "max_document_length": 162, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "khm_Khmr-khm_Khmr": { - "number_of_characters": 77.74888888888889, + "number_of_characters": 69150, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08416543209876542, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 139.70081967213116, + "max_document_length": 169, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "khm_Khmr-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-khm_Khmr": { - "number_of_characters": 77.74888888888889, + "number_of_characters": 69150, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08416543209876542, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 139.70081967213116, + "max_document_length": 169, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "kin_Latn-kin_Latn": { - "number_of_characters": 81.89655172413794, + "number_of_characters": 72803, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08887269379770626, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 13, + "average_document_length": 147.18647540983608, + "max_document_length": 194, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0011123470522802, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 1.0011123470522802 }, "kin_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-kin_Latn": { - "number_of_characters": 81.89655172413794, + "number_of_characters": 72803, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08887269379770626, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 13, + "average_document_length": 147.18647540983608, + "max_document_length": 194, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0011123470522802, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 1.0011123470522802 }, "kir_Cyrl-kir_Cyrl": { - "number_of_characters": 76.42333333333333, + "number_of_characters": 67957, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0826925925925926, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 137.25614754098362, + "max_document_length": 182, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "kir_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-kir_Cyrl": { - "number_of_characters": 76.42333333333333, + "number_of_characters": 67957, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0826925925925926, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 137.25614754098362, + "max_document_length": 182, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "kor_Hang-kor_Hang": { - "number_of_characters": 37.257777777777775, + "number_of_characters": 32708, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.039175308641975305, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 65.02459016393442, + "max_document_length": 88, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "kor_Hang-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-kor_Hang": { - "number_of_characters": 37.257777777777775, + "number_of_characters": 32708, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.039175308641975305, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 65.02459016393442, + "max_document_length": 88, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "lao_Laoo-lao_Laoo": { - "number_of_characters": 65.31333333333333, + "number_of_characters": 57958, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07034814814814815, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 116.76639344262296, + "max_document_length": 142, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "lao_Laoo-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-lao_Laoo": { - "number_of_characters": 65.31333333333333, + "number_of_characters": 57958, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07034814814814815, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 116.76639344262296, + "max_document_length": 142, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "lin_Latn-lin_Latn": { - "number_of_characters": 83.56681514476615, + "number_of_characters": 74223, "num_samples": 1386, "num_queries": 898, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09083164270018503, - "average_relevant_docs_per_query": 1.0022271714922049 + "min_document_length": 17, + "average_document_length": 150.09631147540983, + "max_document_length": 183, + "min_query_length": 2, + "average_query_length": 1.086859688195991, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0022271714922049, + "average_relevant_docs_per_query": 1.0022271714922049, + "max_relevant_docs_per_query": 1.0022271714922049 }, "lin_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-lin_Latn": { - "number_of_characters": 83.56681514476615, + "number_of_characters": 74223, "num_samples": 1386, "num_queries": 898, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09083164270018503, - "average_relevant_docs_per_query": 1.0022271714922049 + "min_document_length": 17, + "average_document_length": 150.09631147540983, + "max_document_length": 183, + "min_query_length": 2, + "average_query_length": 1.086859688195991, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0022271714922049, + "average_relevant_docs_per_query": 1.0022271714922049, + "max_relevant_docs_per_query": 1.0022271714922049 }, "lit_Latn-lit_Latn": { - "number_of_characters": 70.69888888888889, + "number_of_characters": 62805, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0763320987654321, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 126.69877049180327, + "max_document_length": 167, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "lit_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-lit_Latn": { - "number_of_characters": 70.69888888888889, + "number_of_characters": 62805, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0763320987654321, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 126.69877049180327, + "max_document_length": 167, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "lug_Latn-lug_Latn": { - "number_of_characters": 80.52057842046719, + "number_of_characters": 71566, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08734213394935171, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 12, + "average_document_length": 144.6516393442623, + "max_document_length": 237, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0011123470522802, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 1.0011123470522802 }, "lug_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-lug_Latn": { - "number_of_characters": 80.52057842046719, + "number_of_characters": 71566, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08734213394935171, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 12, + "average_document_length": 144.6516393442623, + "max_document_length": 237, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0011123470522802, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 1.0011123470522802 }, "luo_Latn-luo_Latn": { - "number_of_characters": 75.14333333333333, + "number_of_characters": 66805, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08127037037037037, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 134.8954918032787, + "max_document_length": 178, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "luo_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-luo_Latn": { - "number_of_characters": 75.14333333333333, + "number_of_characters": 66805, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08127037037037037, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 134.8954918032787, + "max_document_length": 178, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "lvs_Latn-lvs_Latn": { - "number_of_characters": 71.97888888888889, + "number_of_characters": 63957, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07775432098765432, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 129.0594262295082, + "max_document_length": 172, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "lvs_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-lvs_Latn": { - "number_of_characters": 71.97888888888889, + "number_of_characters": 63957, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07775432098765432, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 129.0594262295082, + "max_document_length": 172, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "mal_Mlym-mal_Mlym": { - "number_of_characters": 82.69222222222223, + "number_of_characters": 73599, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08965802469135803, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 148.81762295081967, + "max_document_length": 191, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "mal_Mlym-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-mal_Mlym": { - "number_of_characters": 82.69222222222223, + "number_of_characters": 73599, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08965802469135803, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 148.81762295081967, + "max_document_length": 191, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "mar_Deva-mar_Deva": { - "number_of_characters": 70.62625139043382, + "number_of_characters": 62671, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07633620844319669, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 15, + "average_document_length": 126.42418032786885, + "max_document_length": 160, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0011123470522802, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 1.0011123470522802 }, "mar_Deva-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-mar_Deva": { - "number_of_characters": 70.62625139043382, + "number_of_characters": 62671, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07633620844319669, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 15, + "average_document_length": 126.42418032786885, + "max_document_length": 160, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0011123470522802, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 1.0011123470522802 }, "mkd_Cyrl-mkd_Cyrl": { - "number_of_characters": 76.01333333333334, + "number_of_characters": 67588, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08223703703703704, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 136.5, + "max_document_length": 180, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "mkd_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-mkd_Cyrl": { - "number_of_characters": 76.01333333333334, + "number_of_characters": 67588, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08223703703703704, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 136.5, + "max_document_length": 180, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "mlt_Latn-mlt_Latn": { - "number_of_characters": 77.00444444444445, + "number_of_characters": 68480, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08333827160493827, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 138.327868852459, + "max_document_length": 185, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "mlt_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-mlt_Latn": { - "number_of_characters": 77.00444444444445, + "number_of_characters": 68480, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08333827160493827, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 138.327868852459, + "max_document_length": 185, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "mri_Latn-mri_Latn": { - "number_of_characters": 83.71444444444444, + "number_of_characters": 74519, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09079382716049382, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 150.702868852459, + "max_document_length": 185, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "mri_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-mri_Latn": { - "number_of_characters": 83.71444444444444, + "number_of_characters": 74519, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09079382716049382, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 150.702868852459, + "max_document_length": 185, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "mya_Mymr-mya_Mymr": { - "number_of_characters": 91.28333333333333, + "number_of_characters": 81331, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0992037037037037, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 164.66188524590163, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "mya_Mymr-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-mya_Mymr": { - "number_of_characters": 91.28333333333333, + "number_of_characters": 81331, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0992037037037037, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 164.66188524590163, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "nld_Latn-nld_Latn": { - "number_of_characters": 77.34777777777778, + "number_of_characters": 68789, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08371975308641975, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 138.9610655737705, + "max_document_length": 183, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "nld_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-nld_Latn": { - "number_of_characters": 77.34777777777778, + "number_of_characters": 68789, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08371975308641975, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 138.9610655737705, + "max_document_length": 183, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "nob_Latn-nob_Latn": { - "number_of_characters": 73.04555555555555, + "number_of_characters": 64917, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0789395061728395, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 131.0266393442623, + "max_document_length": 168, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "nob_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-nob_Latn": { - "number_of_characters": 73.04555555555555, + "number_of_characters": 64917, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0789395061728395, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 131.0266393442623, + "max_document_length": 168, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "npi_Deva-npi_Deva": { - "number_of_characters": 68.89666666666666, + "number_of_characters": 61183, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07432962962962962, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 123.375, + "max_document_length": 154, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "npi_Deva-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-npi_Deva": { - "number_of_characters": 68.89666666666666, + "number_of_characters": 61183, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07432962962962962, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 123.375, + "max_document_length": 154, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "npi_Latn-npi_Latn": { - "number_of_characters": 73.89666666666666, + "number_of_characters": 65683, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07988518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 132.59631147540983, + "max_document_length": 154, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "npi_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-npi_Latn": { - "number_of_characters": 73.89666666666666, + "number_of_characters": 65683, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07988518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 132.59631147540983, + "max_document_length": 154, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "nso_Latn-nso_Latn": { - "number_of_characters": 88.77444444444444, + "number_of_characters": 79073, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09641604938271604, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 160.03483606557376, + "max_document_length": 235, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "nso_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-nso_Latn": { - "number_of_characters": 88.77444444444444, + "number_of_characters": 79073, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09641604938271604, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 160.03483606557376, + "max_document_length": 235, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "nya_Latn-nya_Latn": { - "number_of_characters": 92.78777777777778, + "number_of_characters": 82685, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.1008753086419753, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 167.43647540983608, + "max_document_length": 215, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "nya_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-nya_Latn": { - "number_of_characters": 92.78777777777778, + "number_of_characters": 82685, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.1008753086419753, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 167.43647540983608, + "max_document_length": 215, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ory_Orya-ory_Orya": { - "number_of_characters": 74.95777777777778, + "number_of_characters": 66638, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0810641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 10, + "average_document_length": 134.55327868852459, + "max_document_length": 168, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ory_Orya-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-ory_Orya": { - "number_of_characters": 74.95777777777778, + "number_of_characters": 66638, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0810641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 10, + "average_document_length": 134.55327868852459, + "max_document_length": 168, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "pan_Guru-pan_Guru": { - "number_of_characters": 75.29777777777778, + "number_of_characters": 66944, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08144197530864197, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 135.18032786885246, + "max_document_length": 157, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "pan_Guru-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-pan_Guru": { - "number_of_characters": 75.29777777777778, + "number_of_characters": 66944, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08144197530864197, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 135.18032786885246, + "max_document_length": 157, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "pbt_Arab-pbt_Arab": { - "number_of_characters": 69.67111111111112, + "number_of_characters": 61880, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07519012345679013, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 124.80327868852459, + "max_document_length": 155, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "pbt_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-pbt_Arab": { - "number_of_characters": 69.67111111111112, + "number_of_characters": 61880, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07519012345679013, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 124.80327868852459, + "max_document_length": 155, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "pes_Arab-pes_Arab": { - "number_of_characters": 66.75111111111111, + "number_of_characters": 59252, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07194567901234568, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 119.41803278688525, + "max_document_length": 152, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "pes_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-pes_Arab": { - "number_of_characters": 66.75111111111111, + "number_of_characters": 59252, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07194567901234568, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 119.41803278688525, + "max_document_length": 152, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "plt_Latn-plt_Latn": { - "number_of_characters": 96.99555555555555, + "number_of_characters": 86472, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10555061728395061, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 175.19672131147541, + "max_document_length": 222, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "plt_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-plt_Latn": { - "number_of_characters": 96.99555555555555, + "number_of_characters": 86472, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10555061728395061, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 175.19672131147541, + "max_document_length": 222, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "pol_Latn-pol_Latn": { - "number_of_characters": 76.09777777777778, + "number_of_characters": 67664, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08233086419753087, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 136.65573770491804, + "max_document_length": 196, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "pol_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-pol_Latn": { - "number_of_characters": 76.09777777777778, + "number_of_characters": 67664, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08233086419753087, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 136.65573770491804, + "max_document_length": 196, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "por_Latn-por_Latn": { - "number_of_characters": 80.11666666666666, + "number_of_characters": 71281, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08679629629629629, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 144.06762295081967, + "max_document_length": 179, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "por_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-por_Latn": { - "number_of_characters": 80.11666666666666, + "number_of_characters": 71281, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08679629629629629, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 144.06762295081967, + "max_document_length": 179, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ron_Latn-ron_Latn": { - "number_of_characters": 80.74222222222222, + "number_of_characters": 71844, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08749135802469137, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 145.22131147540983, + "max_document_length": 181, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ron_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-ron_Latn": { - "number_of_characters": 80.74222222222222, + "number_of_characters": 71844, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08749135802469137, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 145.22131147540983, + "max_document_length": 181, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "rus_Cyrl-rus_Cyrl": { - "number_of_characters": 85.16333333333333, + "number_of_characters": 75823, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0924037037037037, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 153.375, + "max_document_length": 196, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "rus_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-rus_Cyrl": { - "number_of_characters": 85.16333333333333, + "number_of_characters": 75823, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0924037037037037, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 153.375, + "max_document_length": 196, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "shn_Mymr-shn_Mymr": { - "number_of_characters": 77.90222222222222, + "number_of_characters": 69288, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0843358024691358, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 139.98360655737704, + "max_document_length": 159, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "shn_Mymr-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-shn_Mymr": { - "number_of_characters": 77.90222222222222, + "number_of_characters": 69288, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0843358024691358, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 139.98360655737704, + "max_document_length": 159, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "sin_Latn-sin_Latn": { - "number_of_characters": 96.46666666666667, + "number_of_characters": 85996, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10496296296296297, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 174.22131147540983, + "max_document_length": 224, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "sin_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-sin_Latn": { - "number_of_characters": 96.46666666666667, + "number_of_characters": 85996, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10496296296296297, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 174.22131147540983, + "max_document_length": 224, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "sin_Sinh-sin_Sinh": { - "number_of_characters": 71.91777777777777, + "number_of_characters": 63902, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07768641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 128.94672131147541, + "max_document_length": 159, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "sin_Sinh-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-sin_Sinh": { - "number_of_characters": 71.91777777777777, + "number_of_characters": 63902, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07768641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 128.94672131147541, + "max_document_length": 159, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "slk_Latn-slk_Latn": { - "number_of_characters": 70.5411111111111, + "number_of_characters": 62663, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07615679012345679, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 126.4077868852459, + "max_document_length": 146, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "slk_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-slk_Latn": { - "number_of_characters": 70.5411111111111, + "number_of_characters": 62663, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07615679012345679, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 126.4077868852459, + "max_document_length": 146, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "slv_Latn-slv_Latn": { - "number_of_characters": 70.79888888888888, + "number_of_characters": 62895, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0764432098765432, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 126.88319672131148, + "max_document_length": 176, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "slv_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-slv_Latn": { - "number_of_characters": 70.79888888888888, + "number_of_characters": 62895, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0764432098765432, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 126.88319672131148, + "max_document_length": 176, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "sna_Latn-sna_Latn": { - "number_of_characters": 83.30700778642937, + "number_of_characters": 74071, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09044161044096703, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 149.78483606557376, + "max_document_length": 191, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "sna_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-sna_Latn": { - "number_of_characters": 83.30700778642937, + "number_of_characters": 74071, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09044161044096703, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 149.78483606557376, + "max_document_length": 191, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "snd_Arab-snd_Arab": { - "number_of_characters": 65.42333333333333, + "number_of_characters": 58057, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07047037037037036, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 116.96926229508196, + "max_document_length": 164, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "snd_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-snd_Arab": { - "number_of_characters": 65.42333333333333, + "number_of_characters": 58057, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07047037037037036, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 116.96926229508196, + "max_document_length": 164, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "som_Latn-som_Latn": { - "number_of_characters": 92.95777777777778, + "number_of_characters": 82838, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.1010641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 167.75, + "max_document_length": 201, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "som_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-som_Latn": { - "number_of_characters": 92.95777777777778, + "number_of_characters": 82838, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.1010641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 167.75, + "max_document_length": 201, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "sot_Latn-sot_Latn": { - "number_of_characters": 85.13111111111111, + "number_of_characters": 75794, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0923679012345679, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 153.3155737704918, + "max_document_length": 186, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "sot_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-sot_Latn": { - "number_of_characters": 85.13111111111111, + "number_of_characters": 75794, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0923679012345679, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 153.3155737704918, + "max_document_length": 186, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "spa_Latn-spa_Latn": { - "number_of_characters": 84.16, + "number_of_characters": 74920, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09128888888888889, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 151.52459016393442, + "max_document_length": 180, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "spa_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-spa_Latn": { - "number_of_characters": 84.16, + "number_of_characters": 74920, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09128888888888889, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 151.52459016393442, + "max_document_length": 180, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "srp_Cyrl-srp_Cyrl": { - "number_of_characters": 69.49833147942158, + "number_of_characters": 61657, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07508157005497394, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 13, + "average_document_length": 124.34631147540983, + "max_document_length": 160, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0011123470522802, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 1.0011123470522802 }, "srp_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-srp_Cyrl": { - "number_of_characters": 69.49833147942158, + "number_of_characters": 61657, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07508157005497394, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 13, + "average_document_length": 124.34631147540983, + "max_document_length": 160, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0011123470522802, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 1.0011123470522802 }, "ssw_Latn-ssw_Latn": { - "number_of_characters": 83.09777777777778, + "number_of_characters": 73964, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09010864197530864, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 149.5655737704918, + "max_document_length": 182, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ssw_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-ssw_Latn": { - "number_of_characters": 83.09777777777778, + "number_of_characters": 73964, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09010864197530864, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 149.5655737704918, + "max_document_length": 182, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "sun_Latn-sun_Latn": { - "number_of_characters": 80.16, + "number_of_characters": 71320, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08684444444444445, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 144.14754098360655, + "max_document_length": 173, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "sun_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-sun_Latn": { - "number_of_characters": 80.16, + "number_of_characters": 71320, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08684444444444445, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 144.14754098360655, + "max_document_length": 173, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "swe_Latn-swe_Latn": { - "number_of_characters": 70.67666666666666, + "number_of_characters": 62785, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07630740740740741, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 126.6577868852459, + "max_document_length": 154, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "swe_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-swe_Latn": { - "number_of_characters": 70.67666666666666, + "number_of_characters": 62785, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07630740740740741, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 126.6577868852459, + "max_document_length": 154, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "swh_Latn-swh_Latn": { - "number_of_characters": 82.56, + "number_of_characters": 73480, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08951111111111111, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 148.5737704918033, + "max_document_length": 194, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "swh_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-swh_Latn": { - "number_of_characters": 82.56, + "number_of_characters": 73480, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08951111111111111, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 148.5737704918033, + "max_document_length": 194, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "tam_Taml-tam_Taml": { - "number_of_characters": 83.12777777777778, + "number_of_characters": 73991, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09014197530864197, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 149.62090163934425, + "max_document_length": 181, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "tam_Taml-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-tam_Taml": { - "number_of_characters": 83.12777777777778, + "number_of_characters": 73991, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09014197530864197, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 149.62090163934425, + "max_document_length": 181, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "tel_Telu-tel_Telu": { - "number_of_characters": 74.18777777777778, + "number_of_characters": 65945, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08020864197530865, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 133.13319672131146, + "max_document_length": 149, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "tel_Telu-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-tel_Telu": { - "number_of_characters": 74.18777777777778, + "number_of_characters": 65945, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08020864197530865, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 133.13319672131146, + "max_document_length": 149, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "tgk_Cyrl-tgk_Cyrl": { - "number_of_characters": 76.28111111111112, + "number_of_characters": 67829, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08253456790123458, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 136.99385245901638, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "tgk_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-tgk_Cyrl": { - "number_of_characters": 76.28111111111112, + "number_of_characters": 67829, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08253456790123458, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 136.99385245901638, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "tgl_Latn-tgl_Latn": { - "number_of_characters": 84.34555555555555, + "number_of_characters": 75087, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09149506172839506, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 151.86680327868854, + "max_document_length": 184, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "tgl_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-tgl_Latn": { - "number_of_characters": 84.34555555555555, + "number_of_characters": 75087, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09149506172839506, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 151.86680327868854, + "max_document_length": 184, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "tha_Thai-tha_Thai": { - "number_of_characters": 61.46666666666667, + "number_of_characters": 54496, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06607407407407408, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 109.67213114754098, + "max_document_length": 123, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "tha_Thai-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-tha_Thai": { - "number_of_characters": 61.46666666666667, + "number_of_characters": 54496, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06607407407407408, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 109.67213114754098, + "max_document_length": 123, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "tir_Ethi-tir_Ethi": { - "number_of_characters": 53.99888888888889, + "number_of_characters": 47775, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.057776543209876546, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 95.89959016393442, + "max_document_length": 110, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "tir_Ethi-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-tir_Ethi": { - "number_of_characters": 53.99888888888889, + "number_of_characters": 47775, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.057776543209876546, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 95.89959016393442, + "max_document_length": 110, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "tsn_Latn-tsn_Latn": { - "number_of_characters": 89.12777777777778, + "number_of_characters": 79391, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09680864197530864, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 160.68647540983608, + "max_document_length": 204, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "tsn_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-tsn_Latn": { - "number_of_characters": 89.12777777777778, + "number_of_characters": 79391, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09680864197530864, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 160.68647540983608, + "max_document_length": 204, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "tso_Latn-tso_Latn": { - "number_of_characters": 93.69444444444444, + "number_of_characters": 83501, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10188271604938272, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 169.10860655737704, + "max_document_length": 215, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "tso_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-tso_Latn": { - "number_of_characters": 93.69444444444444, + "number_of_characters": 83501, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10188271604938272, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 169.10860655737704, + "max_document_length": 215, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "tur_Latn-tur_Latn": { - "number_of_characters": 73.56222222222222, + "number_of_characters": 65382, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07951358024691357, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 131.9795081967213, + "max_document_length": 158, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "tur_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-tur_Latn": { - "number_of_characters": 73.56222222222222, + "number_of_characters": 65382, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07951358024691357, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 131.9795081967213, + "max_document_length": 158, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ukr_Cyrl-ukr_Cyrl": { - "number_of_characters": 74.08222222222223, + "number_of_characters": 65850, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08009135802469136, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 132.93852459016392, + "max_document_length": 159, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ukr_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-ukr_Cyrl": { - "number_of_characters": 74.08222222222223, + "number_of_characters": 65850, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08009135802469136, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 132.93852459016392, + "max_document_length": 159, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "urd_Arab-urd_Arab": { - "number_of_characters": 72.52666666666667, + "number_of_characters": 64450, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07836296296296297, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 130.06967213114754, + "max_document_length": 187, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "urd_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-urd_Arab": { - "number_of_characters": 72.52666666666667, + "number_of_characters": 64450, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07836296296296297, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 130.06967213114754, + "max_document_length": 187, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "urd_Latn-urd_Latn": { - "number_of_characters": 92.07, + "number_of_characters": 82039, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10007777777777777, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 166.1127049180328, + "max_document_length": 230, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "urd_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-urd_Latn": { - "number_of_characters": 92.07, + "number_of_characters": 82039, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10007777777777777, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 166.1127049180328, + "max_document_length": 230, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "uzn_Latn-uzn_Latn": { - "number_of_characters": 79.61333333333333, + "number_of_characters": 70828, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08623703703703703, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 143.13934426229508, + "max_document_length": 175, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "uzn_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-uzn_Latn": { - "number_of_characters": 79.61333333333333, + "number_of_characters": 70828, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08623703703703703, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 143.13934426229508, + "max_document_length": 175, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "vie_Latn-vie_Latn": { - "number_of_characters": 75.05333333333333, + "number_of_characters": 66724, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08117037037037036, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 134.7295081967213, + "max_document_length": 161, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "vie_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-vie_Latn": { - "number_of_characters": 75.05333333333333, + "number_of_characters": 66724, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08117037037037036, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 134.7295081967213, + "max_document_length": 161, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "war_Latn-war_Latn": { - "number_of_characters": 88.07555555555555, + "number_of_characters": 78444, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0956395061728395, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 158.74590163934425, + "max_document_length": 207, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "war_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-war_Latn": { - "number_of_characters": 88.07555555555555, + "number_of_characters": 78444, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0956395061728395, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 158.74590163934425, + "max_document_length": 207, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "wol_Latn-wol_Latn": { - "number_of_characters": 72.60555555555555, + "number_of_characters": 64521, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07845061728395061, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 130.21516393442624, + "max_document_length": 139, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "wol_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-wol_Latn": { - "number_of_characters": 72.60555555555555, + "number_of_characters": 64521, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07845061728395061, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 130.21516393442624, + "max_document_length": 139, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "xho_Latn-xho_Latn": { - "number_of_characters": 80.50333333333333, + "number_of_characters": 71629, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08722592592592593, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 144.78073770491804, + "max_document_length": 179, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "xho_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-xho_Latn": { - "number_of_characters": 80.50333333333333, + "number_of_characters": 71629, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08722592592592593, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 144.78073770491804, + "max_document_length": 179, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "yor_Latn-yor_Latn": { - "number_of_characters": 70.64, + "number_of_characters": 62752, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07626666666666666, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 126.59016393442623, + "max_document_length": 143, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "yor_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-yor_Latn": { - "number_of_characters": 70.64, + "number_of_characters": 62752, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07626666666666666, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 126.59016393442623, + "max_document_length": 143, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "zho_Hans-zho_Hans": { - "number_of_characters": 23.747777777777777, + "number_of_characters": 20549, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.024164197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 7, + "average_document_length": 40.10860655737705, + "max_document_length": 64, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "zho_Hans-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-zho_Hans": { - "number_of_characters": 23.747777777777777, + "number_of_characters": 20549, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.024164197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 7, + "average_document_length": 40.10860655737705, + "max_document_length": 64, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "zho_Hant-zho_Hant": { - "number_of_characters": 23.07888888888889, + "number_of_characters": 19947, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.02342098765432099, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 7, + "average_document_length": 38.875, + "max_document_length": 45, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "zho_Hant-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-zho_Hant": { - "number_of_characters": 23.07888888888889, + "number_of_characters": 19947, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.02342098765432099, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 7, + "average_document_length": 38.875, + "max_document_length": 45, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "zsm_Latn-zsm_Latn": { - "number_of_characters": 80.92444444444445, + "number_of_characters": 72008, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08769382716049383, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 145.55737704918033, + "max_document_length": 210, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "zsm_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-zsm_Latn": { - "number_of_characters": 80.92444444444445, + "number_of_characters": 72008, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08769382716049383, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 145.55737704918033, + "max_document_length": 210, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "zul_Latn-zul_Latn": { - "number_of_characters": 78.0411111111111, + "number_of_characters": 69413, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08449012345679012, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 140.23975409836066, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "zul_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "eng_Latn-zul_Latn": { - "number_of_characters": 78.0411111111111, + "number_of_characters": 69413, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08449012345679012, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 140.23975409836066, + "max_document_length": 171, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "arb_Arab-arb_Latn": { - "number_of_characters": 69.02444444444444, + "number_of_characters": 61298, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0744716049382716, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 123.61065573770492, + "max_document_length": 160, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "arb_Latn-arb_Arab": { - "number_of_characters": 60.55, + "number_of_characters": 53671, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06505555555555555, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 107.98155737704919, + "max_document_length": 134, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ben_Beng-ben_Latn": { - "number_of_characters": 76.78777777777778, + "number_of_characters": 68285, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08309753086419754, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 9, + "average_document_length": 137.92827868852459, + "max_document_length": 185, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ben_Latn-ben_Beng": { - "number_of_characters": 71.48444444444445, + "number_of_characters": 63512, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07720493827160495, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 9, + "average_document_length": 128.14754098360655, + "max_document_length": 175, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "hin_Deva-hin_Latn": { - "number_of_characters": 76.81222222222222, + "number_of_characters": 68307, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08312469135802468, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 137.9733606557377, + "max_document_length": 170, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "hin_Latn-hin_Deva": { - "number_of_characters": 74.61777777777777, + "number_of_characters": 66332, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08068641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 133.9262295081967, + "max_document_length": 165, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "npi_Deva-npi_Latn": { - "number_of_characters": 73.89666666666666, + "number_of_characters": 65683, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07988518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 132.59631147540983, + "max_document_length": 154, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "npi_Latn-npi_Deva": { - "number_of_characters": 68.89666666666666, + "number_of_characters": 61183, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07432962962962962, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 123.375, + "max_document_length": 154, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "sin_Sinh-sin_Latn": { - "number_of_characters": 96.46666666666667, + "number_of_characters": 85996, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10496296296296297, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 174.22131147540983, + "max_document_length": 224, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "sin_Latn-sin_Sinh": { - "number_of_characters": 71.91777777777777, + "number_of_characters": 63902, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07768641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 128.94672131147541, + "max_document_length": 159, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "urd_Arab-urd_Latn": { - "number_of_characters": 92.07, + "number_of_characters": 82039, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10007777777777777, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 166.1127049180328, + "max_document_length": 230, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "urd_Latn-urd_Arab": { - "number_of_characters": 72.52666666666667, + "number_of_characters": 64450, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07836296296296297, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 130.06967213114754, + "max_document_length": 187, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 } } } diff --git a/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json b/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json index e9ff1f787..e4bed3000 100644 --- a/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json @@ -1,66 +1,108 @@ { "test": { - "number_of_characters": 664.7686497593272, + "number_of_characters": 36843313, "num_samples": 1056326, "num_queries": 52561, "num_documents": 1003765, - "average_document_length": 1.9924982441109223e-06, - "average_query_length": 0.012609513703303347, + "min_document_length": 54, + "average_document_length": 34.70511822986456, + "max_document_length": 334374, + "min_query_length": 2, + "average_query_length": 38.19428854093339, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0, "hf_subset_descriptive_stats": { "python": { - "number_of_characters": 941.4041426464673, + "number_of_characters": 14574651, "num_samples": 295228, "num_queries": 14918, "num_documents": 280310, - "average_document_length": 7.134957725375477e-06, - "average_query_length": 0.0629711853228628, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 95, + "average_document_length": 49.994759373550714, + "max_document_length": 14008, + "min_query_length": 2, + "average_query_length": 37.5801045716584, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "javascript": { - "number_of_characters": 748.8343968398663, + "number_of_characters": 2587540, "num_samples": 68145, "num_queries": 3291, "num_documents": 64854, - "average_document_length": 3.0838498781879296e-05, - "average_query_length": 0.2269323600242681, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 87, + "average_document_length": 37.89792456903198, + "max_document_length": 334374, + "min_query_length": 2, + "average_query_length": 39.412944393801276, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "go": { - "number_of_characters": 405.3770007387343, + "number_of_characters": 3641108, "num_samples": 190562, "num_queries": 8122, "num_documents": 182440, - "average_document_length": 1.0962508221881167e-05, - "average_query_length": 0.049664737840277556, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 54, + "average_document_length": 17.957838193378645, + "max_document_length": 5280, + "min_query_length": 2, + "average_query_length": 44.9248953459739, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ruby": { - "number_of_characters": 457.43695479777955, + "number_of_characters": 629446, "num_samples": 28831, "num_queries": 1261, "num_documents": 27570, - "average_document_length": 7.254261878853827e-05, - "average_query_length": 0.3611712567785722, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 83, + "average_document_length": 20.830830612985128, + "max_document_length": 3992, + "min_query_length": 2, + "average_query_length": 43.72720063441713, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "java": { - "number_of_characters": 588.8922866271109, + "number_of_characters": 6791137, "num_samples": 191821, "num_queries": 10955, "num_documents": 180866, - "average_document_length": 1.1057910276116019e-05, - "average_query_length": 0.053573006538303145, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 77, + "average_document_length": 35.54789180940586, + "max_document_length": 7615, + "min_query_length": 2, + "average_query_length": 33.019808306709265, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "php": { - "number_of_characters": 578.8503639217925, + "number_of_characters": 8619431, "num_samples": 281739, "num_queries": 14014, "num_documents": 267725, - "average_document_length": 7.470352040339901e-06, - "average_query_length": 0.041162434987997175, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 94, + "average_document_length": 30.195091978709495, + "max_document_length": 4904, + "min_query_length": 2, + "average_query_length": 38.20822035107749, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 } } } diff --git a/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json b/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json index a0325c638..247e489b7 100644 --- a/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json @@ -1,129 +1,213 @@ { "train": { - "number_of_characters": 71.98776923076923, + "number_of_characters": 935841, "num_samples": 26000, "num_queries": 13000, "num_documents": 13000, - "average_document_length": 7.692307692307693e-05, - "average_query_length": 0.005460597633136095, + "min_document_length": 18, + "average_document_length": 70.98776923076923, + "max_document_length": 2532, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0, "hf_subset_descriptive_stats": { "python": { - "number_of_characters": 70.519, + "number_of_characters": 70519, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.06951900000000001, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 21, + "average_document_length": 69.519, + "max_document_length": 1811, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "javascript": { - "number_of_characters": 57.88, + "number_of_characters": 57880, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.05688, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 56.88, + "max_document_length": 601, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "typescript": { - "number_of_characters": 61.092, + "number_of_characters": 61092, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.060092, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 60.092, + "max_document_length": 659, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "go": { - "number_of_characters": 71.797, + "number_of_characters": 71797, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.070797, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 70.797, + "max_document_length": 1529, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ruby": { - "number_of_characters": 67.9, + "number_of_characters": 67900, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.0669, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 66.9, + "max_document_length": 751, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "java": { - "number_of_characters": 63.984, + "number_of_characters": 63984, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.062984, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 23, + "average_document_length": 62.984, + "max_document_length": 807, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "php": { - "number_of_characters": 62.927, + "number_of_characters": 62927, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.061927, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 21, + "average_document_length": 61.927, + "max_document_length": 766, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "c": { - "number_of_characters": 98.588, + "number_of_characters": 98588, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.097588, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 97.588, + "max_document_length": 1672, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "c++": { - "number_of_characters": 115.48, + "number_of_characters": 115480, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.11448, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 22, + "average_document_length": 114.48, + "max_document_length": 1856, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "rust": { - "number_of_characters": 68.503, + "number_of_characters": 68503, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.067503, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 67.503, + "max_document_length": 2532, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "swift": { - "number_of_characters": 58.279, + "number_of_characters": 58279, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.057279000000000004, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 57.279, + "max_document_length": 727, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "scala": { - "number_of_characters": 65.833, + "number_of_characters": 65833, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.064833, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 22, + "average_document_length": 64.833, + "max_document_length": 685, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "shell": { - "number_of_characters": 73.059, + "number_of_characters": 73059, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.072059, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 72.059, + "max_document_length": 813, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 } } } diff --git a/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json b/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json index c49801192..d6d5b6cc8 100644 --- a/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json +++ b/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json @@ -1,11 +1,17 @@ { "test": { - "number_of_characters": 5894.401984777533, + "number_of_characters": 156266302, "num_samples": 79660, "num_queries": 13277, "num_documents": 66383, - "average_document_length": 0.022127347788495202, - "average_query_length": 0.3333224566192555, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 127, + "average_document_length": 885.131117906693, + "max_document_length": 32432, + "min_query_length": 2, + "average_query_length": 7344.177374406869, + "max_query_length": 9403, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json b/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json index a7e653149..2183d94b0 100644 --- a/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json +++ b/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json @@ -1,11 +1,17 @@ { "test": { - "number_of_characters": 2246.575885305433, + "number_of_characters": 260957682, "num_samples": 187832, "num_queries": 31306, "num_documents": 156526, - "average_document_length": 0.009725743421916316, - "average_query_length": 0.02313435668710662, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 26, + "average_document_length": 144.85253568097312, + "max_document_length": 13851, + "min_query_length": 1, + "average_query_length": 7611.464064396601, + "max_query_length": 11354, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json b/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json index 96802f81d..622694ef2 100644 --- a/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json @@ -1,66 +1,108 @@ { "test": { - "number_of_characters": 390.06276516809044, + "number_of_characters": 22407915, "num_samples": 1058035, "num_queries": 52561, "num_documents": 1005474, - "average_document_length": 1.9891116030847143e-06, - "average_query_length": 0.007383093266263778, + "min_document_length": 23, + "average_document_length": 20.28592186371801, + "max_document_length": 214210, + "min_query_length": 2, + "average_query_length": 38.259317745096176, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0, "hf_subset_descriptive_stats": { "python": { - "number_of_characters": 553.7934039415471, + "number_of_characters": 8792958, "num_samples": 295570, "num_queries": 14918, "num_documents": 280652, - "average_document_length": 7.126263130139817e-06, - "average_query_length": 0.0369884303486759, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 38, + "average_document_length": 29.330466200133973, + "max_document_length": 8326, + "min_query_length": 2, + "average_query_length": 37.62595522187961, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "javascript": { - "number_of_characters": 445.70707991491946, + "number_of_characters": 1590642, "num_samples": 68492, "num_queries": 3291, "num_documents": 65201, - "average_document_length": 3.0674376159874846e-05, - "average_query_length": 0.1348243937754237, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 40, + "average_document_length": 22.395975521847824, + "max_document_length": 214210, + "min_query_length": 2, + "average_query_length": 39.6238225463385, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "go": { - "number_of_characters": 235.76803742920464, + "number_of_characters": 2264134, "num_samples": 190857, "num_queries": 8122, "num_documents": 182735, - "average_document_length": 1.0944810791583441e-05, - "average_query_length": 0.028782077989313547, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 23, + "average_document_length": 10.390259118395491, + "max_document_length": 3589, + "min_query_length": 2, + "average_query_length": 44.99753755232701, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ruby": { - "number_of_characters": 268.8731165741475, + "number_of_characters": 391703, "num_samples": 28849, "num_queries": 1261, "num_documents": 27588, - "average_document_length": 7.24952878062926e-05, - "average_query_length": 0.21163609561788066, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 36, + "average_document_length": 12.198310859794113, + "max_document_length": 2244, + "min_query_length": 2, + "average_query_length": 43.75574940523394, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "java": { - "number_of_characters": 344.5341853035144, + "number_of_characters": 4114584, "num_samples": 192016, "num_queries": 10955, "num_documents": 181061, - "average_document_length": 1.1046001071462105e-05, - "average_query_length": 0.03126738341428703, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 38, + "average_document_length": 20.724849636310413, + "max_document_length": 5066, + "min_query_length": 2, + "average_query_length": 33.055408489274306, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "php": { - "number_of_characters": 338.62194947909234, + "number_of_characters": 5253894, "num_samples": 282251, "num_queries": 14014, "num_documents": 268237, - "average_document_length": 7.456092932742314e-06, - "average_query_length": 0.024020404558234076, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 40, + "average_document_length": 17.586760961388624, + "max_document_length": 2995, + "min_query_length": 2, + "average_query_length": 38.28129013843299, + "max_query_length": 2, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 } } } diff --git a/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json b/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json index 188d4eb7e..52a641a88 100644 --- a/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json @@ -1,66 +1,108 @@ { "test": { - "number_of_characters": 325.01233333333334, + "number_of_characters": 1950074, "num_samples": 12000, "num_queries": 6000, "num_documents": 6000, - "average_document_length": 0.00016666666666666666, - "average_query_length": 0.05400205555555556, + "min_document_length": 2, + "average_document_length": 324.01233333333334, + "max_document_length": 17533, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0, "hf_subset_descriptive_stats": { "python": { - "number_of_characters": 467.546, + "number_of_characters": 467546, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.466546, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 8, + "average_document_length": 466.546, + "max_document_length": 8636, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "javascript": { - "number_of_characters": 187.018, + "number_of_characters": 187018, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.186018, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 2, + "average_document_length": 186.018, + "max_document_length": 7657, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "go": { - "number_of_characters": 126.213, + "number_of_characters": 126213, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.125213, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 125.213, + "max_document_length": 1501, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "ruby": { - "number_of_characters": 314.818, + "number_of_characters": 314818, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.313818, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 5, + "average_document_length": 313.818, + "max_document_length": 17533, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "java": { - "number_of_characters": 691.36, + "number_of_characters": 691360, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.69036, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 2, + "average_document_length": 690.36, + "max_document_length": 6473, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 }, "php": { - "number_of_characters": 163.119, + "number_of_characters": 163119, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.162119, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 5, + "average_document_length": 162.119, + "max_document_length": 1240, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 } } } diff --git a/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json b/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json index da6aa8174..f05ff9de5 100644 --- a/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json +++ b/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json @@ -1,11 +1,17 @@ { "test": { - "number_of_characters": 2520.6537967751206, + "number_of_characters": 1744286, "num_samples": 1229, "num_queries": 221, "num_documents": 1008, - "average_document_length": 1.4965681295666415, - "average_query_length": 4.57969738539342, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 8, + "average_document_length": 221.90178571428572, + "max_document_length": 4147, + "min_query_length": 8, + "average_query_length": 6880.58371040724, + "max_query_length": 10852, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json b/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json index cf266671f..71715f554 100644 --- a/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json +++ b/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json @@ -1,11 +1,17 @@ { "test": { - "number_of_characters": 3347.695751633987, + "number_of_characters": 1543912, "num_samples": 996, "num_queries": 180, "num_documents": 816, - "average_document_length": 1.8138155997693195, - "average_query_length": 10.37567901234568, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 376, + "average_document_length": 411.97549019607845, + "max_document_length": 8285, + "min_query_length": 58, + "average_query_length": 6709.666666666667, + "max_query_length": 8469, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CosQA.json b/mteb/descriptive_stats/Retrieval/CosQA.json index 5dd3a9637..76febf878 100644 --- a/mteb/descriptive_stats/Retrieval/CosQA.json +++ b/mteb/descriptive_stats/Retrieval/CosQA.json @@ -1,11 +1,17 @@ { "test": { - "number_of_characters": 313.946741215298, + "number_of_characters": 5728450, "num_samples": 21104, "num_queries": 500, "num_documents": 20604, - "average_document_length": 0.013450433955314403, - "average_query_length": 0.073628, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 0.8933702193748787, + "max_document_length": 83, + "min_query_length": 88, + "average_query_length": 11420.086, + "max_query_length": 6396, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json b/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json index 6498bb736..311d92130 100644 --- a/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json @@ -1,11 +1,17 @@ { "test": { - "number_of_characters": 3799.701347237398, + "number_of_characters": 428294530, "num_samples": 115226, "num_queries": 997, "num_documents": 114229, - "average_document_length": 0.03281999517532617, - "average_query_length": 0.050858694438380335, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 0.4425671239352529, + "max_document_length": 98, + "min_query_length": 8, + "average_query_length": 429532.5737211635, + "max_query_length": 188424, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/StackOverflowQA.json b/mteb/descriptive_stats/Retrieval/StackOverflowQA.json index d0949feac..f7822563d 100644 --- a/mteb/descriptive_stats/Retrieval/StackOverflowQA.json +++ b/mteb/descriptive_stats/Retrieval/StackOverflowQA.json @@ -1,11 +1,17 @@ { "test": { - "number_of_characters": 2506.1079405241967, + "number_of_characters": 26584028, "num_samples": 21925, "num_queries": 1994, "num_documents": 19931, - "average_document_length": 0.060382397340162784, - "average_query_length": 0.6532730085944896, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 61, + "average_document_length": 130.32145903366614, + "max_document_length": 22234, + "min_query_length": 5, + "average_query_length": 12029.38365095286, + "max_query_length": 46028, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json b/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json index c833692b9..8c4c00d9c 100644 --- a/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json +++ b/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json @@ -1,11 +1,17 @@ { "test": { - "number_of_characters": 210.9770886090626, + "number_of_characters": 14041553, "num_samples": 111702, "num_queries": 5851, "num_documents": 105851, - "average_document_length": 0.0012099201759594499, - "average_query_length": 0.014169514281931103, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 4.582686984534865, + "max_document_length": 281, + "min_query_length": 17, + "average_query_length": 2316.9494103572038, + "max_query_length": 762, + "min_relevant_docs_per_query": 1.0, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1.0 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/Touche2020.json b/mteb/descriptive_stats/Retrieval/Touche2020.json index 76798710b..7be31e6f8 100644 --- a/mteb/descriptive_stats/Retrieval/Touche2020.json +++ b/mteb/descriptive_stats/Retrieval/Touche2020.json @@ -1,11 +1,17 @@ { "test": { - "number_of_characters": 1763.7633372731125, + "number_of_characters": 658107591, "num_samples": 382594, "num_queries": 49, "num_documents": 382545, - "average_document_length": 0.00449707816294695, - "average_query_length": 0.8862973760932945, - "average_relevant_docs_per_query": 19.020408163265305 + "min_document_length": 16, + "average_document_length": 0.0055627442523101854, + "max_document_length": 83, + "min_query_length": 3, + "average_query_length": 13430723.734693877, + "max_query_length": 106072, + "min_relevant_docs_per_query": 19.020408163265305, + "average_relevant_docs_per_query": 19.020408163265305, + "max_relevant_docs_per_query": 19.020408163265305 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json b/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json index 3d04c572c..574cdec08 100644 --- a/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json +++ b/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json @@ -1,11 +1,17 @@ { "test": { - "number_of_characters": 2140.8203839475027, + "number_of_characters": 637047138, "num_samples": 303781, "num_queries": 49, "num_documents": 303732, - "average_document_length": 0.006905402830518125, - "average_query_length": 0.8862973760932945, - "average_relevant_docs_per_query": 34.93877551020408 + "min_document_length": 16, + "average_document_length": 0.007006176497701922, + "max_document_length": 83, + "min_query_length": 41, + "average_query_length": 13000918.57142857, + "max_query_length": 105983, + "min_relevant_docs_per_query": 34.93877551020408, + "average_relevant_docs_per_query": 34.93877551020408, + "max_relevant_docs_per_query": 34.93877551020408 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json b/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json index 6a48e9b08..112d4f5af 100644 --- a/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json @@ -4,48 +4,96 @@ "num_docs": 121635, "num_queries": 123, "number_of_characters": 283654099, + "min_document_length": 74, "average_document_length": 2331.0777818884367, + "max_document_length": 24179, + "min_query_length": 32, "average_query_length": 81.8780487804878, + "max_query_length": 173, + "min_instruction_length": 93, "average_instruction_length": 389.9512195121951, + "max_instruction_length": 887, + "min_changed_instruction_length": 180, "average_changed_instruction_length": 450.5528455284553, + "max_changed_instruction_length": 974, + "min_average_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 10.43089430894309, + "max_average_relevant_docs_per_query": 24, + "min_average_top_ranked_per_query": 1000, "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000, "hf_subset_descriptive_stats": { "eng-fas": { "num_samples": 41229, "num_docs": 41189, "num_queries": 40, "number_of_characters": 129597567, + "min_document_length": 99, "average_document_length": 3145.4990895627475, + "max_document_length": 24179, + "min_query_length": 34, "average_query_length": 80.075, + "max_query_length": 124, + "min_instruction_length": 150, "average_instruction_length": 396.875, + "max_instruction_length": 887, + "min_changed_instruction_length": 205, "average_changed_instruction_length": 463.175, + "max_changed_instruction_length": 974, + "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 10.85, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 22, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 }, "eng-rus": { "num_samples": 39366, "num_docs": 39326, "num_queries": 40, "number_of_characters": 109522175, + "min_document_length": 75, "average_document_length": 2784.0813456746173, + "max_document_length": 24061, + "min_query_length": 32, "average_query_length": 81.875, + "max_query_length": 173, + "min_instruction_length": 93, "average_instruction_length": 371.125, + "max_instruction_length": 887, + "min_changed_instruction_length": 180, "average_changed_instruction_length": 431.8, + "max_changed_instruction_length": 957, + "min_average_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 9.775, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 24, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 }, "eng-zho": { "num_samples": 41163, "num_docs": 41120, "num_queries": 43, "number_of_characters": 44534357, + "min_document_length": 74, "average_document_length": 1082.0501215953307, + "max_document_length": 23840, + "min_query_length": 32, "average_query_length": 83.55813953488372, + "max_query_length": 159, + "min_instruction_length": 157, "average_instruction_length": 401.0232558139535, + "max_instruction_length": 731, + "min_changed_instruction_length": 209, "average_changed_instruction_length": 456.25581395348837, + "max_changed_instruction_length": 822, + "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 10.651162790697674, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 24, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 } } } diff --git a/mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json b/mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json index 893dfde70..4d2067530 100644 --- a/mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json @@ -4,48 +4,96 @@ "num_docs": 121635, "num_queries": 123, "number_of_characters": 283622456, + "min_document_length": 74, "average_document_length": 2331.0777818884367, + "max_document_length": 24179, + "min_query_length": 10, "average_query_length": 57.113821138211385, + "max_query_length": 136, + "min_instruction_length": 37, "average_instruction_length": 281.0650406504065, + "max_instruction_length": 1009, + "min_changed_instruction_length": 44, "average_changed_instruction_length": 326.9430894308943, + "max_changed_instruction_length": 1083, + "min_average_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 10.43089430894309, + "max_average_relevant_docs_per_query": 24, + "min_average_top_ranked_per_query": 1000, "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000, "hf_subset_descriptive_stats": { "fas": { "num_samples": 41229, "num_docs": 41189, "num_queries": 40, "number_of_characters": 129593838, + "min_document_length": 99, "average_document_length": 3145.4990895627475, + "max_document_length": 24179, + "min_query_length": 34, "average_query_length": 72.65, + "max_query_length": 124, + "min_instruction_length": 121, "average_instruction_length": 358.925, + "max_instruction_length": 759, + "min_changed_instruction_length": 163, "average_changed_instruction_length": 415.325, + "max_changed_instruction_length": 842, + "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 10.85, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 22, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 }, "rus": { "num_samples": 39366, "num_docs": 39326, "num_queries": 40, "number_of_characters": 109523683, + "min_document_length": 75, "average_document_length": 2784.0813456746173, + "max_document_length": 24061, + "min_query_length": 26, "average_query_length": 77.5, + "max_query_length": 136, + "min_instruction_length": 78, "average_instruction_length": 387.0, + "max_instruction_length": 1009, + "min_changed_instruction_length": 187, "average_changed_instruction_length": 458.0, + "max_changed_instruction_length": 1083, + "min_average_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 9.775, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 24, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 }, "zho": { "num_samples": 41163, "num_docs": 41120, "num_queries": 43, "number_of_characters": 44504935, + "min_document_length": 74, "average_document_length": 1082.0501215953307, + "max_document_length": 23840, + "min_query_length": 10, "average_query_length": 23.697674418604652, + "max_query_length": 44, + "min_instruction_length": 37, "average_instruction_length": 110.09302325581395, + "max_instruction_length": 209, + "min_changed_instruction_length": 44, "average_changed_instruction_length": 122.81395348837209, + "max_changed_instruction_length": 229, + "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 10.651162790697674, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 24, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 } } } diff --git a/mteb/descriptive_stats/STS/STS12.json b/mteb/descriptive_stats/STS/STS12.json index e9205c172..0e8410dec 100644 --- a/mteb/descriptive_stats/STS/STS12.json +++ b/mteb/descriptive_stats/STS/STS12.json @@ -2,8 +2,14 @@ "test": { "num_samples": 3108, "number_of_characters": 402118, + "min_sentence1_length": 3, "average_sentence1_len": 63.78893178893179, + "max_sentence1_length": 220, + "min_sentence2_length": 7, "average_sentence2_len": 65.5926640926641, - "avg_score": 3.5060643500643507 + "max_sentence2_length": 204, + "min_score": 0.0, + "avg_score": 3.5060643500643507, + "max_score": 5.0 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/STS17.json b/mteb/descriptive_stats/STS/STS17.json index 164cc9d1e..131f35d40 100644 --- a/mteb/descriptive_stats/STS/STS17.json +++ b/mteb/descriptive_stats/STS/STS17.json @@ -2,86 +2,158 @@ "test": { "num_samples": 5346, "number_of_characters": 400264, + "min_sentence1_length": 6, "average_sentence1_len": 38.14665170220726, + "max_sentence1_length": 976, + "min_sentence2_length": 6, "average_sentence2_len": 36.72502805836139, + "max_sentence2_length": 1007, + "min_score": 0.0, "avg_score": 2.3554804214989464, + "max_score": 5.0, "hf_subset_descriptive_stats": { "ko-ko": { "num_samples": 2846, "number_of_characters": 183387, + "min_sentence1_length": 6, "average_sentence1_len": 31.991918482080113, + "max_sentence1_length": 976, + "min_sentence2_length": 6, "average_sentence2_len": 32.44483485593816, - "avg_score": 2.469359920356055 + "max_sentence2_length": 1007, + "min_score": 0.0, + "avg_score": 2.469359920356055, + "max_score": 5.0 }, "ar-ar": { "num_samples": 250, "number_of_characters": 16247, + "min_sentence1_length": 11, "average_sentence1_len": 32.208, + "max_sentence1_length": 99, + "min_sentence2_length": 9, "average_sentence2_len": 32.78, - "avg_score": 2.216800000000001 + "max_sentence2_length": 83, + "min_score": 0.0, + "avg_score": 2.216800000000001, + "max_score": 5.0 }, "en-ar": { "num_samples": 250, "number_of_characters": 18764, + "min_sentence1_length": 13, "average_sentence1_len": 42.36, + "max_sentence1_length": 105, + "min_sentence2_length": 10, "average_sentence2_len": 32.696, - "avg_score": 2.1423999999999994 + "max_sentence2_length": 104, + "min_score": 0.0, + "avg_score": 2.1423999999999994, + "max_score": 5.0 }, "en-de": { "num_samples": 250, "number_of_characters": 22177, + "min_sentence1_length": 12, "average_sentence1_len": 43.952, + "max_sentence1_length": 94, + "min_sentence2_length": 15, "average_sentence2_len": 44.756, - "avg_score": 2.2776000000000014 + "max_sentence2_length": 104, + "min_score": 0.0, + "avg_score": 2.2776000000000014, + "max_score": 5.0 }, "en-en": { "num_samples": 250, "number_of_characters": 21669, + "min_sentence1_length": 12, "average_sentence1_len": 43.952, + "max_sentence1_length": 94, + "min_sentence2_length": 15, "average_sentence2_len": 42.724, - "avg_score": 2.2776000000000014 + "max_sentence2_length": 101, + "min_score": 0.0, + "avg_score": 2.2776000000000014, + "max_score": 5.0 }, "en-tr": { "num_samples": 250, "number_of_characters": 20879, + "min_sentence1_length": 15, "average_sentence1_len": 41.916, + "max_sentence1_length": 101, + "min_sentence2_length": 10, "average_sentence2_len": 41.6, - "avg_score": 2.1335999999999986 + "max_sentence2_length": 107, + "min_score": 0.0, + "avg_score": 2.1335999999999986, + "max_score": 5.0 }, "es-en": { "num_samples": 250, "number_of_characters": 23216, + "min_sentence1_length": 12, "average_sentence1_len": 50.84, + "max_sentence1_length": 160, + "min_sentence2_length": 14, "average_sentence2_len": 42.024, - "avg_score": 2.1464000000000003 + "max_sentence2_length": 117, + "min_score": 0.0, + "avg_score": 2.1464000000000003, + "max_score": 5.0 }, "es-es": { "num_samples": 250, "number_of_characters": 25265, + "min_sentence1_length": 18, "average_sentence1_len": 49.836, + "max_sentence1_length": 136, + "min_sentence2_length": 13, "average_sentence2_len": 51.224, - "avg_score": 2.2312000000000007 + "max_sentence2_length": 129, + "min_score": 0.0, + "avg_score": 2.2312000000000007, + "max_score": 5.0 }, "fr-en": { "num_samples": 250, "number_of_characters": 23087, + "min_sentence1_length": 19, "average_sentence1_len": 49.624, + "max_sentence1_length": 115, + "min_sentence2_length": 15, "average_sentence2_len": 42.724, - "avg_score": 2.2776000000000014 + "max_sentence2_length": 101, + "min_score": 0.0, + "avg_score": 2.2776000000000014, + "max_score": 5.0 }, "it-en": { "num_samples": 250, "number_of_characters": 23188, + "min_sentence1_length": 15, "average_sentence1_len": 50.028, + "max_sentence1_length": 113, + "min_sentence2_length": 15, "average_sentence2_len": 42.724, - "avg_score": 2.2776000000000014 + "max_sentence2_length": 101, + "min_score": 0.0, + "avg_score": 2.2776000000000014, + "max_score": 5.0 }, "nl-en": { "num_samples": 250, "number_of_characters": 22385, + "min_sentence1_length": 14, "average_sentence1_len": 46.816, + "max_sentence1_length": 123, + "min_sentence2_length": 15, "average_sentence2_len": 42.724, - "avg_score": 2.2776000000000014 + "max_sentence2_length": 101, + "min_score": 0.0, + "avg_score": 2.2776000000000014, + "max_score": 5.0 } } } diff --git a/mteb/descriptive_stats/Summarization/SummEval.json b/mteb/descriptive_stats/Summarization/SummEval.json index e9c0b172e..a705e47fb 100644 --- a/mteb/descriptive_stats/Summarization/SummEval.json +++ b/mteb/descriptive_stats/Summarization/SummEval.json @@ -2,9 +2,51 @@ "test": { "num_samples": 100, "number_of_characters": 212735, - "avg_text_len": 2100.35, - "avg_human_summaries_len": 11.0, - "avg_machine_summaries_len": 16.0, - "avg_relevance": 3.7770833333333336 + "min_text_length": 626, + "avg_text_length": 2100.35, + "max_text_length": 3153, + "min_human_summaries_length": 11, + "avg_human_summaries_length": 11.0, + "max_human_summaries_length": 11, + "min_machine_summaries_length": 16, + "avg_machine_summaries_length": 16.0, + "max_machine_summaries_length": 16, + "min_relevance": [ + 1.0, + 1.3333333333333333, + 3.6666666666666665, + 2.3333333333333335, + 3.6666666666666665, + 3.0, + 4.333333333333333, + 4.0, + 2.6666666666666665, + 4.0, + 2.0, + 4.666666666666667, + 4.333333333333333, + 1.0, + 2.0, + 1.0 + ], + "avg_relevance": 3.7770833333333336, + "max_relevance": [ + 5.0, + 4.666666666666667, + 4.333333333333333, + 2.6666666666666665, + 4.666666666666667, + 4.666666666666667, + 4.666666666666667, + 4.333333333333333, + 4.0, + 4.333333333333333, + 4.666666666666667, + 4.666666666666667, + 4.333333333333333, + 2.3333333333333335, + 4.666666666666667, + 4.666666666666667 + ] } } \ No newline at end of file From 966e6da3a3f162f2c55a081b075108f940c91e99 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Mon, 11 Nov 2024 12:19:56 +0300 Subject: [PATCH 2/3] add more stat --- mteb/abstasks/AbsTaskBitextMining.py | 17 +- mteb/abstasks/AbsTaskClassification.py | 5 + mteb/abstasks/AbsTaskClustering.py | 12 + mteb/abstasks/AbsTaskClusteringFast.py | 6 + mteb/abstasks/AbsTaskInstructionRetrieval.py | 38 +- .../AbsTaskMultilabelClassification.py | 13 +- mteb/abstasks/AbsTaskPairClassification.py | 15 + mteb/abstasks/AbsTaskReranking.py | 20 + mteb/abstasks/AbsTaskRetrieval.py | 37 +- mteb/abstasks/AbsTaskSTS.py | 13 + mteb/abstasks/AbsTaskSummarization.py | 31 +- .../BitextMining/BUCC.v2.json | 59 + .../BitextMining/BibleNLPBitextMining.json | 18231 +++++++++++++ .../BitextMining/BornholmBitextMining.json | 7 +- .../BitextMining/IN22ConvBitextMining.json | 3549 ++- .../BitextMining/IN22GenBitextMining.json | 5581 ++++ .../BitextMining/IWSLT2017BitextMining.json | 279 + .../IndicGenBenchFloresBitextMining.json | 1304 + .../BitextMining/NTREXBitextMining.json | 21091 ++++++++++++++++ .../BitextMining/NollySentiBitextMining.json | 59 + .../NorwegianCourtsBitextMining.json | 13 + .../NusaTranslationBitextMining.json | 84 +- .../BitextMining/PhincBitextMining.json | 14 +- .../TbilisiCityHallBitextMining.json | 37 + .../BitextMining/VieMedEVBitextMining.json | 13 + .../LanguageClassification.json | 1 + .../descriptive_stats/Retrieval/NFCorpus.json | 11 + 27 files changed, 49480 insertions(+), 1060 deletions(-) create mode 100644 mteb/descriptive_stats/BitextMining/BUCC.v2.json create mode 100644 mteb/descriptive_stats/BitextMining/BibleNLPBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/IN22GenBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/IWSLT2017BitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/IndicGenBenchFloresBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/NTREXBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/NollySentiBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/NorwegianCourtsBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/TbilisiCityHallBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/VieMedEVBitextMining.json create mode 100644 mteb/descriptive_stats/Retrieval/NFCorpus.json diff --git a/mteb/abstasks/AbsTaskBitextMining.py b/mteb/abstasks/AbsTaskBitextMining.py index 68d20aea7..eff2c663d 100644 --- a/mteb/abstasks/AbsTaskBitextMining.py +++ b/mteb/abstasks/AbsTaskBitextMining.py @@ -21,10 +21,12 @@ class BitextDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + unique_pairs: Number of duplicate pairs min_sentence1_length: Minimum length of sentence1 average_sentence1_length: Average length of sentence1 max_sentence1_length: Maximum length of sentence1 + unique_sentence1: Number of duplicates in sentence1 min_sentence2_length: Minimum length of sentence2 average_sentence2_length: Average length of sentence2 @@ -33,14 +35,17 @@ class BitextDescriptiveStatistics(DescriptiveStatistics): num_samples: int number_of_characters: int + unique_pairs: int min_sentence1_length: int average_sentence1_length: float max_sentence1_length: int + unique_sentence1: int min_sentence2_length: int average_sentence2_length: float max_sentence2_length: int + unique_sentence2: int class AbsTaskBitextMining(AbsTask): @@ -170,13 +175,21 @@ def _calculate_metrics_from_split( total_s1_len = sum(s1_len) total_s2_len = sum(s2_len) + unique_pairs = len(set(zip(sentence1, sentence2))) + unique_sentence1 = len(set(sentence1)) + unique_sentence2 = len(set(sentence2)) return BitextDescriptiveStatistics( + num_samples=len(sentence1), + number_of_characters=total_s1_len + total_s2_len, + unique_pairs=unique_pairs, + min_sentence1_length=min(s1_len), average_sentence1_length=sum(s1_len) / len(sentence1), max_sentence1_length=max(s1_len), + unique_sentence1=unique_sentence1, + min_sentence2_length=min(s2_len), average_sentence2_length=total_s2_len / len(sentence2), max_sentence2_length=max(s2_len), - num_samples=len(sentence1), - number_of_characters=total_s1_len + total_s2_len, + unique_sentence2=unique_sentence2, ) diff --git a/mteb/abstasks/AbsTaskClassification.py b/mteb/abstasks/AbsTaskClassification.py index 7c8556004..6dd6903be 100644 --- a/mteb/abstasks/AbsTaskClassification.py +++ b/mteb/abstasks/AbsTaskClassification.py @@ -26,9 +26,12 @@ class ClassificationDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + min_text_length: Minimum length of text average_text_length: Average length of text max_text_length: Maximum length of text + unique_text: Number of unique texts + unique_labels: Number of unique labels labels: dict of label frequencies """ @@ -38,6 +41,7 @@ class ClassificationDescriptiveStatistics(DescriptiveStatistics): min_text_length: int average_text_length: float max_text_length: int + unique_text: int unique_labels: int labels: dict[str, dict[str, int]] @@ -232,6 +236,7 @@ def _calculate_metrics_from_split( min_text_length=min(text_len), average_text_length=total_text_len / len(text), max_text_length=max(text_len), + unique_text=len(set(text)), unique_labels=len(label_count), labels={ str(label): {"count": count} for label, count in label_count.items() diff --git a/mteb/abstasks/AbsTaskClustering.py b/mteb/abstasks/AbsTaskClustering.py index bc91081a2..6ce3aa57c 100644 --- a/mteb/abstasks/AbsTaskClustering.py +++ b/mteb/abstasks/AbsTaskClustering.py @@ -24,10 +24,15 @@ class ClusteringDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + min_text_length: Minimum length of text average_text_length: Average length of text max_text_length: Maximum length of text + unique_texts: Number of unique texts + + min_labels_per_text: Minimum number of labels per text average_labels_per_text: Average number of labels per text + max_labels_per_text: Maximum number of labels per text unique_labels: Number of unique labels labels: dict of label frequencies """ @@ -38,6 +43,7 @@ class ClusteringDescriptiveStatistics(DescriptiveStatistics): min_text_length: int average_text_length: float max_text_length: int + unique_texts: int min_labels_per_text: int average_labels_per_text: float @@ -106,6 +112,9 @@ def _calculate_metrics_from_split( labels = self.dataset[split]["labels"] text_len = [len(t) for t in sentences] + all_sentences = [] + for s in sentences: + all_sentences.extend(s) total_text_len = sum(text_len) total_labels = [] for label in labels: @@ -117,9 +126,12 @@ def _calculate_metrics_from_split( return ClusteringDescriptiveStatistics( num_samples=len(sentences), number_of_characters=total_text_len, + min_text_length=min(text_len), average_text_length=total_text_len / len(sentences), max_text_length=max(text_len), + unique_texts=len(set(all_sentences)), + min_labels_per_text=min(label_counter.values()), average_labels_per_text=len(total_labels) / len(sentences), max_labels_per_text=max(label_counter.values()), diff --git a/mteb/abstasks/AbsTaskClusteringFast.py b/mteb/abstasks/AbsTaskClusteringFast.py index 9f462358f..40e36d29e 100644 --- a/mteb/abstasks/AbsTaskClusteringFast.py +++ b/mteb/abstasks/AbsTaskClusteringFast.py @@ -85,9 +85,12 @@ class ClusteringFastDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + min_text_length: Minimum length of text average_text_length: Average length of text max_text_length: Maximum length of text + unique_texts: Number of unique texts + min_labels_per_text: Minimum number of labels per text average_labels_per_text: Average number of labels per text max_labels_per_text: Maximum number of labels per text @@ -97,9 +100,12 @@ class ClusteringFastDescriptiveStatistics(DescriptiveStatistics): num_samples: int number_of_characters: int + min_text_length: int average_text_length: float max_text_length: int + unique_texts: int + min_labels_per_text: int average_labels_per_text: float max_labels_per_text: int diff --git a/mteb/abstasks/AbsTaskInstructionRetrieval.py b/mteb/abstasks/AbsTaskInstructionRetrieval.py index e3257974a..bc1f129d5 100644 --- a/mteb/abstasks/AbsTaskInstructionRetrieval.py +++ b/mteb/abstasks/AbsTaskInstructionRetrieval.py @@ -230,21 +230,31 @@ class InstructionRetrievalDescriptiveStatistics(DescriptiveStatistics): num_queries: Number of queries num_docs: Number of documents number_of_characters: Total number of symbols in the dataset + min_document_length: Minimum length of documents average_document_length: Average length of documents max_document_length: Maximum length of documents + unique_docs: Number of unique documents + min_query_length: Minimum length of queries average_query_length: Average length of queries max_query_length: Maximum length of queries + unique_queries: Number of unique queries + min_instruction_length: Minimum length of instructions average_instruction_length: Average length of instructions max_instruction_length: Maximum length of instructions + unique_instructions: Number of unique instructions + min_changed_instruction_length: Minimum length of changed instructions average_changed_instruction_length: Average length of changed instructions max_changed_instruction_length: Maximum length of changed instructions + unique_changed_instructions: Number of unique changed instructions + min_average_relevant_docs_per_query: Minimum number of relevant docs per query average_relevant_docs_per_query: Average number of relevant docs per query max_average_relevant_docs_per_query: Maximum number of relevant docs per query + min_average_top_ranked_per_query: Minimum number of top ranked docs per query average_top_ranked_per_query: Average number of top ranked docs per query max_average_top_ranked_per_query: Maximum number of top ranked docs per query @@ -254,21 +264,31 @@ class InstructionRetrievalDescriptiveStatistics(DescriptiveStatistics): num_queries: int num_docs: int number_of_characters: int + min_document_length: int average_document_length: float max_document_length: int + unique_docs: int + min_query_length: int average_query_length: float max_query_length: int + unique_queries: int + min_instruction_length: int average_instruction_length: float max_instruction_length: int + unique_instructions: int + min_changed_instruction_length: int average_changed_instruction_length: float max_changed_instruction_length: int + unique_changed_instructions: int + min_average_relevant_docs_per_query: float average_relevant_docs_per_query: float max_average_relevant_docs_per_query: float + min_average_top_ranked_per_query: float average_top_ranked_per_query: float max_average_top_ranked_per_query: float @@ -692,10 +712,10 @@ def _calculate_metrics_from_split( changed_instructions = self.changed_instructions[split] top_ranked = self.top_ranked[split] - corpus_len = [len(doc.get("title", "")) + len(doc["text"]) for doc in corpus.values()] - total_corpus_len = sum( - corpus_len - ) + corpus_combined = [doc.get("title", "") + doc["text"] for doc in corpus.values()] + corpus_len = [len(doc) for doc in corpus_combined] + total_corpus_len = sum(corpus_len) + queries_len = [len(query) for query in queries.values()] total_queries_len = sum(queries_len) instructions_len = [len(instruction) for instruction in og_instructions.values()] @@ -725,29 +745,39 @@ def _calculate_metrics_from_split( + total_queries_len + total_instructions_len + total_changed_instructions_len, + min_document_length=min(corpus_len), average_document_length=( total_corpus_len / len(corpus) if len(corpus) else 0 ), max_document_length=max(corpus_len), + unique_docs=len(set(corpus_combined)), + min_query_length=min(queries_len), average_query_length=( total_queries_len / len(queries) if len(queries) else 0 ), max_query_length=max(queries_len), + unique_queries=len(set(queries.values())), + min_instruction_length=min(instructions_len), average_instruction_length=( total_instructions_len / len(queries) if len(queries) else 0 ), max_instruction_length=max(instructions_len), + unique_instructions=len(set(og_instructions.values())), + min_changed_instruction_length=min(changed_instructions_len), average_changed_instruction_length=( total_changed_instructions_len / len(queries) if len(queries) else 0 ), max_changed_instruction_length=max(changed_instructions_len), + unique_changed_instructions=len(set(changed_instructions.values())), + min_average_relevant_docs_per_query=min(qrels_non_zero), average_relevant_docs_per_query=qrels_per_doc, max_average_relevant_docs_per_query=max(qrels_non_zero), + min_average_top_ranked_per_query=min(ranked_per_query), average_top_ranked_per_query=top_ranked_per_query, max_average_top_ranked_per_query=max(ranked_per_query), diff --git a/mteb/abstasks/AbsTaskMultilabelClassification.py b/mteb/abstasks/AbsTaskMultilabelClassification.py index b67c54474..2ae6ae6cf 100644 --- a/mteb/abstasks/AbsTaskMultilabelClassification.py +++ b/mteb/abstasks/AbsTaskMultilabelClassification.py @@ -47,9 +47,12 @@ class MultilabelClassificationDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + min_text_length: Minimum length of text average_text_length: Average length of text max_text_length: Maximum length of text + unique_texts: Number of unique texts + min_labels_per_text: Minimum number of labels per text average_label_per_text: Average number of labels per text max_labels_per_text: Maximum number of labels per text @@ -59,9 +62,12 @@ class MultilabelClassificationDescriptiveStatistics(DescriptiveStatistics): num_samples: int number_of_characters: int + min_text_length: int average_text_length: float max_text_length: int + unique_texts: int + min_labels_per_text: int average_label_per_text: float max_labels_per_text: int @@ -259,14 +265,17 @@ def _calculate_metrics_from_split( total_labels.extend(l if len(l) > 0 else [None]) label_count = Counter(total_labels) return MultilabelClassificationDescriptiveStatistics( + num_samples=len(text), + number_of_characters=total_text_len, + min_text_length=min(text_len), average_text_length=total_text_len / len(text), max_text_length=max(text_len), - number_of_characters=total_text_len, + unique_texts=len(set(text)), + min_labels_per_text=min(label_len), average_label_per_text=total_label_len / len(label), max_labels_per_text=max(label_len), - num_samples=len(text), unique_labels=len(label_count), labels={ str(label): { diff --git a/mteb/abstasks/AbsTaskPairClassification.py b/mteb/abstasks/AbsTaskPairClassification.py index 7f140d5ee..a6ff94fe1 100644 --- a/mteb/abstasks/AbsTaskPairClassification.py +++ b/mteb/abstasks/AbsTaskPairClassification.py @@ -20,24 +20,34 @@ class PairClassificationDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + min_sentence1_length: Minimum length of sentence1 avg_sentence1_length: Average length of sentence1 max_sentence1_length: Maximum length of sentence1 + unique_sentence1: Number of unique sentence + min_sentence2_length: Minimum length of sentence2 avg_sentence2_length: Average length of sentence2 max_sentence2_length: Maximum length of sentence2 + unique_sentence2: Number of unique sentence + unique_labels: Number of unique labels labels: dict of label frequencies """ num_samples: int number_of_characters: int + min_sentence1_length: int avg_sentence1_length: float max_sentence1_length: int + unique_sentence1: int + min_sentence2_length: int avg_sentence2_length: float max_sentence2_length: int + unique_sentence2: int + unique_labels: int labels: dict[str, dict[str, int]] @@ -125,12 +135,17 @@ def _calculate_metrics_from_split( return PairClassificationDescriptiveStatistics( num_samples=len(sentence1), number_of_characters=total_sentence1_len + total_sentence2_len, + min_sentence1_length=min(sentence1_len), avg_sentence1_length=total_sentence1_len / len(sentence1), max_sentence1_length=max(sentence1_len), + unique_sentence1=len(set(sentence1)), + min_sentence2_length=min(sentence2_len), avg_sentence2_length=total_sentence2_len / len(sentence2), max_sentence2_length=max(sentence2_len), + unique_sentence2=len(set(sentence2)), + unique_labels=len(set(labels)), labels={ str(label): {"count": count} for label, count in label_count.items() diff --git a/mteb/abstasks/AbsTaskReranking.py b/mteb/abstasks/AbsTaskReranking.py index 9f2b40b3c..a22aa1951 100644 --- a/mteb/abstasks/AbsTaskReranking.py +++ b/mteb/abstasks/AbsTaskReranking.py @@ -20,30 +20,42 @@ class RerankingDescriptiveStatistics(DescriptiveStatistics): number_of_characters: Total number of symbols in the dataset. num_positive: Number of positive examples num_negative: Number of negative examples + min_query_length: Minimum length of queries avg_query_length: Average length of queries max_query_length: Maximum length of queries + unique_query: Number of unique queries + min_positive_length: Minimum length of positive examples avg_positive_length: Average length of positive examples max_positive_length: Maximum length of positive examples + unique_positive: Number of unique positive examples + min_negative_length: Minimum length of negative examples avg_negative_length: Average length of negative examples max_negative_length: Maximum length of negative examples + unique_negative: Number of unique negative examples """ num_samples: int number_of_characters: int num_positive: int num_negative: int + min_query_length: int avg_query_length: float max_query_length: int + unique_query: int + min_positive_length: int avg_positive_length: float max_positive_length: int + unique_positive: int + min_negative_length: int avg_negative_length: float max_negative_length: int + unique_negative: int class AbsTaskReranking(AbsTask): @@ -123,13 +135,21 @@ def _calculate_metrics_from_split( + total_len_negative, num_positive=len(positive), num_negative=len(negative), + min_query_length=min(len_query), avg_query_length=total_len_query / len(query), max_query_length=max(len_query), + unique_query=len(set(query)), + min_positive_length=min(len_positive), avg_positive_length=total_len_positive / len(positive), max_positive_length=max(len_positive), + unique_positive=len(set(positive)), + + min_negative_length=min(len_negative), avg_negative_length=total_len_negative / len(negative), + max_negative_length=max(len_negative), + unique_negative=len(set(negative)), ) diff --git a/mteb/abstasks/AbsTaskRetrieval.py b/mteb/abstasks/AbsTaskRetrieval.py index 18300b0c4..c0371da7f 100644 --- a/mteb/abstasks/AbsTaskRetrieval.py +++ b/mteb/abstasks/AbsTaskRetrieval.py @@ -206,30 +206,42 @@ class RetrievalDescriptiveStatistics(DescriptiveStatistics): num_queries: number of queries in the dataset num_documents: Number of documents number_of_characters: Total number of symbols in the dataset + min_document_length: Minimum length of documents average_document_length: Average length of documents max_document_length: Maximum length of documents + unique_documents: Number of unique documents + min_query_length: Minimum length of queries average_query_length: Average length of queries max_query_length: Maximum length of queries + unique_queries: Number of unique queries + min_relevant_docs_per_query: Minimum number of relevant documents per query average_relevant_docs_per_query: Average number of relevant documents per query max_relevant_docs_per_query: Maximum number of relevant documents per query + unique_relevant_docs: Number of unique relevant documents """ num_samples: int num_queries: int num_documents: int number_of_characters: int + min_document_length: int average_document_length: float max_document_length: int + unique_documents: int + min_query_length: int average_query_length: float max_query_length: int + unique_queries: int + min_relevant_docs_per_query: int average_relevant_docs_per_query: float max_relevant_docs_per_query: int + unique_relevant_docs: int class AbsTaskRetrieval(AbsTask): @@ -448,26 +460,37 @@ def _calculate_metrics_from_split( num_documents = len(corpus) num_queries = len(queries) - # number of qrels that are not 0 - num_qrels_non_zero = sum( - sum(1 for doc_id in docs if docs[doc_id] != 0) - for docs in relevant_docs.values() + # create a list of number of relevant docs per query + qrels_lengths = [ + len(relevant_docs[qid]) for qid in relevant_docs if qid in queries + ] + num_qrels = sum( + qrels_lengths ) - qrels_per_doc = num_qrels_non_zero / len(relevant_docs) if num_queries else 0 + qrels_per_doc = num_qrels / len(relevant_docs) if num_queries else 0 + unique_qrels = len(set( + [doc for qid in relevant_docs for doc in relevant_docs[qid]] + )) return RetrievalDescriptiveStatistics( number_of_characters=sum(query_len) + sum(doc_len), num_samples=num_documents + num_queries, num_queries=num_queries, num_documents=num_documents, + min_document_length=min(doc_len), average_document_length=sum(doc_len) / num_documents, max_document_length=max(doc_len), + unique_documents=len(set(corpus)), + min_query_length=min(query_len), average_query_length=sum(query_len) / num_queries, max_query_length=max(query_len), - min_relevant_docs_per_query=qrels_per_doc, + unique_queries=len(set(queries)), + + min_relevant_docs_per_query=min(qrels_lengths), average_relevant_docs_per_query=qrels_per_doc, - max_relevant_docs_per_query=qrels_per_doc, + max_relevant_docs_per_query=max(qrels_lengths), + unique_relevant_docs=unique_qrels, ) diff --git a/mteb/abstasks/AbsTaskSTS.py b/mteb/abstasks/AbsTaskSTS.py index 88a0df87e..1c2cf0aac 100644 --- a/mteb/abstasks/AbsTaskSTS.py +++ b/mteb/abstasks/AbsTaskSTS.py @@ -17,12 +17,15 @@ class STSDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + min_sentence1_length: Minimum length of sentence1 average_sentence1_len: Average length of sentence1 max_sentence1_length: Maximum length of sentence1 + min_sentence2_length: Minimum length of sentence2 average_sentence2_len: Average length of sentence2 max_sentence2_length: Maximum length of sentence2 + min_score: Minimum score avg_score: Average score max_score: Maximum score @@ -30,12 +33,17 @@ class STSDescriptiveStatistics(DescriptiveStatistics): num_samples: int number_of_characters: int + min_sentence1_length: int average_sentence1_len: float max_sentence1_length: int + unique_sentence1: int + min_sentence2_length: int average_sentence2_len: float max_sentence2_length: int + unique_sentence2: int + min_score: float avg_score: float max_score: float @@ -113,12 +121,17 @@ def _calculate_metrics_from_split( return STSDescriptiveStatistics( num_samples=len(sentence1), number_of_characters=total_sentence1_len + total_sentence2_len, + min_sentence1_length=min(sentence1_len), average_sentence1_len=total_sentence1_len / len(sentence1), max_sentence1_length=max(sentence1_len), + unique_sentence1=len(set(sentence1)), + min_sentence2_length=min(sentence2_len), average_sentence2_len=total_sentence2_len / len(sentence2), max_sentence2_length=max(sentence2_len), + unique_sentence2=len(set(sentence2)), + min_score=min(score), avg_score=avg_score, max_score=max(score), diff --git a/mteb/abstasks/AbsTaskSummarization.py b/mteb/abstasks/AbsTaskSummarization.py index 9cc705f13..40a912637 100644 --- a/mteb/abstasks/AbsTaskSummarization.py +++ b/mteb/abstasks/AbsTaskSummarization.py @@ -21,15 +21,22 @@ class SummarizationDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + min_text_length: Minimum length of text avg_text_length: Average length of text max_text_length: Maximum length of text + unique_texts: Number of unique texts + min_human_summaries_length: Minimum length of human summaries avg_human_summaries_length: Average length of human summaries max_human_summaries_length: Maximum length of human summaries + unique_human_summaries: Number of unique human summaries + min_machine_summaries_length: Minimum length of machine summaries avg_machine_summaries_length: Average length of machine summaries - max_machine_summaries_length: Maximum length of machine + max_machine_summaries_length: Maximum length of machine summaries + unique_machine_summaries: Number of unique machine summaries + min_relevance: Minimum relevance score avg_relevance: Average relevance score max_relevance: Maximum relevance score @@ -37,15 +44,22 @@ class SummarizationDescriptiveStatistics(DescriptiveStatistics): num_samples: int number_of_characters: int + min_text_length: int avg_text_length: float max_text_length: int + unique_texts: int + min_human_summaries_length: int avg_human_summaries_length: float max_human_summaries_length: int + unique_human_summaries: int + min_machine_summaries_length: int avg_machine_summaries_length: float max_machine_summaries_length: int + unique_machine_summaries: int + min_relevance: float avg_relevance: float max_relevance: float @@ -128,6 +142,14 @@ def _calculate_metrics_from_split( machine_summaries = self.dataset[split]["machine_summaries"] relevance = self.dataset[split]["relevance"] + all_human_summaries = [] + for s in human_summaries: + all_human_summaries.extend(s) + + all_machine_summaries = [] + for s in machine_summaries: + all_machine_summaries.extend(s) + text_len = [len(t) for t in text] total_text_len = sum(text_len) human_summaries_len = [len(s) for s in human_summaries] @@ -140,15 +162,22 @@ def _calculate_metrics_from_split( number_of_characters=total_text_len + total_human_summaries_len + total_machine_summaries_len, + min_text_length=min(text_len), avg_text_length=total_text_len / len(text), max_text_length=max(text_len), + unique_texts=len(set(text)), + min_human_summaries_length=min(human_summaries_len), avg_human_summaries_length=total_human_summaries_len / len(text), max_human_summaries_length=max(human_summaries_len), + unique_human_summaries=len(set(all_human_summaries)), + min_machine_summaries_length=min(machine_summaries_len), avg_machine_summaries_length=total_machine_summaries_len / len(text), max_machine_summaries_length=max(machine_summaries_len), + unique_machine_summaries=len(set(all_machine_summaries)), + min_relevance=min(relevance), avg_relevance=total_relevance / len(relevance), max_relevance=max(relevance), diff --git a/mteb/descriptive_stats/BitextMining/BUCC.v2.json b/mteb/descriptive_stats/BitextMining/BUCC.v2.json new file mode 100644 index 000000000..983ed7ca8 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/BUCC.v2.json @@ -0,0 +1,59 @@ +{ + "test": { + "min_sentence1_length": 16, + "average_sentence1_length": 99.10931428571429, + "max_sentence1_length": 204, + "min_sentence2_length": 42, + "average_sentence2_length": 90.60588571428572, + "max_sentence2_length": 159, + "num_samples": 35000, + "num_samples_sentence2": 35000, + "number_of_characters": 6640032, + "hf_subset_descriptive_stats": { + "de-en": { + "min_sentence1_length": 50, + "average_sentence1_length": 109.07974947807934, + "max_sentence1_length": 204, + "min_sentence2_length": 46, + "average_sentence2_length": 91.25396659707724, + "max_sentence2_length": 155, + "num_samples": 9580, + "num_samples_sentence2": 9580, + "number_of_characters": 1919197 + }, + "fr-en": { + "min_sentence1_length": 43, + "average_sentence1_length": 99.31785163988553, + "max_sentence1_length": 174, + "min_sentence2_length": 42, + "average_sentence2_length": 85.3117983711204, + "max_sentence2_length": 159, + "num_samples": 9086, + "num_samples_sentence2": 9086, + "number_of_characters": 1677545 + }, + "ru-en": { + "min_sentence1_length": 40, + "average_sentence1_length": 101.6593003117423, + "max_sentence1_length": 186, + "min_sentence2_length": 45, + "average_sentence2_length": 92.88216141323173, + "max_sentence2_length": 159, + "num_samples": 14435, + "num_samples_sentence2": 14435, + "number_of_characters": 2808206 + }, + "zh-en": { + "min_sentence1_length": 16, + "average_sentence1_length": 28.429699842022117, + "max_sentence1_length": 40, + "min_sentence2_length": 48, + "average_sentence2_length": 95.3638757240653, + "max_sentence2_length": 159, + "num_samples": 1899, + "num_samples_sentence2": 1899, + "number_of_characters": 235084 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/BibleNLPBitextMining.json b/mteb/descriptive_stats/BitextMining/BibleNLPBitextMining.json new file mode 100644 index 000000000..50704e012 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/BibleNLPBitextMining.json @@ -0,0 +1,18231 @@ +{ + "train": { + "min_sentence1_length": 1, + "average_sentence1_length": 158.52821402221093, + "max_sentence1_length": 4949, + "min_sentence2_length": 1, + "average_sentence2_length": 158.52821402221093, + "max_sentence2_length": 4949, + "num_samples": 417452, + "num_samples_sentence2": 417452, + "number_of_characters": 132355840, + "hf_subset_descriptive_stats": { + "eng_Latn-aai_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 32, + "average_sentence2_length": 146.66796875, + "max_sentence2_length": 322, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66320 + }, + "aai_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 146.66796875, + "max_sentence1_length": 322, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66320 + }, + "eng_Latn-aak_Arab": { + "min_sentence1_length": 21, + "average_sentence1_length": 112.16015625, + "max_sentence1_length": 227, + "min_sentence2_length": 46, + "average_sentence2_length": 292.203125, + "max_sentence2_length": 809, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 103517 + }, + "aak_Arab-eng_Latn": { + "min_sentence1_length": 46, + "average_sentence1_length": 292.203125, + "max_sentence1_length": 809, + "min_sentence2_length": 21, + "average_sentence2_length": 112.16015625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 103517 + }, + "eng_Latn-aau_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.42578125, + "max_sentence1_length": 227, + "min_sentence2_length": 34, + "average_sentence2_length": 197.53515625, + "max_sentence2_length": 496, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78838 + }, + "aau_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 197.53515625, + "max_sentence1_length": 496, + "min_sentence2_length": 24, + "average_sentence2_length": 110.42578125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78838 + }, + "eng_Latn-aaz_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.76953125, + "max_sentence1_length": 251, + "min_sentence2_length": 33, + "average_sentence2_length": 281.2265625, + "max_sentence2_length": 1407, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101375 + }, + "aaz_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 281.2265625, + "max_sentence1_length": 1407, + "min_sentence2_length": 24, + "average_sentence2_length": 114.76953125, + "max_sentence2_length": 251, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101375 + }, + "eng_Latn-abt_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 146.171875, + "max_sentence1_length": 341, + "min_sentence2_length": 29, + "average_sentence2_length": 273.06640625, + "max_sentence2_length": 758, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 107325 + }, + "abt_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 273.06640625, + "max_sentence1_length": 758, + "min_sentence2_length": 1, + "average_sentence2_length": 146.171875, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 107325 + }, + "eng_Latn-abx_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.1796875, + "max_sentence1_length": 827, + "min_sentence2_length": 36, + "average_sentence2_length": 185.4375, + "max_sentence2_length": 606, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76702 + }, + "abx_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 185.4375, + "max_sentence1_length": 606, + "min_sentence2_length": 24, + "average_sentence2_length": 114.1796875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76702 + }, + "eng_Latn-aby_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.16015625, + "max_sentence1_length": 228, + "min_sentence2_length": 42, + "average_sentence2_length": 282.90234375, + "max_sentence2_length": 931, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101648 + }, + "aby_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 282.90234375, + "max_sentence1_length": 931, + "min_sentence2_length": 24, + "average_sentence2_length": 114.16015625, + "max_sentence2_length": 228, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101648 + }, + "eng_Latn-acf_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.02734375, + "max_sentence1_length": 827, + "min_sentence2_length": 28, + "average_sentence2_length": 146.59765625, + "max_sentence2_length": 441, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66720 + }, + "acf_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 146.59765625, + "max_sentence1_length": 441, + "min_sentence2_length": 24, + "average_sentence2_length": 114.02734375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66720 + }, + "eng_Latn-acr_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 145.234375, + "max_sentence1_length": 341, + "min_sentence2_length": 53, + "average_sentence2_length": 199.171875, + "max_sentence2_length": 474, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88168 + }, + "acr_Latn-eng_Latn": { + "min_sentence1_length": 53, + "average_sentence1_length": 199.171875, + "max_sentence1_length": 474, + "min_sentence2_length": 35, + "average_sentence2_length": 145.234375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88168 + }, + "eng_Latn-acu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.046875, + "max_sentence1_length": 238, + "min_sentence2_length": 37, + "average_sentence2_length": 249.4921875, + "max_sentence2_length": 641, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92554 + }, + "acu_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 249.4921875, + "max_sentence1_length": 641, + "min_sentence2_length": 24, + "average_sentence2_length": 112.046875, + "max_sentence2_length": 238, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92554 + }, + "eng_Latn-adz_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 108.69140625, + "max_sentence1_length": 248, + "min_sentence2_length": 45, + "average_sentence2_length": 146.26171875, + "max_sentence2_length": 456, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65268 + }, + "adz_Latn-eng_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 146.26171875, + "max_sentence1_length": 456, + "min_sentence2_length": 23, + "average_sentence2_length": 108.69140625, + "max_sentence2_length": 248, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65268 + }, + "eng_Latn-aer_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.55859375, + "max_sentence1_length": 227, + "min_sentence2_length": 48, + "average_sentence2_length": 463.94921875, + "max_sentence2_length": 1597, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 147074 + }, + "aer_Latn-eng_Latn": { + "min_sentence1_length": 48, + "average_sentence1_length": 463.94921875, + "max_sentence1_length": 1597, + "min_sentence2_length": 24, + "average_sentence2_length": 110.55859375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 147074 + }, + "eng_Latn-aey_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 111.3203125, + "max_sentence1_length": 227, + "min_sentence2_length": 39, + "average_sentence2_length": 208.2109375, + "max_sentence2_length": 769, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81800 + }, + "aey_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 208.2109375, + "max_sentence1_length": 769, + "min_sentence2_length": 31, + "average_sentence2_length": 111.3203125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81800 + }, + "eng_Latn-agd_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.18359375, + "max_sentence1_length": 827, + "min_sentence2_length": 35, + "average_sentence2_length": 180.50390625, + "max_sentence2_length": 442, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75440 + }, + "agd_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 180.50390625, + "max_sentence1_length": 442, + "min_sentence2_length": 24, + "average_sentence2_length": 114.18359375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75440 + }, + "eng_Latn-agg_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.45703125, + "max_sentence1_length": 227, + "min_sentence2_length": 44, + "average_sentence2_length": 247.20703125, + "max_sentence2_length": 852, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92330 + }, + "agg_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 247.20703125, + "max_sentence1_length": 852, + "min_sentence2_length": 24, + "average_sentence2_length": 113.45703125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92330 + }, + "eng_Latn-agm_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.86328125, + "max_sentence1_length": 827, + "min_sentence2_length": 60, + "average_sentence2_length": 359.12890625, + "max_sentence2_length": 1291, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 121086 + }, + "agm_Latn-eng_Latn": { + "min_sentence1_length": 60, + "average_sentence1_length": 359.12890625, + "max_sentence1_length": 1291, + "min_sentence2_length": 24, + "average_sentence2_length": 113.86328125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 121086 + }, + "eng_Latn-agn_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.05078125, + "max_sentence1_length": 227, + "min_sentence2_length": 37, + "average_sentence2_length": 174.1015625, + "max_sentence2_length": 455, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72743 + }, + "agn_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 174.1015625, + "max_sentence1_length": 455, + "min_sentence2_length": 24, + "average_sentence2_length": 110.05078125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72743 + }, + "eng_Latn-agr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.0078125, + "max_sentence1_length": 827, + "min_sentence2_length": 36, + "average_sentence2_length": 164.63671875, + "max_sentence2_length": 402, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71589 + }, + "agr_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 164.63671875, + "max_sentence1_length": 402, + "min_sentence2_length": 24, + "average_sentence2_length": 115.0078125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71589 + }, + "eng_Latn-agt_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 112.78125, + "max_sentence1_length": 271, + "min_sentence2_length": 37, + "average_sentence2_length": 255.2734375, + "max_sentence2_length": 1289, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 94222 + }, + "agt_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 255.2734375, + "max_sentence1_length": 1289, + "min_sentence2_length": 31, + "average_sentence2_length": 112.78125, + "max_sentence2_length": 271, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 94222 + }, + "eng_Latn-agu_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 114.25, + "max_sentence1_length": 827, + "min_sentence2_length": 42, + "average_sentence2_length": 186.59375, + "max_sentence2_length": 486, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77016 + }, + "agu_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 186.59375, + "max_sentence1_length": 486, + "min_sentence2_length": 31, + "average_sentence2_length": 114.25, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77016 + }, + "eng_Latn-aia_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.66015625, + "max_sentence1_length": 827, + "min_sentence2_length": 34, + "average_sentence2_length": 211.5546875, + "max_sentence2_length": 753, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83511 + }, + "aia_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 211.5546875, + "max_sentence1_length": 753, + "min_sentence2_length": 24, + "average_sentence2_length": 114.66015625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83511 + }, + "eng_Latn-aii_Syrc": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 26, + "average_sentence2_length": 118.921875, + "max_sentence2_length": 264, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59217 + }, + "aii_Syrc-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 118.921875, + "max_sentence1_length": 264, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59217 + }, + "eng_Latn-aka_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 21, + "average_sentence2_length": 108.4921875, + "max_sentence2_length": 240, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 56547 + }, + "aka_Latn-eng_Latn": { + "min_sentence1_length": 21, + "average_sentence1_length": 108.4921875, + "max_sentence1_length": 240, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 56547 + }, + "eng_Latn-ake_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.05078125, + "max_sentence1_length": 827, + "min_sentence2_length": 33, + "average_sentence2_length": 144.27734375, + "max_sentence2_length": 348, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65876 + }, + "ake_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 144.27734375, + "max_sentence1_length": 348, + "min_sentence2_length": 24, + "average_sentence2_length": 113.05078125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65876 + }, + "eng_Latn-alp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.28515625, + "max_sentence1_length": 227, + "min_sentence2_length": 29, + "average_sentence2_length": 193.80078125, + "max_sentence2_length": 605, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78102 + }, + "alp_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 193.80078125, + "max_sentence1_length": 605, + "min_sentence2_length": 24, + "average_sentence2_length": 111.28515625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78102 + }, + "eng_Latn-alq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.6328125, + "max_sentence1_length": 827, + "min_sentence2_length": 37, + "average_sentence2_length": 210.26953125, + "max_sentence2_length": 629, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83431 + }, + "alq_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 210.26953125, + "max_sentence1_length": 629, + "min_sentence2_length": 24, + "average_sentence2_length": 115.6328125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83431 + }, + "eng_Latn-als_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.4375, + "max_sentence1_length": 827, + "min_sentence2_length": 25, + "average_sentence2_length": 114.828125, + "max_sentence2_length": 265, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58436 + }, + "als_Latn-eng_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 114.828125, + "max_sentence1_length": 265, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58436 + }, + "eng_Latn-aly_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 116.77734375, + "max_sentence1_length": 827, + "min_sentence2_length": 57, + "average_sentence2_length": 306.80859375, + "max_sentence2_length": 2026, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 108438 + }, + "aly_Latn-eng_Latn": { + "min_sentence1_length": 57, + "average_sentence1_length": 306.80859375, + "max_sentence1_length": 2026, + "min_sentence2_length": 24, + "average_sentence2_length": 116.77734375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 108438 + }, + "eng_Latn-ame_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.58984375, + "max_sentence1_length": 227, + "min_sentence2_length": 45, + "average_sentence2_length": 358.25, + "max_sentence2_length": 1180, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 119767 + }, + "ame_Latn-eng_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 358.25, + "max_sentence1_length": 1180, + "min_sentence2_length": 24, + "average_sentence2_length": 109.58984375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 119767 + }, + "eng_Latn-amf_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.21484375, + "max_sentence1_length": 227, + "min_sentence2_length": 37, + "average_sentence2_length": 147.92578125, + "max_sentence2_length": 402, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66084 + }, + "amf_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 147.92578125, + "max_sentence1_length": 402, + "min_sentence2_length": 24, + "average_sentence2_length": 110.21484375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66084 + }, + "eng_Latn-amk_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.08984375, + "max_sentence1_length": 827, + "min_sentence2_length": 48, + "average_sentence2_length": 207.99609375, + "max_sentence2_length": 556, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82454 + }, + "amk_Latn-eng_Latn": { + "min_sentence1_length": 48, + "average_sentence1_length": 207.99609375, + "max_sentence1_length": 556, + "min_sentence2_length": 24, + "average_sentence2_length": 114.08984375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82454 + }, + "eng_Latn-amm_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.375, + "max_sentence1_length": 227, + "min_sentence2_length": 43, + "average_sentence2_length": 215.09375, + "max_sentence2_length": 714, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83832 + }, + "amm_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 215.09375, + "max_sentence1_length": 714, + "min_sentence2_length": 24, + "average_sentence2_length": 112.375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83832 + }, + "eng_Latn-amn_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "min_sentence2_length": 29, + "average_sentence2_length": 180.0234375, + "max_sentence2_length": 484, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83291 + }, + "amn_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 180.0234375, + "max_sentence1_length": 484, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83291 + }, + "eng_Latn-amo_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.08203125, + "max_sentence1_length": 827, + "min_sentence2_length": 7, + "average_sentence2_length": 111.30078125, + "max_sentence2_length": 294, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57698 + }, + "amo_Latn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 111.30078125, + "max_sentence1_length": 294, + "min_sentence2_length": 24, + "average_sentence2_length": 114.08203125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57698 + }, + "eng_Latn-amp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.0625, + "max_sentence1_length": 227, + "min_sentence2_length": 42, + "average_sentence2_length": 264.0, + "max_sentence2_length": 1162, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 96272 + }, + "amp_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 264.0, + "max_sentence1_length": 1162, + "min_sentence2_length": 24, + "average_sentence2_length": 112.0625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 96272 + }, + "eng_Latn-amr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.26953125, + "max_sentence1_length": 227, + "min_sentence2_length": 45, + "average_sentence2_length": 272.71875, + "max_sentence2_length": 805, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 98557 + }, + "amr_Latn-eng_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 272.71875, + "max_sentence1_length": 805, + "min_sentence2_length": 24, + "average_sentence2_length": 112.26953125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 98557 + }, + "eng_Latn-amu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.24609375, + "max_sentence1_length": 227, + "min_sentence2_length": 40, + "average_sentence2_length": 189.109375, + "max_sentence2_length": 505, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76635 + }, + "amu_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 189.109375, + "max_sentence1_length": 505, + "min_sentence2_length": 24, + "average_sentence2_length": 110.24609375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76635 + }, + "eng_Latn-amx_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.9609375, + "max_sentence1_length": 243, + "min_sentence2_length": 37, + "average_sentence2_length": 265.48046875, + "max_sentence2_length": 925, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 96369 + }, + "amx_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 265.48046875, + "max_sentence1_length": 925, + "min_sentence2_length": 24, + "average_sentence2_length": 110.9609375, + "max_sentence2_length": 243, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 96369 + }, + "eng_Latn-anh_Latn": { + "min_sentence1_length": 50, + "average_sentence1_length": 110.45045045045045, + "max_sentence1_length": 257, + "min_sentence2_length": 58, + "average_sentence2_length": 218.67567567567568, + "max_sentence2_length": 1063, + "num_samples": 111, + "num_samples_sentence2": 111, + "number_of_characters": 36533 + }, + "anh_Latn-eng_Latn": { + "min_sentence1_length": 58, + "average_sentence1_length": 218.67567567567568, + "max_sentence1_length": 1063, + "min_sentence2_length": 50, + "average_sentence2_length": 110.45045045045045, + "max_sentence2_length": 257, + "num_samples": 111, + "num_samples_sentence2": 111, + "number_of_characters": 36533 + }, + "eng_Latn-anv_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.015625, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 155.27734375, + "max_sentence2_length": 441, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68939 + }, + "anv_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 155.27734375, + "max_sentence1_length": 441, + "min_sentence2_length": 24, + "average_sentence2_length": 114.015625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68939 + }, + "eng_Latn-aoi_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 103.12890625, + "max_sentence1_length": 245, + "min_sentence2_length": 84, + "average_sentence2_length": 442.04296875, + "max_sentence2_length": 1797, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 139564 + }, + "aoi_Latn-eng_Latn": { + "min_sentence1_length": 84, + "average_sentence1_length": 442.04296875, + "max_sentence1_length": 1797, + "min_sentence2_length": 23, + "average_sentence2_length": 103.12890625, + "max_sentence2_length": 245, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 139564 + }, + "eng_Latn-aoj_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "min_sentence2_length": 44, + "average_sentence2_length": 250.44921875, + "max_sentence2_length": 607, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101320 + }, + "aoj_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 250.44921875, + "max_sentence1_length": 607, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101320 + }, + "eng_Latn-aom_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.6953125, + "max_sentence1_length": 827, + "min_sentence2_length": 38, + "average_sentence2_length": 229.5234375, + "max_sentence2_length": 756, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88120 + }, + "aom_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 229.5234375, + "max_sentence1_length": 756, + "min_sentence2_length": 24, + "average_sentence2_length": 114.6953125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88120 + }, + "eng_Latn-aon_Latn": { + "min_sentence1_length": 21, + "average_sentence1_length": 110.48046875, + "max_sentence1_length": 227, + "min_sentence2_length": 36, + "average_sentence2_length": 311.30859375, + "max_sentence2_length": 1125, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 107978 + }, + "aon_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 311.30859375, + "max_sentence1_length": 1125, + "min_sentence2_length": 21, + "average_sentence2_length": 110.48046875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 107978 + }, + "eng_Latn-apb_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.05078125, + "max_sentence1_length": 246, + "min_sentence2_length": 45, + "average_sentence2_length": 249.72265625, + "max_sentence2_length": 1260, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92614 + }, + "apb_Latn-eng_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 249.72265625, + "max_sentence1_length": 1260, + "min_sentence2_length": 24, + "average_sentence2_length": 112.05078125, + "max_sentence2_length": 246, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92614 + }, + "eng_Latn-ape_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 146.4375, + "max_sentence1_length": 341, + "min_sentence2_length": 69, + "average_sentence2_length": 310.66796875, + "max_sentence2_length": 772, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 117019 + }, + "ape_Latn-eng_Latn": { + "min_sentence1_length": 69, + "average_sentence1_length": 310.66796875, + "max_sentence1_length": 772, + "min_sentence2_length": 35, + "average_sentence2_length": 146.4375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 117019 + }, + "eng_Latn-apn_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.38671875, + "max_sentence1_length": 227, + "min_sentence2_length": 61, + "average_sentence2_length": 448.11328125, + "max_sentence2_length": 1608, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 144000 + }, + "apn_Latn-eng_Latn": { + "min_sentence1_length": 61, + "average_sentence1_length": 448.11328125, + "max_sentence1_length": 1608, + "min_sentence2_length": 24, + "average_sentence2_length": 114.38671875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 144000 + }, + "eng_Latn-apr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.44140625, + "max_sentence1_length": 227, + "min_sentence2_length": 54, + "average_sentence2_length": 269.625, + "max_sentence2_length": 956, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97297 + }, + "apr_Latn-eng_Latn": { + "min_sentence1_length": 54, + "average_sentence1_length": 269.625, + "max_sentence1_length": 956, + "min_sentence2_length": 24, + "average_sentence2_length": 110.44140625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97297 + }, + "eng_Latn-apu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 43, + "average_sentence2_length": 198.68359375, + "max_sentence2_length": 719, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80398 + }, + "apu_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 198.68359375, + "max_sentence1_length": 719, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80398 + }, + "eng_Latn-apw_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.4375, + "max_sentence1_length": 827, + "min_sentence2_length": 32, + "average_sentence2_length": 150.6875, + "max_sentence2_length": 321, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67616 + }, + "apw_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 150.6875, + "max_sentence1_length": 321, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67616 + }, + "eng_Latn-apz_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 111.77734375, + "max_sentence1_length": 227, + "min_sentence2_length": 47, + "average_sentence2_length": 314.078125, + "max_sentence2_length": 1291, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 109019 + }, + "apz_Latn-eng_Latn": { + "min_sentence1_length": 47, + "average_sentence1_length": 314.078125, + "max_sentence1_length": 1291, + "min_sentence2_length": 31, + "average_sentence2_length": 111.77734375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 109019 + }, + "eng_Latn-arb_Arab": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "min_sentence2_length": 54, + "average_sentence2_length": 162.15625, + "max_sentence2_length": 381, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79287 + }, + "arb_Arab-eng_Latn": { + "min_sentence1_length": 54, + "average_sentence1_length": 162.15625, + "max_sentence1_length": 381, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79287 + }, + "eng_Latn-are_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.390625, + "max_sentence1_length": 827, + "min_sentence2_length": 35, + "average_sentence2_length": 182.50390625, + "max_sentence2_length": 559, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76005 + }, + "are_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 182.50390625, + "max_sentence1_length": 559, + "min_sentence2_length": 24, + "average_sentence2_length": 114.390625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76005 + }, + "eng_Latn-arl_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.2421875, + "max_sentence1_length": 227, + "min_sentence2_length": 55, + "average_sentence2_length": 294.5234375, + "max_sentence2_length": 1347, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 103620 + }, + "arl_Latn-eng_Latn": { + "min_sentence1_length": 55, + "average_sentence1_length": 294.5234375, + "max_sentence1_length": 1347, + "min_sentence2_length": 24, + "average_sentence2_length": 110.2421875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 103620 + }, + "eng_Latn-arn_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 113.11328125, + "max_sentence1_length": 827, + "min_sentence2_length": 38, + "average_sentence2_length": 170.55859375, + "max_sentence2_length": 485, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72620 + }, + "arn_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 170.55859375, + "max_sentence1_length": 485, + "min_sentence2_length": 31, + "average_sentence2_length": 113.11328125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72620 + }, + "eng_Latn-arp_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 107.47311827956989, + "max_sentence1_length": 245, + "min_sentence2_length": 34, + "average_sentence2_length": 119.58064516129032, + "max_sentence2_length": 272, + "num_samples": 93, + "num_samples_sentence2": 93, + "number_of_characters": 21116 + }, + "arp_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 119.58064516129032, + "max_sentence1_length": 272, + "min_sentence2_length": 37, + "average_sentence2_length": 107.47311827956989, + "max_sentence2_length": 245, + "num_samples": 93, + "num_samples_sentence2": 93, + "number_of_characters": 21116 + }, + "eng_Latn-asm_Beng": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 22, + "average_sentence2_length": 122.984375, + "max_sentence2_length": 307, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60257 + }, + "asm_Beng-eng_Latn": { + "min_sentence1_length": 22, + "average_sentence1_length": 122.984375, + "max_sentence1_length": 307, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60257 + }, + "eng_Latn-aso_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 108.3828125, + "max_sentence1_length": 227, + "min_sentence2_length": 41, + "average_sentence2_length": 309.41015625, + "max_sentence2_length": 1257, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 106955 + }, + "aso_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 309.41015625, + "max_sentence1_length": 1257, + "min_sentence2_length": 24, + "average_sentence2_length": 108.3828125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 106955 + }, + "eng_Latn-ata_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.84375, + "max_sentence1_length": 827, + "min_sentence2_length": 36, + "average_sentence2_length": 198.95703125, + "max_sentence2_length": 571, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79821 + }, + "ata_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 198.95703125, + "max_sentence1_length": 571, + "min_sentence2_length": 24, + "average_sentence2_length": 112.84375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79821 + }, + "eng_Latn-atb_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.2265625, + "max_sentence1_length": 227, + "min_sentence2_length": 30, + "average_sentence2_length": 170.921875, + "max_sentence2_length": 439, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71974 + }, + "atb_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 170.921875, + "max_sentence1_length": 439, + "min_sentence2_length": 24, + "average_sentence2_length": 110.2265625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71974 + }, + "eng_Latn-atd_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.36328125, + "max_sentence1_length": 227, + "min_sentence2_length": 44, + "average_sentence2_length": 235.46875, + "max_sentence2_length": 815, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88789 + }, + "atd_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 235.46875, + "max_sentence1_length": 815, + "min_sentence2_length": 24, + "average_sentence2_length": 111.36328125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88789 + }, + "eng_Latn-atg_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.75390625, + "max_sentence1_length": 227, + "min_sentence2_length": 24, + "average_sentence2_length": 135.8359375, + "max_sentence2_length": 372, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62871 + }, + "atg_Latn-eng_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 135.8359375, + "max_sentence1_length": 372, + "min_sentence2_length": 24, + "average_sentence2_length": 109.75390625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62871 + }, + "eng_Latn-att_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 38, + "average_sentence2_length": 217.02734375, + "max_sentence2_length": 622, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84332 + }, + "att_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 217.02734375, + "max_sentence1_length": 622, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84332 + }, + "eng_Latn-auc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.109375, + "max_sentence1_length": 827, + "min_sentence2_length": 58, + "average_sentence2_length": 262.625, + "max_sentence2_length": 912, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 96444 + }, + "auc_Latn-eng_Latn": { + "min_sentence1_length": 58, + "average_sentence1_length": 262.625, + "max_sentence1_length": 912, + "min_sentence2_length": 24, + "average_sentence2_length": 114.109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 96444 + }, + "eng_Latn-aui_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 115.68359375, + "max_sentence1_length": 269, + "min_sentence2_length": 56, + "average_sentence2_length": 176.31640625, + "max_sentence2_length": 760, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74752 + }, + "aui_Latn-eng_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 176.31640625, + "max_sentence1_length": 760, + "min_sentence2_length": 38, + "average_sentence2_length": 115.68359375, + "max_sentence2_length": 269, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74752 + }, + "eng_Latn-auy_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 108.5234375, + "max_sentence1_length": 227, + "min_sentence2_length": 42, + "average_sentence2_length": 205.5, + "max_sentence2_length": 632, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80390 + }, + "auy_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 205.5, + "max_sentence1_length": 632, + "min_sentence2_length": 24, + "average_sentence2_length": 108.5234375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80390 + }, + "eng_Latn-avt_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.71484375, + "max_sentence1_length": 227, + "min_sentence2_length": 40, + "average_sentence2_length": 200.4765625, + "max_sentence2_length": 657, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79665 + }, + "avt_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 200.4765625, + "max_sentence1_length": 657, + "min_sentence2_length": 24, + "average_sentence2_length": 110.71484375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79665 + }, + "eng_Latn-awb_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.54296875, + "max_sentence1_length": 227, + "min_sentence2_length": 30, + "average_sentence2_length": 178.2421875, + "max_sentence2_length": 492, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73673 + }, + "awb_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 178.2421875, + "max_sentence1_length": 492, + "min_sentence2_length": 24, + "average_sentence2_length": 109.54296875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73673 + }, + "eng_Latn-awk_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 106.98989898989899, + "max_sentence1_length": 245, + "min_sentence2_length": 49, + "average_sentence2_length": 126.5959595959596, + "max_sentence2_length": 250, + "num_samples": 99, + "num_samples_sentence2": 99, + "number_of_characters": 23125 + }, + "awk_Latn-eng_Latn": { + "min_sentence1_length": 49, + "average_sentence1_length": 126.5959595959596, + "max_sentence1_length": 250, + "min_sentence2_length": 37, + "average_sentence2_length": 106.98989898989899, + "max_sentence2_length": 245, + "num_samples": 99, + "num_samples_sentence2": 99, + "number_of_characters": 23125 + }, + "eng_Latn-awx_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 120.97265625, + "max_sentence1_length": 251, + "min_sentence2_length": 17, + "average_sentence2_length": 134.5234375, + "max_sentence2_length": 439, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65407 + }, + "awx_Latn-eng_Latn": { + "min_sentence1_length": 17, + "average_sentence1_length": 134.5234375, + "max_sentence1_length": 439, + "min_sentence2_length": 23, + "average_sentence2_length": 120.97265625, + "max_sentence2_length": 251, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65407 + }, + "eng_Latn-azb_Arab": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.0859375, + "max_sentence1_length": 227, + "min_sentence2_length": 17, + "average_sentence2_length": 108.828125, + "max_sentence2_length": 333, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 56042 + }, + "azb_Arab-eng_Latn": { + "min_sentence1_length": 17, + "average_sentence1_length": 108.828125, + "max_sentence1_length": 333, + "min_sentence2_length": 24, + "average_sentence2_length": 110.0859375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 56042 + }, + "eng_Latn-azg_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.5390625, + "max_sentence1_length": 827, + "min_sentence2_length": 52, + "average_sentence2_length": 236.22265625, + "max_sentence2_length": 641, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89539 + }, + "azg_Latn-eng_Latn": { + "min_sentence1_length": 52, + "average_sentence1_length": 236.22265625, + "max_sentence1_length": 641, + "min_sentence2_length": 24, + "average_sentence2_length": 113.5390625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89539 + }, + "eng_Latn-azz_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.234375, + "max_sentence1_length": 227, + "min_sentence2_length": 74, + "average_sentence2_length": 282.5859375, + "max_sentence2_length": 618, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 100818 + }, + "azz_Latn-eng_Latn": { + "min_sentence1_length": 74, + "average_sentence1_length": 282.5859375, + "max_sentence1_length": 618, + "min_sentence2_length": 24, + "average_sentence2_length": 111.234375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 100818 + }, + "eng_Latn-bao_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.05078125, + "max_sentence1_length": 227, + "min_sentence2_length": 41, + "average_sentence2_length": 177.0390625, + "max_sentence2_length": 444, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73239 + }, + "bao_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 177.0390625, + "max_sentence1_length": 444, + "min_sentence2_length": 24, + "average_sentence2_length": 109.05078125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73239 + }, + "eng_Latn-bba_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.53125, + "max_sentence1_length": 227, + "min_sentence2_length": 22, + "average_sentence2_length": 122.30078125, + "max_sentence2_length": 582, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59605 + }, + "bba_Latn-eng_Latn": { + "min_sentence1_length": 22, + "average_sentence1_length": 122.30078125, + "max_sentence1_length": 582, + "min_sentence2_length": 24, + "average_sentence2_length": 110.53125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59605 + }, + "eng_Latn-bbb_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 108.83984375, + "max_sentence1_length": 232, + "min_sentence2_length": 45, + "average_sentence2_length": 251.58984375, + "max_sentence2_length": 671, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92270 + }, + "bbb_Latn-eng_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 251.58984375, + "max_sentence1_length": 671, + "min_sentence2_length": 24, + "average_sentence2_length": 108.83984375, + "max_sentence2_length": 232, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92270 + }, + "eng_Latn-bbr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.89453125, + "max_sentence1_length": 216, + "min_sentence2_length": 39, + "average_sentence2_length": 220.9140625, + "max_sentence2_length": 1295, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85199 + }, + "bbr_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 220.9140625, + "max_sentence1_length": 1295, + "min_sentence2_length": 24, + "average_sentence2_length": 111.89453125, + "max_sentence2_length": 216, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85199 + }, + "eng_Latn-bch_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.9765625, + "max_sentence1_length": 227, + "min_sentence2_length": 38, + "average_sentence2_length": 194.5625, + "max_sentence2_length": 699, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78218 + }, + "bch_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 194.5625, + "max_sentence1_length": 699, + "min_sentence2_length": 24, + "average_sentence2_length": 110.9765625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78218 + }, + "eng_Latn-bco_Latn": { + "min_sentence1_length": 21, + "average_sentence1_length": 111.3359375, + "max_sentence1_length": 243, + "min_sentence2_length": 34, + "average_sentence2_length": 230.51953125, + "max_sentence2_length": 688, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87515 + }, + "bco_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 230.51953125, + "max_sentence1_length": 688, + "min_sentence2_length": 21, + "average_sentence2_length": 111.3359375, + "max_sentence2_length": 243, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87515 + }, + "eng_Latn-bdd_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.73828125, + "max_sentence1_length": 227, + "min_sentence2_length": 36, + "average_sentence2_length": 164.11328125, + "max_sentence2_length": 432, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70106 + }, + "bdd_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 164.11328125, + "max_sentence1_length": 432, + "min_sentence2_length": 24, + "average_sentence2_length": 109.73828125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70106 + }, + "eng_Latn-bea_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 114.08666666666667, + "max_sentence1_length": 257, + "min_sentence2_length": 45, + "average_sentence2_length": 124.71333333333334, + "max_sentence2_length": 289, + "num_samples": 150, + "num_samples_sentence2": 150, + "number_of_characters": 35820 + }, + "bea_Latn-eng_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 124.71333333333334, + "max_sentence1_length": 289, + "min_sentence2_length": 45, + "average_sentence2_length": 114.08666666666667, + "max_sentence2_length": 257, + "num_samples": 150, + "num_samples_sentence2": 150, + "number_of_characters": 35820 + }, + "eng_Latn-bef_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 110.01171875, + "max_sentence1_length": 227, + "min_sentence2_length": 57, + "average_sentence2_length": 243.50390625, + "max_sentence2_length": 693, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90500 + }, + "bef_Latn-eng_Latn": { + "min_sentence1_length": 57, + "average_sentence1_length": 243.50390625, + "max_sentence1_length": 693, + "min_sentence2_length": 32, + "average_sentence2_length": 110.01171875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90500 + }, + "eng_Latn-bel_Cyrl": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "min_sentence2_length": 40, + "average_sentence2_length": 127.609375, + "max_sentence2_length": 264, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70443 + }, + "bel_Cyrl-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 127.609375, + "max_sentence1_length": 264, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70443 + }, + "eng_Latn-ben_Beng": { + "min_sentence1_length": 21, + "average_sentence1_length": 114.9375, + "max_sentence1_length": 827, + "min_sentence2_length": 26, + "average_sentence2_length": 111.85546875, + "max_sentence2_length": 238, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58059 + }, + "ben_Beng-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 111.85546875, + "max_sentence1_length": 238, + "min_sentence2_length": 21, + "average_sentence2_length": 114.9375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58059 + }, + "eng_Latn-beo_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.91796875, + "max_sentence1_length": 827, + "min_sentence2_length": 1, + "average_sentence2_length": 176.2578125, + "max_sentence2_length": 515, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74029 + }, + "beo_Latn-eng_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 176.2578125, + "max_sentence1_length": 515, + "min_sentence2_length": 24, + "average_sentence2_length": 112.91796875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74029 + }, + "eng_Latn-beu_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 114.171875, + "max_sentence1_length": 257, + "min_sentence2_length": 44, + "average_sentence2_length": 273.8359375, + "max_sentence2_length": 1204, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99330 + }, + "beu_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 273.8359375, + "max_sentence1_length": 1204, + "min_sentence2_length": 38, + "average_sentence2_length": 114.171875, + "max_sentence2_length": 257, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99330 + }, + "eng_Latn-bgs_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.97265625, + "max_sentence1_length": 827, + "min_sentence2_length": 36, + "average_sentence2_length": 167.515625, + "max_sentence2_length": 596, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72317 + }, + "bgs_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 167.515625, + "max_sentence1_length": 596, + "min_sentence2_length": 24, + "average_sentence2_length": 114.97265625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72317 + }, + "eng_Latn-bgt_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.68359375, + "max_sentence1_length": 376, + "min_sentence2_length": 39, + "average_sentence2_length": 230.9296875, + "max_sentence2_length": 876, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88221 + }, + "bgt_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 230.9296875, + "max_sentence1_length": 876, + "min_sentence2_length": 24, + "average_sentence2_length": 113.68359375, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88221 + }, + "eng_Latn-bhg_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 111.7578125, + "max_sentence1_length": 243, + "min_sentence2_length": 46, + "average_sentence2_length": 186.07421875, + "max_sentence2_length": 589, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76245 + }, + "bhg_Latn-eng_Latn": { + "min_sentence1_length": 46, + "average_sentence1_length": 186.07421875, + "max_sentence1_length": 589, + "min_sentence2_length": 38, + "average_sentence2_length": 111.7578125, + "max_sentence2_length": 243, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76245 + }, + "eng_Latn-bhl_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 111.8671875, + "max_sentence1_length": 227, + "min_sentence2_length": 60, + "average_sentence2_length": 268.83984375, + "max_sentence2_length": 1263, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97461 + }, + "bhl_Latn-eng_Latn": { + "min_sentence1_length": 60, + "average_sentence1_length": 268.83984375, + "max_sentence1_length": 1263, + "min_sentence2_length": 31, + "average_sentence2_length": 111.8671875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97461 + }, + "eng_Latn-big_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 111.85546875, + "max_sentence1_length": 227, + "min_sentence2_length": 43, + "average_sentence2_length": 245.296875, + "max_sentence2_length": 1052, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91431 + }, + "big_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 245.296875, + "max_sentence1_length": 1052, + "min_sentence2_length": 31, + "average_sentence2_length": 111.85546875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91431 + }, + "eng_Latn-bjk_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.4609375, + "max_sentence1_length": 227, + "min_sentence2_length": 33, + "average_sentence2_length": 196.8828125, + "max_sentence2_length": 654, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78424 + }, + "bjk_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 196.8828125, + "max_sentence1_length": 654, + "min_sentence2_length": 24, + "average_sentence2_length": 109.4609375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78424 + }, + "eng_Latn-bjp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.14453125, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 151.33203125, + "max_sentence2_length": 412, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68218 + }, + "bjp_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 151.33203125, + "max_sentence1_length": 412, + "min_sentence2_length": 24, + "average_sentence2_length": 115.14453125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68218 + }, + "eng_Latn-bjr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.734375, + "max_sentence1_length": 239, + "min_sentence2_length": 39, + "average_sentence2_length": 306.8671875, + "max_sentence2_length": 1198, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 107418 + }, + "bjr_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 306.8671875, + "max_sentence1_length": 1198, + "min_sentence2_length": 24, + "average_sentence2_length": 112.734375, + "max_sentence2_length": 239, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 107418 + }, + "eng_Latn-bjv_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 34, + "average_sentence2_length": 141.203125, + "max_sentence2_length": 331, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65683 + }, + "bjv_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 141.203125, + "max_sentence1_length": 331, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65683 + }, + "eng_Latn-bjz_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.48828125, + "max_sentence1_length": 227, + "min_sentence2_length": 28, + "average_sentence2_length": 269.55078125, + "max_sentence2_length": 907, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97546 + }, + "bjz_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 269.55078125, + "max_sentence1_length": 907, + "min_sentence2_length": 24, + "average_sentence2_length": 111.48828125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97546 + }, + "eng_Latn-bkd_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.11328125, + "max_sentence1_length": 227, + "min_sentence2_length": 32, + "average_sentence2_length": 154.0, + "max_sentence2_length": 436, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67869 + }, + "bkd_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 154.0, + "max_sentence1_length": 436, + "min_sentence2_length": 24, + "average_sentence2_length": 111.11328125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67869 + }, + "eng_Latn-bki_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 114.66015625, + "max_sentence1_length": 827, + "min_sentence2_length": 28, + "average_sentence2_length": 266.97265625, + "max_sentence2_length": 1015, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97698 + }, + "bki_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 266.97265625, + "max_sentence1_length": 1015, + "min_sentence2_length": 31, + "average_sentence2_length": 114.66015625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97698 + }, + "eng_Latn-bkq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.3125, + "max_sentence1_length": 216, + "min_sentence2_length": 26, + "average_sentence2_length": 237.59375, + "max_sentence2_length": 901, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89576 + }, + "bkq_Latn-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 237.59375, + "max_sentence1_length": 901, + "min_sentence2_length": 24, + "average_sentence2_length": 112.3125, + "max_sentence2_length": 216, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89576 + }, + "eng_Latn-bkx_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 112.90625, + "max_sentence1_length": 257, + "min_sentence2_length": 54, + "average_sentence2_length": 216.9296875, + "max_sentence2_length": 789, + "num_samples": 128, + "num_samples_sentence2": 128, + "number_of_characters": 42219 + }, + "bkx_Latn-eng_Latn": { + "min_sentence1_length": 54, + "average_sentence1_length": 216.9296875, + "max_sentence1_length": 789, + "min_sentence2_length": 45, + "average_sentence2_length": 112.90625, + "max_sentence2_length": 257, + "num_samples": 128, + "num_samples_sentence2": 128, + "number_of_characters": 42219 + }, + "eng_Latn-blw_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.12890625, + "max_sentence1_length": 227, + "min_sentence2_length": 39, + "average_sentence2_length": 209.64453125, + "max_sentence2_length": 667, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81606 + }, + "blw_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 209.64453125, + "max_sentence1_length": 667, + "min_sentence2_length": 24, + "average_sentence2_length": 109.12890625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81606 + }, + "eng_Latn-blz_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.46875, + "max_sentence1_length": 827, + "min_sentence2_length": 32, + "average_sentence2_length": 162.29296875, + "max_sentence2_length": 397, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71107 + }, + "blz_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 162.29296875, + "max_sentence1_length": 397, + "min_sentence2_length": 24, + "average_sentence2_length": 115.46875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71107 + }, + "eng_Latn-bmh_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.51171875, + "max_sentence1_length": 273, + "min_sentence2_length": 40, + "average_sentence2_length": 225.7734375, + "max_sentence2_length": 1302, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86345 + }, + "bmh_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 225.7734375, + "max_sentence1_length": 1302, + "min_sentence2_length": 24, + "average_sentence2_length": 111.51171875, + "max_sentence2_length": 273, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86345 + }, + "eng_Latn-bmk_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 111.0, + "max_sentence1_length": 257, + "min_sentence2_length": 44, + "average_sentence2_length": 157.64566929133858, + "max_sentence2_length": 722, + "num_samples": 127, + "num_samples_sentence2": 127, + "number_of_characters": 34118 + }, + "bmk_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 157.64566929133858, + "max_sentence1_length": 722, + "min_sentence2_length": 39, + "average_sentence2_length": 111.0, + "max_sentence2_length": 257, + "num_samples": 127, + "num_samples_sentence2": 127, + "number_of_characters": 34118 + }, + "eng_Latn-bmr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.98828125, + "max_sentence1_length": 376, + "min_sentence2_length": 45, + "average_sentence2_length": 152.140625, + "max_sentence2_length": 348, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67617 + }, + "bmr_Latn-eng_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 152.140625, + "max_sentence1_length": 348, + "min_sentence2_length": 24, + "average_sentence2_length": 111.98828125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67617 + }, + "eng_Latn-bmu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.6640625, + "max_sentence1_length": 376, + "min_sentence2_length": 42, + "average_sentence2_length": 217.7734375, + "max_sentence2_length": 623, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84080 + }, + "bmu_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 217.7734375, + "max_sentence1_length": 623, + "min_sentence2_length": 24, + "average_sentence2_length": 110.6640625, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84080 + }, + "eng_Latn-bnp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.5703125, + "max_sentence1_length": 227, + "min_sentence2_length": 35, + "average_sentence2_length": 164.7890625, + "max_sentence2_length": 753, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70748 + }, + "bnp_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 164.7890625, + "max_sentence1_length": 753, + "min_sentence2_length": 24, + "average_sentence2_length": 111.5703125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70748 + }, + "eng_Latn-boa_Latn": { + "min_sentence1_length": 21, + "average_sentence1_length": 115.5859375, + "max_sentence1_length": 273, + "min_sentence2_length": 35, + "average_sentence2_length": 199.6328125, + "max_sentence2_length": 612, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80696 + }, + "boa_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 199.6328125, + "max_sentence1_length": 612, + "min_sentence2_length": 21, + "average_sentence2_length": 115.5859375, + "max_sentence2_length": 273, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80696 + }, + "eng_Latn-boj_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "min_sentence2_length": 43, + "average_sentence2_length": 218.3359375, + "max_sentence2_length": 614, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93099 + }, + "boj_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 218.3359375, + "max_sentence1_length": 614, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93099 + }, + "eng_Latn-bon_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 110.4765625, + "max_sentence1_length": 227, + "min_sentence2_length": 38, + "average_sentence2_length": 209.63671875, + "max_sentence2_length": 808, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81949 + }, + "bon_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 209.63671875, + "max_sentence1_length": 808, + "min_sentence2_length": 31, + "average_sentence2_length": 110.4765625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81949 + }, + "eng_Latn-box_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.04296875, + "max_sentence1_length": 827, + "min_sentence2_length": 37, + "average_sentence2_length": 151.21484375, + "max_sentence2_length": 408, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68162 + }, + "box_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 151.21484375, + "max_sentence1_length": 408, + "min_sentence2_length": 24, + "average_sentence2_length": 115.04296875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68162 + }, + "eng_Latn-bpr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.71484375, + "max_sentence1_length": 376, + "min_sentence2_length": 28, + "average_sentence2_length": 124.25, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60407 + }, + "bpr_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 124.25, + "max_sentence1_length": 376, + "min_sentence2_length": 24, + "average_sentence2_length": 111.71484375, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60407 + }, + "eng_Latn-bps_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.96875, + "max_sentence1_length": 227, + "min_sentence2_length": 30, + "average_sentence2_length": 147.59765625, + "max_sentence2_length": 536, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66193 + }, + "bps_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 147.59765625, + "max_sentence1_length": 536, + "min_sentence2_length": 24, + "average_sentence2_length": 110.96875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66193 + }, + "eng_Latn-bqc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.90625, + "max_sentence1_length": 227, + "min_sentence2_length": 21, + "average_sentence2_length": 84.66015625, + "max_sentence2_length": 215, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 49809 + }, + "bqc_Latn-eng_Latn": { + "min_sentence1_length": 21, + "average_sentence1_length": 84.66015625, + "max_sentence1_length": 215, + "min_sentence2_length": 24, + "average_sentence2_length": 109.90625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 49809 + }, + "eng_Latn-bqp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.90625, + "max_sentence1_length": 227, + "min_sentence2_length": 22, + "average_sentence2_length": 98.44140625, + "max_sentence2_length": 251, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 53337 + }, + "bqp_Latn-eng_Latn": { + "min_sentence1_length": 22, + "average_sentence1_length": 98.44140625, + "max_sentence1_length": 251, + "min_sentence2_length": 24, + "average_sentence2_length": 109.90625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 53337 + }, + "eng_Latn-bre_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 107.06640625, + "max_sentence1_length": 245, + "min_sentence2_length": 32, + "average_sentence2_length": 108.109375, + "max_sentence2_length": 251, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 55085 + }, + "bre_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 108.109375, + "max_sentence1_length": 251, + "min_sentence2_length": 40, + "average_sentence2_length": 107.06640625, + "max_sentence2_length": 245, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 55085 + }, + "eng_Latn-bsj_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.875, + "max_sentence1_length": 827, + "min_sentence2_length": 24, + "average_sentence2_length": 110.1328125, + "max_sentence2_length": 401, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57602 + }, + "bsj_Latn-eng_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.1328125, + "max_sentence1_length": 401, + "min_sentence2_length": 24, + "average_sentence2_length": 114.875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57602 + }, + "eng_Latn-bsn_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.49609375, + "max_sentence1_length": 230, + "min_sentence2_length": 44, + "average_sentence2_length": 284.43359375, + "max_sentence2_length": 974, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101102 + }, + "bsn_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 284.43359375, + "max_sentence1_length": 974, + "min_sentence2_length": 24, + "average_sentence2_length": 110.49609375, + "max_sentence2_length": 230, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101102 + }, + "eng_Latn-bsp_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 113.30859375, + "max_sentence1_length": 245, + "min_sentence2_length": 27, + "average_sentence2_length": 113.1953125, + "max_sentence2_length": 276, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57985 + }, + "bsp_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 113.1953125, + "max_sentence1_length": 276, + "min_sentence2_length": 37, + "average_sentence2_length": 113.30859375, + "max_sentence2_length": 245, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57985 + }, + "eng_Latn-bss_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 37, + "average_sentence2_length": 154.640625, + "max_sentence2_length": 497, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68361 + }, + "bss_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 154.640625, + "max_sentence1_length": 497, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68361 + }, + "eng_Latn-buk_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.93359375, + "max_sentence1_length": 227, + "min_sentence2_length": 30, + "average_sentence2_length": 194.6328125, + "max_sentence2_length": 586, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77969 + }, + "buk_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 194.6328125, + "max_sentence1_length": 586, + "min_sentence2_length": 24, + "average_sentence2_length": 109.93359375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77969 + }, + "eng_Latn-bus_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.90625, + "max_sentence1_length": 227, + "min_sentence2_length": 22, + "average_sentence2_length": 98.015625, + "max_sentence2_length": 239, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 53228 + }, + "bus_Latn-eng_Latn": { + "min_sentence1_length": 22, + "average_sentence1_length": 98.015625, + "max_sentence1_length": 239, + "min_sentence2_length": 24, + "average_sentence2_length": 109.90625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 53228 + }, + "eng_Latn-bvd_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.15234375, + "max_sentence1_length": 827, + "min_sentence2_length": 43, + "average_sentence2_length": 204.75, + "max_sentence2_length": 615, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81383 + }, + "bvd_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 204.75, + "max_sentence1_length": 615, + "min_sentence2_length": 24, + "average_sentence2_length": 113.15234375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81383 + }, + "eng_Latn-bvr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.515625, + "max_sentence1_length": 827, + "min_sentence2_length": 49, + "average_sentence2_length": 306.55859375, + "max_sentence2_length": 773, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 107539 + }, + "bvr_Latn-eng_Latn": { + "min_sentence1_length": 49, + "average_sentence1_length": 306.55859375, + "max_sentence1_length": 773, + "min_sentence2_length": 24, + "average_sentence2_length": 113.515625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 107539 + }, + "eng_Latn-bxh_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 115.453125, + "max_sentence1_length": 257, + "min_sentence2_length": 23, + "average_sentence2_length": 133.3359375, + "max_sentence2_length": 387, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63690 + }, + "bxh_Latn-eng_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 133.3359375, + "max_sentence1_length": 387, + "min_sentence2_length": 38, + "average_sentence2_length": 115.453125, + "max_sentence2_length": 257, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63690 + }, + "eng_Latn-byr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 107.50390625, + "max_sentence1_length": 227, + "min_sentence2_length": 33, + "average_sentence2_length": 225.57421875, + "max_sentence2_length": 556, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85268 + }, + "byr_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 225.57421875, + "max_sentence1_length": 556, + "min_sentence2_length": 24, + "average_sentence2_length": 107.50390625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85268 + }, + "eng_Latn-byx_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 108.93359375, + "max_sentence1_length": 227, + "min_sentence2_length": 42, + "average_sentence2_length": 276.94140625, + "max_sentence2_length": 1031, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 98784 + }, + "byx_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 276.94140625, + "max_sentence1_length": 1031, + "min_sentence2_length": 24, + "average_sentence2_length": 108.93359375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 98784 + }, + "eng_Latn-bzd_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.390625, + "max_sentence1_length": 227, + "min_sentence2_length": 24, + "average_sentence2_length": 202.2890625, + "max_sentence2_length": 587, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80046 + }, + "bzd_Latn-eng_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 202.2890625, + "max_sentence1_length": 587, + "min_sentence2_length": 24, + "average_sentence2_length": 110.390625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80046 + }, + "eng_Latn-bzh_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.41796875, + "max_sentence1_length": 827, + "min_sentence2_length": 36, + "average_sentence2_length": 166.80859375, + "max_sentence2_length": 511, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72250 + }, + "bzh_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 166.80859375, + "max_sentence1_length": 511, + "min_sentence2_length": 24, + "average_sentence2_length": 115.41796875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72250 + }, + "eng_Latn-bzj_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.90625, + "max_sentence1_length": 227, + "min_sentence2_length": 30, + "average_sentence2_length": 146.23046875, + "max_sentence2_length": 447, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65571 + }, + "bzj_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 146.23046875, + "max_sentence1_length": 447, + "min_sentence2_length": 24, + "average_sentence2_length": 109.90625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65571 + }, + "eng_Latn-caa_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 36, + "average_sentence2_length": 229.0078125, + "max_sentence2_length": 628, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88161 + }, + "caa_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 229.0078125, + "max_sentence1_length": 628, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88161 + }, + "eng_Latn-cab_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.90234375, + "max_sentence1_length": 227, + "min_sentence2_length": 28, + "average_sentence2_length": 163.0703125, + "max_sentence2_length": 518, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69881 + }, + "cab_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 163.0703125, + "max_sentence1_length": 518, + "min_sentence2_length": 24, + "average_sentence2_length": 109.90234375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69881 + }, + "eng_Latn-cac_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 108.96484375, + "max_sentence1_length": 227, + "min_sentence2_length": 27, + "average_sentence2_length": 176.08203125, + "max_sentence2_length": 431, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72972 + }, + "cac_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 176.08203125, + "max_sentence1_length": 431, + "min_sentence2_length": 24, + "average_sentence2_length": 108.96484375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72972 + }, + "eng_Latn-caf_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.8125, + "max_sentence1_length": 827, + "min_sentence2_length": 34, + "average_sentence2_length": 174.11328125, + "max_sentence2_length": 433, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73709 + }, + "caf_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 174.11328125, + "max_sentence1_length": 433, + "min_sentence2_length": 24, + "average_sentence2_length": 113.8125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73709 + }, + "eng_Latn-cak_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.15234375, + "max_sentence1_length": 341, + "min_sentence2_length": 78, + "average_sentence2_length": 242.734375, + "max_sentence2_length": 584, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99811 + }, + "cak_Latn-eng_Latn": { + "min_sentence1_length": 78, + "average_sentence1_length": 242.734375, + "max_sentence1_length": 584, + "min_sentence2_length": 56, + "average_sentence2_length": 147.15234375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99811 + }, + "eng_Latn-cao_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.76953125, + "max_sentence1_length": 227, + "min_sentence2_length": 37, + "average_sentence2_length": 175.046875, + "max_sentence2_length": 445, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73169 + }, + "cao_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 175.046875, + "max_sentence1_length": 445, + "min_sentence2_length": 24, + "average_sentence2_length": 110.76953125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73169 + }, + "eng_Latn-cap_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.47265625, + "max_sentence1_length": 227, + "min_sentence2_length": 46, + "average_sentence2_length": 224.6953125, + "max_sentence2_length": 667, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85803 + }, + "cap_Latn-eng_Latn": { + "min_sentence1_length": 46, + "average_sentence1_length": 224.6953125, + "max_sentence1_length": 667, + "min_sentence2_length": 24, + "average_sentence2_length": 110.47265625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85803 + }, + "eng_Latn-car_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.234375, + "max_sentence1_length": 827, + "min_sentence2_length": 36, + "average_sentence2_length": 147.98828125, + "max_sentence2_length": 386, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67129 + }, + "car_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 147.98828125, + "max_sentence1_length": 386, + "min_sentence2_length": 24, + "average_sentence2_length": 114.234375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67129 + }, + "eng_Latn-cav_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.59765625, + "max_sentence1_length": 227, + "min_sentence2_length": 36, + "average_sentence2_length": 313.08984375, + "max_sentence2_length": 1077, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 109488 + }, + "cav_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 313.08984375, + "max_sentence1_length": 1077, + "min_sentence2_length": 24, + "average_sentence2_length": 114.59765625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 109488 + }, + "eng_Latn-cax_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.59375, + "max_sentence1_length": 227, + "min_sentence2_length": 39, + "average_sentence2_length": 179.125, + "max_sentence2_length": 409, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74168 + }, + "cax_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 179.125, + "max_sentence1_length": 409, + "min_sentence2_length": 24, + "average_sentence2_length": 110.59375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74168 + }, + "eng_Latn-cbc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.39453125, + "max_sentence1_length": 827, + "min_sentence2_length": 61, + "average_sentence2_length": 284.9296875, + "max_sentence2_length": 1210, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 102227 + }, + "cbc_Latn-eng_Latn": { + "min_sentence1_length": 61, + "average_sentence1_length": 284.9296875, + "max_sentence1_length": 1210, + "min_sentence2_length": 24, + "average_sentence2_length": 114.39453125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 102227 + }, + "eng_Latn-cbi_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 113.79296875, + "max_sentence1_length": 827, + "min_sentence2_length": 37, + "average_sentence2_length": 187.0, + "max_sentence2_length": 733, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77003 + }, + "cbi_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 187.0, + "max_sentence1_length": 733, + "min_sentence2_length": 31, + "average_sentence2_length": 113.79296875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77003 + }, + "eng_Latn-cbk_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.02734375, + "max_sentence1_length": 827, + "min_sentence2_length": 38, + "average_sentence2_length": 196.265625, + "max_sentence2_length": 532, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79435 + }, + "cbk_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 196.265625, + "max_sentence1_length": 532, + "min_sentence2_length": 24, + "average_sentence2_length": 114.02734375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79435 + }, + "eng_Latn-cbr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.453125, + "max_sentence1_length": 238, + "min_sentence2_length": 30, + "average_sentence2_length": 212.15234375, + "max_sentence2_length": 814, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82587 + }, + "cbr_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 212.15234375, + "max_sentence1_length": 814, + "min_sentence2_length": 24, + "average_sentence2_length": 110.453125, + "max_sentence2_length": 238, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82587 + }, + "eng_Latn-cbs_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 112.0546875, + "max_sentence1_length": 227, + "min_sentence2_length": 43, + "average_sentence2_length": 250.90234375, + "max_sentence2_length": 1059, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92917 + }, + "cbs_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 250.90234375, + "max_sentence1_length": 1059, + "min_sentence2_length": 31, + "average_sentence2_length": 112.0546875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92917 + }, + "eng_Latn-cbt_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.8671875, + "max_sentence1_length": 827, + "min_sentence2_length": 38, + "average_sentence2_length": 236.9453125, + "max_sentence2_length": 635, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90064 + }, + "cbt_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 236.9453125, + "max_sentence1_length": 635, + "min_sentence2_length": 24, + "average_sentence2_length": 114.8671875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90064 + }, + "eng_Latn-cbu_Latn": { + "min_sentence1_length": 21, + "average_sentence1_length": 107.87890625, + "max_sentence1_length": 227, + "min_sentence2_length": 24, + "average_sentence2_length": 269.4375, + "max_sentence2_length": 1004, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 96593 + }, + "cbu_Latn-eng_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 269.4375, + "max_sentence1_length": 1004, + "min_sentence2_length": 21, + "average_sentence2_length": 107.87890625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 96593 + }, + "eng_Latn-cbv_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 111.125, + "max_sentence1_length": 227, + "min_sentence2_length": 39, + "average_sentence2_length": 271.37890625, + "max_sentence2_length": 927, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97921 + }, + "cbv_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 271.37890625, + "max_sentence1_length": 927, + "min_sentence2_length": 31, + "average_sentence2_length": 111.125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97921 + }, + "eng_Latn-cco_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.796875, + "max_sentence1_length": 827, + "min_sentence2_length": 63, + "average_sentence2_length": 259.8671875, + "max_sentence2_length": 671, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 95658 + }, + "cco_Latn-eng_Latn": { + "min_sentence1_length": 63, + "average_sentence1_length": 259.8671875, + "max_sentence1_length": 671, + "min_sentence2_length": 24, + "average_sentence2_length": 113.796875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 95658 + }, + "eng_Latn-ceb_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 31, + "average_sentence2_length": 139.43359375, + "max_sentence2_length": 381, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64468 + }, + "ceb_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 139.43359375, + "max_sentence1_length": 381, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64468 + }, + "eng_Latn-cek_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.72265625, + "max_sentence1_length": 827, + "min_sentence2_length": 25, + "average_sentence2_length": 138.73046875, + "max_sentence2_length": 301, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65140 + }, + "cek_Latn-eng_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 138.73046875, + "max_sentence1_length": 301, + "min_sentence2_length": 24, + "average_sentence2_length": 115.72265625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65140 + }, + "eng_Latn-ces_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 146.3203125, + "max_sentence1_length": 341, + "min_sentence2_length": 36, + "average_sentence2_length": 125.69921875, + "max_sentence2_length": 301, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69637 + }, + "ces_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 125.69921875, + "max_sentence1_length": 301, + "min_sentence2_length": 56, + "average_sentence2_length": 146.3203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69637 + }, + "eng_Latn-cgc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.75390625, + "max_sentence1_length": 227, + "min_sentence2_length": 47, + "average_sentence2_length": 224.30078125, + "max_sentence2_length": 618, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85774 + }, + "cgc_Latn-eng_Latn": { + "min_sentence1_length": 47, + "average_sentence1_length": 224.30078125, + "max_sentence1_length": 618, + "min_sentence2_length": 24, + "average_sentence2_length": 110.75390625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85774 + }, + "eng_Latn-cha_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 108.57421875, + "max_sentence1_length": 243, + "min_sentence2_length": 35, + "average_sentence2_length": 113.0078125, + "max_sentence2_length": 258, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 56725 + }, + "cha_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 113.0078125, + "max_sentence1_length": 258, + "min_sentence2_length": 38, + "average_sentence2_length": 108.57421875, + "max_sentence2_length": 243, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 56725 + }, + "eng_Latn-chd_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.58203125, + "max_sentence1_length": 227, + "min_sentence2_length": 38, + "average_sentence2_length": 227.484375, + "max_sentence2_length": 789, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86545 + }, + "chd_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 227.484375, + "max_sentence1_length": 789, + "min_sentence2_length": 24, + "average_sentence2_length": 110.58203125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86545 + }, + "eng_Latn-chf_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.53125, + "max_sentence1_length": 827, + "min_sentence2_length": 30, + "average_sentence2_length": 202.71484375, + "max_sentence2_length": 987, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81471 + }, + "chf_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 202.71484375, + "max_sentence1_length": 987, + "min_sentence2_length": 24, + "average_sentence2_length": 115.53125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81471 + }, + "eng_Latn-chk_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 28, + "average_sentence2_length": 135.45703125, + "max_sentence2_length": 330, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63450 + }, + "chk_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 135.45703125, + "max_sentence1_length": 330, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63450 + }, + "eng_Latn-chq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.43359375, + "max_sentence1_length": 227, + "min_sentence2_length": 29, + "average_sentence2_length": 176.98828125, + "max_sentence2_length": 602, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73580 + }, + "chq_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 176.98828125, + "max_sentence1_length": 602, + "min_sentence2_length": 24, + "average_sentence2_length": 110.43359375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73580 + }, + "eng_Latn-chz_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 36, + "average_sentence2_length": 172.62890625, + "max_sentence2_length": 439, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73728 + }, + "chz_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 172.62890625, + "max_sentence1_length": 439, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73728 + }, + "eng_Latn-cjo_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.18359375, + "max_sentence1_length": 263, + "min_sentence2_length": 49, + "average_sentence2_length": 285.125, + "max_sentence2_length": 1225, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101711 + }, + "cjo_Latn-eng_Latn": { + "min_sentence1_length": 49, + "average_sentence1_length": 285.125, + "max_sentence1_length": 1225, + "min_sentence2_length": 24, + "average_sentence2_length": 112.18359375, + "max_sentence2_length": 263, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101711 + }, + "eng_Latn-cjv_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.625, + "max_sentence1_length": 227, + "min_sentence2_length": 50, + "average_sentence2_length": 248.8359375, + "max_sentence2_length": 908, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91766 + }, + "cjv_Latn-eng_Latn": { + "min_sentence1_length": 50, + "average_sentence1_length": 248.8359375, + "max_sentence1_length": 908, + "min_sentence2_length": 24, + "average_sentence2_length": 109.625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91766 + }, + "eng_Latn-ckb_Arab": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 20, + "average_sentence2_length": 102.04296875, + "max_sentence2_length": 232, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 55658 + }, + "ckb_Arab-eng_Latn": { + "min_sentence1_length": 20, + "average_sentence1_length": 102.04296875, + "max_sentence1_length": 232, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 55658 + }, + "eng_Latn-cle_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 52, + "average_sentence2_length": 197.43359375, + "max_sentence2_length": 439, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79316 + }, + "cle_Latn-eng_Latn": { + "min_sentence1_length": 52, + "average_sentence1_length": 197.43359375, + "max_sentence1_length": 439, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79316 + }, + "eng_Latn-clu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.1640625, + "max_sentence1_length": 827, + "min_sentence2_length": 36, + "average_sentence2_length": 177.93359375, + "max_sentence2_length": 843, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74521 + }, + "clu_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 177.93359375, + "max_sentence1_length": 843, + "min_sentence2_length": 24, + "average_sentence2_length": 113.1640625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74521 + }, + "eng_Latn-cme_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.1875, + "max_sentence1_length": 827, + "min_sentence2_length": 25, + "average_sentence2_length": 143.66796875, + "max_sentence2_length": 436, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66267 + }, + "cme_Latn-eng_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 143.66796875, + "max_sentence1_length": 436, + "min_sentence2_length": 24, + "average_sentence2_length": 115.1875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66267 + }, + "eng_Latn-cmn_Hans": { + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "min_sentence2_length": 12, + "average_sentence2_length": 40.19140625, + "max_sentence2_length": 106, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 47494 + }, + "cmn_Hans-eng_Latn": { + "min_sentence1_length": 12, + "average_sentence1_length": 40.19140625, + "max_sentence1_length": 106, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 47494 + }, + "eng_Latn-cni_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.59765625, + "max_sentence1_length": 263, + "min_sentence2_length": 43, + "average_sentence2_length": 240.62890625, + "max_sentence2_length": 1132, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90170 + }, + "cni_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 240.62890625, + "max_sentence1_length": 1132, + "min_sentence2_length": 24, + "average_sentence2_length": 111.59765625, + "max_sentence2_length": 263, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90170 + }, + "eng_Latn-cnl_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.390625, + "max_sentence1_length": 827, + "min_sentence2_length": 34, + "average_sentence2_length": 229.64453125, + "max_sentence2_length": 668, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88073 + }, + "cnl_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 229.64453125, + "max_sentence1_length": 668, + "min_sentence2_length": 24, + "average_sentence2_length": 114.390625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88073 + }, + "eng_Latn-cnt_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.20703125, + "max_sentence1_length": 827, + "min_sentence2_length": 40, + "average_sentence2_length": 177.01953125, + "max_sentence2_length": 384, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74810 + }, + "cnt_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 177.01953125, + "max_sentence1_length": 384, + "min_sentence2_length": 24, + "average_sentence2_length": 115.20703125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74810 + }, + "eng_Latn-cof_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 111.8203125, + "max_sentence1_length": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 281.3515625, + "max_sentence2_length": 1760, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 100652 + }, + "cof_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 281.3515625, + "max_sentence1_length": 1760, + "min_sentence2_length": 31, + "average_sentence2_length": 111.8203125, + "max_sentence2_length": 256, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 100652 + }, + "eng_Latn-con_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.8359375, + "max_sentence1_length": 227, + "min_sentence2_length": 34, + "average_sentence2_length": 187.04296875, + "max_sentence2_length": 681, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76257 + }, + "con_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 187.04296875, + "max_sentence1_length": 681, + "min_sentence2_length": 24, + "average_sentence2_length": 110.8359375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76257 + }, + "eng_Latn-cop_Copt": { + "min_sentence1_length": 56, + "average_sentence1_length": 146.3203125, + "max_sentence1_length": 341, + "min_sentence2_length": 34, + "average_sentence2_length": 137.5390625, + "max_sentence2_length": 279, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72668 + }, + "cop_Copt-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 137.5390625, + "max_sentence1_length": 279, + "min_sentence2_length": 56, + "average_sentence2_length": 146.3203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72668 + }, + "eng_Latn-cot_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.234375, + "max_sentence1_length": 827, + "min_sentence2_length": 45, + "average_sentence2_length": 271.953125, + "max_sentence2_length": 882, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 98864 + }, + "cot_Latn-eng_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 271.953125, + "max_sentence1_length": 882, + "min_sentence2_length": 24, + "average_sentence2_length": 114.234375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 98864 + }, + "eng_Latn-cpa_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 44, + "average_sentence2_length": 167.7421875, + "max_sentence2_length": 387, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72477 + }, + "cpa_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 167.7421875, + "max_sentence1_length": 387, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72477 + }, + "eng_Latn-cpb_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.8125, + "max_sentence1_length": 827, + "min_sentence2_length": 35, + "average_sentence2_length": 162.61328125, + "max_sentence2_length": 426, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70765 + }, + "cpb_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 162.61328125, + "max_sentence1_length": 426, + "min_sentence2_length": 24, + "average_sentence2_length": 113.8125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70765 + }, + "eng_Latn-cpc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.8125, + "max_sentence1_length": 827, + "min_sentence2_length": 42, + "average_sentence2_length": 166.1484375, + "max_sentence2_length": 431, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71670 + }, + "cpc_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 166.1484375, + "max_sentence1_length": 431, + "min_sentence2_length": 24, + "average_sentence2_length": 113.8125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71670 + }, + "eng_Latn-cpu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.8125, + "max_sentence1_length": 827, + "min_sentence2_length": 42, + "average_sentence2_length": 172.44140625, + "max_sentence2_length": 441, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73281 + }, + "cpu_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 172.44140625, + "max_sentence1_length": 441, + "min_sentence2_length": 24, + "average_sentence2_length": 113.8125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73281 + }, + "eng_Latn-cpy_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.4375, + "max_sentence1_length": 827, + "min_sentence2_length": 41, + "average_sentence2_length": 181.5, + "max_sentence2_length": 453, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75504 + }, + "cpy_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 181.5, + "max_sentence1_length": 453, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75504 + }, + "eng_Latn-crn_Latn": { + "min_sentence1_length": 21, + "average_sentence1_length": 108.921875, + "max_sentence1_length": 227, + "min_sentence2_length": 54, + "average_sentence2_length": 314.71875, + "max_sentence2_length": 1123, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 108452 + }, + "crn_Latn-eng_Latn": { + "min_sentence1_length": 54, + "average_sentence1_length": 314.71875, + "max_sentence1_length": 1123, + "min_sentence2_length": 21, + "average_sentence2_length": 108.921875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 108452 + }, + "eng_Latn-crx_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.8125, + "max_sentence1_length": 827, + "min_sentence2_length": 33, + "average_sentence2_length": 175.640625, + "max_sentence2_length": 457, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74100 + }, + "crx_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 175.640625, + "max_sentence1_length": 457, + "min_sentence2_length": 24, + "average_sentence2_length": 113.8125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74100 + }, + "eng_Latn-cso_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.13671875, + "max_sentence1_length": 827, + "min_sentence2_length": 47, + "average_sentence2_length": 218.03125, + "max_sentence2_length": 535, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84779 + }, + "cso_Latn-eng_Latn": { + "min_sentence1_length": 47, + "average_sentence1_length": 218.03125, + "max_sentence1_length": 535, + "min_sentence2_length": 24, + "average_sentence2_length": 113.13671875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84779 + }, + "eng_Latn-csy_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 31, + "average_sentence2_length": 130.73828125, + "max_sentence2_length": 283, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62242 + }, + "csy_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 130.73828125, + "max_sentence1_length": 283, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62242 + }, + "eng_Latn-cta_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.4296875, + "max_sentence1_length": 827, + "min_sentence2_length": 39, + "average_sentence2_length": 279.05859375, + "max_sentence2_length": 909, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 100733 + }, + "cta_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 279.05859375, + "max_sentence1_length": 909, + "min_sentence2_length": 24, + "average_sentence2_length": 114.4296875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 100733 + }, + "eng_Latn-cth_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 41, + "average_sentence2_length": 135.921875, + "max_sentence2_length": 291, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63569 + }, + "cth_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 135.921875, + "max_sentence1_length": 291, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63569 + }, + "eng_Latn-ctp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.09765625, + "max_sentence1_length": 227, + "min_sentence2_length": 37, + "average_sentence2_length": 266.01171875, + "max_sentence2_length": 864, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 96284 + }, + "ctp_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 266.01171875, + "max_sentence1_length": 864, + "min_sentence2_length": 24, + "average_sentence2_length": 110.09765625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 96284 + }, + "eng_Latn-ctu_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "min_sentence2_length": 61, + "average_sentence2_length": 238.31640625, + "max_sentence2_length": 670, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 98784 + }, + "ctu_Latn-eng_Latn": { + "min_sentence1_length": 61, + "average_sentence1_length": 238.31640625, + "max_sentence1_length": 670, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 98784 + }, + "eng_Latn-cub_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.7109375, + "max_sentence1_length": 232, + "min_sentence2_length": 30, + "average_sentence2_length": 276.57421875, + "max_sentence2_length": 1218, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99401 + }, + "cub_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 276.57421875, + "max_sentence1_length": 1218, + "min_sentence2_length": 24, + "average_sentence2_length": 111.7109375, + "max_sentence2_length": 232, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99401 + }, + "eng_Latn-cuc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.234375, + "max_sentence1_length": 227, + "min_sentence2_length": 35, + "average_sentence2_length": 160.85546875, + "max_sentence2_length": 404, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69143 + }, + "cuc_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 160.85546875, + "max_sentence1_length": 404, + "min_sentence2_length": 24, + "average_sentence2_length": 109.234375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69143 + }, + "eng_Latn-cui_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.5078125, + "max_sentence1_length": 227, + "min_sentence2_length": 79, + "average_sentence2_length": 333.36328125, + "max_sentence2_length": 1448, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 113631 + }, + "cui_Latn-eng_Latn": { + "min_sentence1_length": 79, + "average_sentence1_length": 333.36328125, + "max_sentence1_length": 1448, + "min_sentence2_length": 24, + "average_sentence2_length": 110.5078125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 113631 + }, + "eng_Latn-cuk_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 146.3203125, + "max_sentence1_length": 341, + "min_sentence2_length": 63, + "average_sentence2_length": 198.34375, + "max_sentence2_length": 513, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88234 + }, + "cuk_Latn-eng_Latn": { + "min_sentence1_length": 63, + "average_sentence1_length": 198.34375, + "max_sentence1_length": 513, + "min_sentence2_length": 56, + "average_sentence2_length": 146.3203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88234 + }, + "eng_Latn-cut_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.66015625, + "max_sentence1_length": 216, + "min_sentence2_length": 34, + "average_sentence2_length": 185.71484375, + "max_sentence2_length": 531, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75616 + }, + "cut_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 185.71484375, + "max_sentence1_length": 531, + "min_sentence2_length": 24, + "average_sentence2_length": 109.66015625, + "max_sentence2_length": 216, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75616 + }, + "eng_Latn-cux_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 44, + "average_sentence2_length": 233.81640625, + "max_sentence2_length": 599, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89392 + }, + "cux_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 233.81640625, + "max_sentence1_length": 599, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89392 + }, + "eng_Latn-cwe_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 23, + "average_sentence2_length": 133.17578125, + "max_sentence2_length": 317, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62866 + }, + "cwe_Latn-eng_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 133.17578125, + "max_sentence1_length": 317, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62866 + }, + "eng_Latn-cya_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 43, + "average_sentence2_length": 163.6953125, + "max_sentence2_length": 346, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71441 + }, + "cya_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 163.6953125, + "max_sentence1_length": 346, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71441 + }, + "eng_Latn-daa_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.8984375, + "max_sentence1_length": 827, + "min_sentence2_length": 38, + "average_sentence2_length": 151.21484375, + "max_sentence2_length": 365, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67869 + }, + "daa_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 151.21484375, + "max_sentence1_length": 365, + "min_sentence2_length": 24, + "average_sentence2_length": 113.8984375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67869 + }, + "eng_Latn-dad_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 28, + "average_sentence2_length": 128.60546875, + "max_sentence2_length": 308, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62202 + }, + "dad_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 128.60546875, + "max_sentence1_length": 308, + "min_sentence2_length": 24, + "average_sentence2_length": 114.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62202 + }, + "eng_Latn-dah_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 110.046875, + "max_sentence1_length": 227, + "min_sentence2_length": 47, + "average_sentence2_length": 275.80859375, + "max_sentence2_length": 796, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 98779 + }, + "dah_Latn-eng_Latn": { + "min_sentence1_length": 47, + "average_sentence1_length": 275.80859375, + "max_sentence1_length": 796, + "min_sentence2_length": 31, + "average_sentence2_length": 110.046875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 98779 + }, + "eng_Latn-dan_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 21, + "average_sentence2_length": 109.00390625, + "max_sentence2_length": 226, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 56678 + }, + "dan_Latn-eng_Latn": { + "min_sentence1_length": 21, + "average_sentence1_length": 109.00390625, + "max_sentence1_length": 226, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 56678 + }, + "eng_Latn-ded_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.8671875, + "max_sentence1_length": 230, + "min_sentence2_length": 25, + "average_sentence2_length": 178.6875, + "max_sentence2_length": 777, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73870 + }, + "ded_Latn-eng_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 178.6875, + "max_sentence1_length": 777, + "min_sentence2_length": 24, + "average_sentence2_length": 109.8671875, + "max_sentence2_length": 230, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73870 + }, + "eng_Latn-deu_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "min_sentence2_length": 55, + "average_sentence2_length": 156.78515625, + "max_sentence2_length": 392, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77912 + }, + "deu_Latn-eng_Latn": { + "min_sentence1_length": 55, + "average_sentence1_length": 156.78515625, + "max_sentence1_length": 392, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77912 + }, + "eng_Latn-dgc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.234375, + "max_sentence1_length": 251, + "min_sentence2_length": 39, + "average_sentence2_length": 164.83203125, + "max_sentence2_length": 394, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70417 + }, + "dgc_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 164.83203125, + "max_sentence1_length": 394, + "min_sentence2_length": 24, + "average_sentence2_length": 110.234375, + "max_sentence2_length": 251, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70417 + }, + "eng_Latn-dgr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.671875, + "max_sentence1_length": 227, + "min_sentence2_length": 36, + "average_sentence2_length": 160.18359375, + "max_sentence2_length": 492, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69339 + }, + "dgr_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 160.18359375, + "max_sentence1_length": 492, + "min_sentence2_length": 24, + "average_sentence2_length": 110.671875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69339 + }, + "eng_Latn-dgz_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.01171875, + "max_sentence1_length": 376, + "min_sentence2_length": 37, + "average_sentence2_length": 175.05078125, + "max_sentence2_length": 597, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73488 + }, + "dgz_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 175.05078125, + "max_sentence1_length": 597, + "min_sentence2_length": 24, + "average_sentence2_length": 112.01171875, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73488 + }, + "eng_Latn-dhg_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 110.27734375, + "max_sentence1_length": 215, + "min_sentence2_length": 52, + "average_sentence2_length": 268.19921875, + "max_sentence2_length": 1116, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 96890 + }, + "dhg_Latn-eng_Latn": { + "min_sentence1_length": 52, + "average_sentence1_length": 268.19921875, + "max_sentence1_length": 1116, + "min_sentence2_length": 28, + "average_sentence2_length": 110.27734375, + "max_sentence2_length": 215, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 96890 + }, + "eng_Latn-dif_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 21, + "average_sentence2_length": 135.3515625, + "max_sentence2_length": 345, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63423 + }, + "dif_Latn-eng_Latn": { + "min_sentence1_length": 21, + "average_sentence1_length": 135.3515625, + "max_sentence1_length": 345, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63423 + }, + "eng_Latn-dik_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.18359375, + "max_sentence1_length": 827, + "min_sentence2_length": 25, + "average_sentence2_length": 126.83984375, + "max_sentence2_length": 350, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61958 + }, + "dik_Latn-eng_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 126.83984375, + "max_sentence1_length": 350, + "min_sentence2_length": 24, + "average_sentence2_length": 115.18359375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61958 + }, + "eng_Latn-dji_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 121.51578947368421, + "max_sentence1_length": 259, + "min_sentence2_length": 50, + "average_sentence2_length": 255.96315789473684, + "max_sentence2_length": 933, + "num_samples": 190, + "num_samples_sentence2": 190, + "number_of_characters": 71721 + }, + "dji_Latn-eng_Latn": { + "min_sentence1_length": 50, + "average_sentence1_length": 255.96315789473684, + "max_sentence1_length": 933, + "min_sentence2_length": 26, + "average_sentence2_length": 121.51578947368421, + "max_sentence2_length": 259, + "num_samples": 190, + "num_samples_sentence2": 190, + "number_of_characters": 71721 + }, + "eng_Latn-djk_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.34765625, + "max_sentence1_length": 827, + "min_sentence2_length": 25, + "average_sentence2_length": 181.88671875, + "max_sentence2_length": 717, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75836 + }, + "djk_Latn-eng_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 181.88671875, + "max_sentence1_length": 717, + "min_sentence2_length": 24, + "average_sentence2_length": 114.34765625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75836 + }, + "eng_Latn-djr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.65234375, + "max_sentence1_length": 227, + "min_sentence2_length": 59, + "average_sentence2_length": 406.55078125, + "max_sentence2_length": 1457, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 132660 + }, + "djr_Latn-eng_Latn": { + "min_sentence1_length": 59, + "average_sentence1_length": 406.55078125, + "max_sentence1_length": 1457, + "min_sentence2_length": 24, + "average_sentence2_length": 111.65234375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 132660 + }, + "eng_Latn-dob_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.67578125, + "max_sentence1_length": 227, + "min_sentence2_length": 36, + "average_sentence2_length": 162.1171875, + "max_sentence2_length": 427, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69579 + }, + "dob_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 162.1171875, + "max_sentence1_length": 427, + "min_sentence2_length": 24, + "average_sentence2_length": 109.67578125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69579 + }, + "eng_Latn-dop_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.33203125, + "max_sentence1_length": 227, + "min_sentence2_length": 28, + "average_sentence2_length": 140.765625, + "max_sentence2_length": 333, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64281 + }, + "dop_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 140.765625, + "max_sentence1_length": 333, + "min_sentence2_length": 24, + "average_sentence2_length": 110.33203125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64281 + }, + "eng_Latn-dov_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 19, + "average_sentence2_length": 116.83984375, + "max_sentence2_length": 294, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58684 + }, + "dov_Latn-eng_Latn": { + "min_sentence1_length": 19, + "average_sentence1_length": 116.83984375, + "max_sentence1_length": 294, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58684 + }, + "eng_Latn-dwr_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "min_sentence2_length": 46, + "average_sentence2_length": 173.80859375, + "max_sentence2_length": 480, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81700 + }, + "dwr_Latn-eng_Latn": { + "min_sentence1_length": 46, + "average_sentence1_length": 173.80859375, + "max_sentence1_length": 480, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81700 + }, + "eng_Latn-dww_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 110.01953125, + "max_sentence1_length": 231, + "min_sentence2_length": 31, + "average_sentence2_length": 185.31640625, + "max_sentence2_length": 606, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75606 + }, + "dww_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 185.31640625, + "max_sentence1_length": 606, + "min_sentence2_length": 31, + "average_sentence2_length": 110.01953125, + "max_sentence2_length": 231, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75606 + }, + "eng_Latn-dwy_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 113.89473684210526, + "max_sentence1_length": 257, + "min_sentence2_length": 3, + "average_sentence2_length": 312.593984962406, + "max_sentence2_length": 1213, + "num_samples": 133, + "num_samples_sentence2": 133, + "number_of_characters": 56723 + }, + "dwy_Latn-eng_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 312.593984962406, + "max_sentence1_length": 1213, + "min_sentence2_length": 42, + "average_sentence2_length": 113.89473684210526, + "max_sentence2_length": 257, + "num_samples": 133, + "num_samples_sentence2": 133, + "number_of_characters": 56723 + }, + "eng_Latn-ebk_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.203125, + "max_sentence1_length": 227, + "min_sentence2_length": 36, + "average_sentence2_length": 184.87109375, + "max_sentence2_length": 492, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75795 + }, + "ebk_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 184.87109375, + "max_sentence1_length": 492, + "min_sentence2_length": 24, + "average_sentence2_length": 111.203125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75795 + }, + "eng_Latn-eko_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 105.97265625, + "max_sentence1_length": 217, + "min_sentence2_length": 17, + "average_sentence2_length": 120.234375, + "max_sentence2_length": 280, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57909 + }, + "eko_Latn-eng_Latn": { + "min_sentence1_length": 17, + "average_sentence1_length": 120.234375, + "max_sentence1_length": 280, + "min_sentence2_length": 37, + "average_sentence2_length": 105.97265625, + "max_sentence2_length": 217, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57909 + }, + "eng_Latn-emi_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.42578125, + "max_sentence1_length": 827, + "min_sentence2_length": 29, + "average_sentence2_length": 160.08203125, + "max_sentence2_length": 475, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70274 + }, + "emi_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 160.08203125, + "max_sentence1_length": 475, + "min_sentence2_length": 24, + "average_sentence2_length": 114.42578125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70274 + }, + "eng_Latn-emp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.54296875, + "max_sentence1_length": 227, + "min_sentence2_length": 39, + "average_sentence2_length": 185.9765625, + "max_sentence2_length": 529, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75909 + }, + "emp_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 185.9765625, + "max_sentence1_length": 529, + "min_sentence2_length": 24, + "average_sentence2_length": 110.54296875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75909 + }, + "eng_Latn-enq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.29296875, + "max_sentence1_length": 227, + "min_sentence2_length": 52, + "average_sentence2_length": 234.73046875, + "max_sentence2_length": 800, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89350 + }, + "enq_Latn-eng_Latn": { + "min_sentence1_length": 52, + "average_sentence1_length": 234.73046875, + "max_sentence1_length": 800, + "min_sentence2_length": 24, + "average_sentence2_length": 114.29296875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89350 + }, + "eng_Latn-epo_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.4375, + "max_sentence1_length": 827, + "min_sentence2_length": 23, + "average_sentence2_length": 100.90625, + "max_sentence2_length": 240, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 54872 + }, + "epo_Latn-eng_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 100.90625, + "max_sentence1_length": 240, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 54872 + }, + "eng_Latn-eri_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.0703125, + "max_sentence1_length": 827, + "min_sentence2_length": 45, + "average_sentence2_length": 223.96875, + "max_sentence2_length": 854, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86538 + }, + "eri_Latn-eng_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 223.96875, + "max_sentence1_length": 854, + "min_sentence2_length": 24, + "average_sentence2_length": 114.0703125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86538 + }, + "eng_Latn-ese_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.25, + "max_sentence1_length": 227, + "min_sentence2_length": 58, + "average_sentence2_length": 298.2109375, + "max_sentence2_length": 934, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 104310 + }, + "ese_Latn-eng_Latn": { + "min_sentence1_length": 58, + "average_sentence1_length": 298.2109375, + "max_sentence1_length": 934, + "min_sentence2_length": 24, + "average_sentence2_length": 109.25, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 104310 + }, + "eng_Latn-esk_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 114.61328125, + "max_sentence1_length": 827, + "min_sentence2_length": 35, + "average_sentence2_length": 156.62890625, + "max_sentence2_length": 463, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69438 + }, + "esk_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 156.62890625, + "max_sentence1_length": 463, + "min_sentence2_length": 31, + "average_sentence2_length": 114.61328125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69438 + }, + "eng_Latn-etr_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 109.7265625, + "max_sentence1_length": 243, + "min_sentence2_length": 27, + "average_sentence2_length": 160.1171875, + "max_sentence2_length": 424, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69080 + }, + "etr_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 160.1171875, + "max_sentence1_length": 424, + "min_sentence2_length": 31, + "average_sentence2_length": 109.7265625, + "max_sentence2_length": 243, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69080 + }, + "eng_Latn-ewe_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 24, + "average_sentence2_length": 129.68359375, + "max_sentence2_length": 305, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62734 + }, + "ewe_Latn-eng_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 129.68359375, + "max_sentence1_length": 305, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62734 + }, + "eng_Latn-faa_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.94921875, + "max_sentence1_length": 227, + "min_sentence2_length": 58, + "average_sentence2_length": 272.8984375, + "max_sentence2_length": 925, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 98521 + }, + "faa_Latn-eng_Latn": { + "min_sentence1_length": 58, + "average_sentence1_length": 272.8984375, + "max_sentence1_length": 925, + "min_sentence2_length": 24, + "average_sentence2_length": 111.94921875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 98521 + }, + "eng_Latn-fai_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.20703125, + "max_sentence1_length": 227, + "min_sentence2_length": 38, + "average_sentence2_length": 193.04296875, + "max_sentence2_length": 578, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77632 + }, + "fai_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 193.04296875, + "max_sentence1_length": 578, + "min_sentence2_length": 24, + "average_sentence2_length": 110.20703125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77632 + }, + "eng_Latn-far_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.6328125, + "max_sentence1_length": 227, + "min_sentence2_length": 40, + "average_sentence2_length": 192.46484375, + "max_sentence2_length": 640, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77337 + }, + "far_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 192.46484375, + "max_sentence1_length": 640, + "min_sentence2_length": 24, + "average_sentence2_length": 109.6328125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77337 + }, + "eng_Latn-ffm_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.828125, + "max_sentence1_length": 827, + "min_sentence2_length": 1, + "average_sentence2_length": 122.6015625, + "max_sentence2_length": 361, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60526 + }, + "ffm_Latn-eng_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 122.6015625, + "max_sentence1_length": 361, + "min_sentence2_length": 24, + "average_sentence2_length": 113.828125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60526 + }, + "eng_Latn-for_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.94921875, + "max_sentence1_length": 227, + "min_sentence2_length": 27, + "average_sentence2_length": 156.578125, + "max_sentence2_length": 363, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68231 + }, + "for_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 156.578125, + "max_sentence1_length": 363, + "min_sentence2_length": 24, + "average_sentence2_length": 109.94921875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68231 + }, + "eng_Latn-fra_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "min_sentence2_length": 44, + "average_sentence2_length": 155.83203125, + "max_sentence2_length": 386, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77098 + }, + "fra_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 155.83203125, + "max_sentence1_length": 386, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77098 + }, + "eng_Latn-fue_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 116.5546875, + "max_sentence1_length": 827, + "min_sentence2_length": 28, + "average_sentence2_length": 109.19921875, + "max_sentence2_length": 247, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57793 + }, + "fue_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 109.19921875, + "max_sentence1_length": 247, + "min_sentence2_length": 31, + "average_sentence2_length": 116.5546875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57793 + }, + "eng_Latn-fuf_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.234375, + "max_sentence1_length": 827, + "min_sentence2_length": 26, + "average_sentence2_length": 119.56640625, + "max_sentence2_length": 264, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59597 + }, + "fuf_Latn-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 119.56640625, + "max_sentence1_length": 264, + "min_sentence2_length": 24, + "average_sentence2_length": 113.234375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59597 + }, + "eng_Latn-fuh_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.3046875, + "max_sentence1_length": 827, + "min_sentence2_length": 1, + "average_sentence2_length": 120.66015625, + "max_sentence2_length": 295, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60407 + }, + "fuh_Latn-eng_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 120.66015625, + "max_sentence1_length": 295, + "min_sentence2_length": 24, + "average_sentence2_length": 115.3046875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60407 + }, + "eng_Latn-gah_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.3125, + "max_sentence1_length": 227, + "min_sentence2_length": 43, + "average_sentence2_length": 189.37890625, + "max_sentence2_length": 539, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76721 + }, + "gah_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 189.37890625, + "max_sentence1_length": 539, + "min_sentence2_length": 24, + "average_sentence2_length": 110.3125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76721 + }, + "eng_Latn-gai_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.75390625, + "max_sentence1_length": 227, + "min_sentence2_length": 40, + "average_sentence2_length": 241.88671875, + "max_sentence2_length": 919, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90020 + }, + "gai_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 241.88671875, + "max_sentence1_length": 919, + "min_sentence2_length": 24, + "average_sentence2_length": 109.75390625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90020 + }, + "eng_Latn-gam_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.83984375, + "max_sentence1_length": 376, + "min_sentence2_length": 40, + "average_sentence2_length": 183.171875, + "max_sentence2_length": 607, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75523 + }, + "gam_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 183.171875, + "max_sentence1_length": 607, + "min_sentence2_length": 24, + "average_sentence2_length": 111.83984375, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75523 + }, + "eng_Latn-gaw_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.5, + "max_sentence1_length": 227, + "min_sentence2_length": 28, + "average_sentence2_length": 200.12109375, + "max_sentence2_length": 771, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79263 + }, + "gaw_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 200.12109375, + "max_sentence1_length": 771, + "min_sentence2_length": 24, + "average_sentence2_length": 109.5, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79263 + }, + "eng_Latn-gdn_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.8984375, + "max_sentence1_length": 227, + "min_sentence2_length": 52, + "average_sentence2_length": 292.9765625, + "max_sentence2_length": 1210, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 103392 + }, + "gdn_Latn-eng_Latn": { + "min_sentence1_length": 52, + "average_sentence1_length": 292.9765625, + "max_sentence1_length": 1210, + "min_sentence2_length": 24, + "average_sentence2_length": 110.8984375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 103392 + }, + "eng_Latn-gdr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.28515625, + "max_sentence1_length": 227, + "min_sentence2_length": 45, + "average_sentence2_length": 219.1953125, + "max_sentence2_length": 620, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84347 + }, + "gdr_Latn-eng_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 219.1953125, + "max_sentence1_length": 620, + "min_sentence2_length": 24, + "average_sentence2_length": 110.28515625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84347 + }, + "eng_Latn-geb_Latn": { + "min_sentence1_length": 21, + "average_sentence1_length": 115.3046875, + "max_sentence1_length": 827, + "min_sentence2_length": 41, + "average_sentence2_length": 248.74609375, + "max_sentence2_length": 830, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93197 + }, + "geb_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 248.74609375, + "max_sentence1_length": 830, + "min_sentence2_length": 21, + "average_sentence2_length": 115.3046875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93197 + }, + "eng_Latn-gfk_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 146.45703125, + "max_sentence1_length": 341, + "min_sentence2_length": 41, + "average_sentence2_length": 215.09375, + "max_sentence2_length": 553, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92557 + }, + "gfk_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 215.09375, + "max_sentence1_length": 553, + "min_sentence2_length": 1, + "average_sentence2_length": 146.45703125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92557 + }, + "eng_Latn-ghs_Latn": { + "min_sentence1_length": 21, + "average_sentence1_length": 107.6953125, + "max_sentence1_length": 248, + "min_sentence2_length": 42, + "average_sentence2_length": 213.65234375, + "max_sentence2_length": 845, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82265 + }, + "ghs_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 213.65234375, + "max_sentence1_length": 845, + "min_sentence2_length": 21, + "average_sentence2_length": 107.6953125, + "max_sentence2_length": 248, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82265 + }, + "eng_Latn-glk_Arab": { + "min_sentence1_length": 40, + "average_sentence1_length": 101.6774193548387, + "max_sentence1_length": 209, + "min_sentence2_length": 31, + "average_sentence2_length": 93.04301075268818, + "max_sentence2_length": 205, + "num_samples": 93, + "num_samples_sentence2": 93, + "number_of_characters": 18109 + }, + "glk_Arab-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 93.04301075268818, + "max_sentence1_length": 205, + "min_sentence2_length": 40, + "average_sentence2_length": 101.6774193548387, + "max_sentence2_length": 209, + "num_samples": 93, + "num_samples_sentence2": 93, + "number_of_characters": 18109 + }, + "eng_Latn-gmv_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "min_sentence2_length": 41, + "average_sentence2_length": 152.296875, + "max_sentence2_length": 399, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76193 + }, + "gmv_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 152.296875, + "max_sentence1_length": 399, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76193 + }, + "eng_Latn-gng_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.6640625, + "max_sentence1_length": 827, + "min_sentence2_length": 24, + "average_sentence2_length": 125.60546875, + "max_sentence2_length": 372, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61253 + }, + "gng_Latn-eng_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 125.60546875, + "max_sentence1_length": 372, + "min_sentence2_length": 24, + "average_sentence2_length": 113.6640625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61253 + }, + "eng_Latn-gnn_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.2265625, + "max_sentence1_length": 232, + "min_sentence2_length": 64, + "average_sentence2_length": 520.5859375, + "max_sentence2_length": 2125, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 161744 + }, + "gnn_Latn-eng_Latn": { + "min_sentence1_length": 64, + "average_sentence1_length": 520.5859375, + "max_sentence1_length": 2125, + "min_sentence2_length": 24, + "average_sentence2_length": 111.2265625, + "max_sentence2_length": 232, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 161744 + }, + "eng_Latn-gnw_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.3984375, + "max_sentence1_length": 827, + "min_sentence2_length": 27, + "average_sentence2_length": 167.72265625, + "max_sentence2_length": 575, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72479 + }, + "gnw_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 167.72265625, + "max_sentence1_length": 575, + "min_sentence2_length": 24, + "average_sentence2_length": 115.3984375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72479 + }, + "eng_Latn-gof_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "min_sentence2_length": 34, + "average_sentence2_length": 141.16015625, + "max_sentence2_length": 385, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73342 + }, + "gof_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 141.16015625, + "max_sentence1_length": 385, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73342 + }, + "eng_Latn-grc_Grek": { + "min_sentence1_length": 56, + "average_sentence1_length": 146.3203125, + "max_sentence1_length": 341, + "min_sentence2_length": 44, + "average_sentence2_length": 144.91015625, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74555 + }, + "grc_Grek-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 144.91015625, + "max_sentence1_length": 341, + "min_sentence2_length": 56, + "average_sentence2_length": 146.3203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74555 + }, + "eng_Latn-gub_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 110.65625, + "max_sentence1_length": 227, + "min_sentence2_length": 31, + "average_sentence2_length": 287.85546875, + "max_sentence2_length": 1950, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 102019 + }, + "gub_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 287.85546875, + "max_sentence1_length": 1950, + "min_sentence2_length": 31, + "average_sentence2_length": 110.65625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 102019 + }, + "eng_Latn-guh_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.51953125, + "max_sentence1_length": 227, + "min_sentence2_length": 41, + "average_sentence2_length": 334.57421875, + "max_sentence2_length": 1331, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 113944 + }, + "guh_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 334.57421875, + "max_sentence1_length": 1331, + "min_sentence2_length": 24, + "average_sentence2_length": 110.51953125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 113944 + }, + "eng_Latn-gui_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.3984375, + "max_sentence1_length": 827, + "min_sentence2_length": 27, + "average_sentence2_length": 169.70703125, + "max_sentence2_length": 557, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72987 + }, + "gui_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 169.70703125, + "max_sentence1_length": 557, + "min_sentence2_length": 24, + "average_sentence2_length": 115.3984375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72987 + }, + "eng_Latn-guj_Gujr": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 19, + "average_sentence2_length": 105.0625, + "max_sentence2_length": 237, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 55669 + }, + "guj_Gujr-eng_Latn": { + "min_sentence1_length": 19, + "average_sentence1_length": 105.0625, + "max_sentence1_length": 237, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 55669 + }, + "eng_Latn-gul_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.296875, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 159.125, + "max_sentence2_length": 391, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70252 + }, + "gul_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 159.125, + "max_sentence1_length": 391, + "min_sentence2_length": 24, + "average_sentence2_length": 115.296875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70252 + }, + "eng_Latn-gum_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.81640625, + "max_sentence1_length": 227, + "min_sentence2_length": 36, + "average_sentence2_length": 183.4296875, + "max_sentence2_length": 417, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75071 + }, + "gum_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 183.4296875, + "max_sentence1_length": 417, + "min_sentence2_length": 24, + "average_sentence2_length": 109.81640625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75071 + }, + "eng_Latn-gun_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 17, + "average_sentence2_length": 133.4765625, + "max_sentence2_length": 314, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62943 + }, + "gun_Latn-eng_Latn": { + "min_sentence1_length": 17, + "average_sentence1_length": 133.4765625, + "max_sentence1_length": 314, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62943 + }, + "eng_Latn-guo_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.98046875, + "max_sentence1_length": 227, + "min_sentence2_length": 36, + "average_sentence2_length": 233.2109375, + "max_sentence2_length": 700, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88369 + }, + "guo_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 233.2109375, + "max_sentence1_length": 700, + "min_sentence2_length": 24, + "average_sentence2_length": 111.98046875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88369 + }, + "eng_Latn-gup_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.01171875, + "max_sentence1_length": 227, + "min_sentence2_length": 75, + "average_sentence2_length": 300.48828125, + "max_sentence2_length": 1198, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 105600 + }, + "gup_Latn-eng_Latn": { + "min_sentence1_length": 75, + "average_sentence1_length": 300.48828125, + "max_sentence1_length": 1198, + "min_sentence2_length": 24, + "average_sentence2_length": 112.01171875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 105600 + }, + "eng_Latn-gux_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "min_sentence2_length": 45, + "average_sentence2_length": 155.8671875, + "max_sentence2_length": 379, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77107 + }, + "gux_Latn-eng_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 155.8671875, + "max_sentence1_length": 379, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77107 + }, + "eng_Latn-gvc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.828125, + "max_sentence1_length": 227, + "min_sentence2_length": 33, + "average_sentence2_length": 251.8515625, + "max_sentence2_length": 832, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93358 + }, + "gvc_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 251.8515625, + "max_sentence1_length": 832, + "min_sentence2_length": 24, + "average_sentence2_length": 112.828125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93358 + }, + "eng_Latn-gvf_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.30078125, + "max_sentence1_length": 827, + "min_sentence2_length": 32, + "average_sentence2_length": 225.60546875, + "max_sentence2_length": 783, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87272 + }, + "gvf_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 225.60546875, + "max_sentence1_length": 783, + "min_sentence2_length": 24, + "average_sentence2_length": 115.30078125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87272 + }, + "eng_Latn-gvn_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.87890625, + "max_sentence1_length": 227, + "min_sentence2_length": 37, + "average_sentence2_length": 244.57421875, + "max_sentence2_length": 1121, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91252 + }, + "gvn_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 244.57421875, + "max_sentence1_length": 1121, + "min_sentence2_length": 24, + "average_sentence2_length": 111.87890625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91252 + }, + "eng_Latn-gvs_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.875, + "max_sentence1_length": 227, + "min_sentence2_length": 34, + "average_sentence2_length": 192.9140625, + "max_sentence2_length": 663, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78026 + }, + "gvs_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 192.9140625, + "max_sentence1_length": 663, + "min_sentence2_length": 24, + "average_sentence2_length": 111.875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78026 + }, + "eng_Latn-gwi_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.51171875, + "max_sentence1_length": 227, + "min_sentence2_length": 61, + "average_sentence2_length": 209.765625, + "max_sentence2_length": 749, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82247 + }, + "gwi_Latn-eng_Latn": { + "min_sentence1_length": 61, + "average_sentence1_length": 209.765625, + "max_sentence1_length": 749, + "min_sentence2_length": 24, + "average_sentence2_length": 111.51171875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82247 + }, + "eng_Latn-gym_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 115.24609375, + "max_sentence1_length": 827, + "min_sentence2_length": 38, + "average_sentence2_length": 255.47265625, + "max_sentence2_length": 810, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 94904 + }, + "gym_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 255.47265625, + "max_sentence1_length": 810, + "min_sentence2_length": 31, + "average_sentence2_length": 115.24609375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 94904 + }, + "eng_Latn-gyr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.3203125, + "max_sentence1_length": 827, + "min_sentence2_length": 30, + "average_sentence2_length": 165.68359375, + "max_sentence2_length": 643, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71681 + }, + "gyr_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 165.68359375, + "max_sentence1_length": 643, + "min_sentence2_length": 24, + "average_sentence2_length": 114.3203125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71681 + }, + "eng_Latn-hat_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "min_sentence2_length": 39, + "average_sentence2_length": 136.62109375, + "max_sentence2_length": 328, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72750 + }, + "hat_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 136.62109375, + "max_sentence1_length": 328, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72750 + }, + "eng_Latn-hau_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "min_sentence2_length": 44, + "average_sentence2_length": 144.5625, + "max_sentence2_length": 317, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74783 + }, + "hau_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 144.5625, + "max_sentence1_length": 317, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74783 + }, + "eng_Latn-haw_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.4375, + "max_sentence1_length": 827, + "min_sentence2_length": 20, + "average_sentence2_length": 125.9140625, + "max_sentence2_length": 289, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61274 + }, + "haw_Latn-eng_Latn": { + "min_sentence1_length": 20, + "average_sentence1_length": 125.9140625, + "max_sentence1_length": 289, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61274 + }, + "eng_Latn-hbo_Hebr": { + "min_sentence1_length": 32, + "average_sentence1_length": 151.23828125, + "max_sentence1_length": 305, + "min_sentence2_length": 38, + "average_sentence2_length": 140.0703125, + "max_sentence2_length": 249, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74575 + }, + "hbo_Hebr-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 140.0703125, + "max_sentence1_length": 249, + "min_sentence2_length": 32, + "average_sentence2_length": 151.23828125, + "max_sentence2_length": 305, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74575 + }, + "eng_Latn-hch_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.94921875, + "max_sentence1_length": 827, + "min_sentence2_length": 22, + "average_sentence2_length": 153.8828125, + "max_sentence2_length": 334, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68309 + }, + "hch_Latn-eng_Latn": { + "min_sentence1_length": 22, + "average_sentence1_length": 153.8828125, + "max_sentence1_length": 334, + "min_sentence2_length": 24, + "average_sentence2_length": 112.94921875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68309 + }, + "eng_Latn-heb_Hebr": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.4375, + "max_sentence1_length": 827, + "min_sentence2_length": 8, + "average_sentence2_length": 66.01171875, + "max_sentence2_length": 145, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 45939 + }, + "heb_Hebr-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 66.01171875, + "max_sentence1_length": 145, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 45939 + }, + "eng_Latn-heg_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.07421875, + "max_sentence1_length": 251, + "min_sentence2_length": 40, + "average_sentence2_length": 272.94921875, + "max_sentence2_length": 1307, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99334 + }, + "heg_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 272.94921875, + "max_sentence1_length": 1307, + "min_sentence2_length": 24, + "average_sentence2_length": 115.07421875, + "max_sentence2_length": 251, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99334 + }, + "eng_Latn-hin_Deva": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 17, + "average_sentence2_length": 112.6328125, + "max_sentence2_length": 251, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57607 + }, + "hin_Deva-eng_Latn": { + "min_sentence1_length": 17, + "average_sentence1_length": 112.6328125, + "max_sentence1_length": 251, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57607 + }, + "eng_Latn-hix_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 108.28515625, + "max_sentence1_length": 230, + "min_sentence2_length": 26, + "average_sentence2_length": 326.07421875, + "max_sentence2_length": 1365, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 111196 + }, + "hix_Latn-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 326.07421875, + "max_sentence1_length": 1365, + "min_sentence2_length": 24, + "average_sentence2_length": 108.28515625, + "max_sentence2_length": 230, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 111196 + }, + "eng_Latn-hla_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.8203125, + "max_sentence1_length": 232, + "min_sentence2_length": 45, + "average_sentence2_length": 215.89453125, + "max_sentence2_length": 721, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84151 + }, + "hla_Latn-eng_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 215.89453125, + "max_sentence1_length": 721, + "min_sentence2_length": 24, + "average_sentence2_length": 112.8203125, + "max_sentence2_length": 232, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84151 + }, + "eng_Latn-hlt_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "min_sentence2_length": 41, + "average_sentence2_length": 158.6796875, + "max_sentence2_length": 362, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77827 + }, + "hlt_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 158.6796875, + "max_sentence1_length": 362, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77827 + }, + "eng_Latn-hmo_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.375, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 189.68359375, + "max_sentence2_length": 389, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78095 + }, + "hmo_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 189.68359375, + "max_sentence1_length": 389, + "min_sentence2_length": 24, + "average_sentence2_length": 115.375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78095 + }, + "eng_Latn-hns_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.45703125, + "max_sentence1_length": 230, + "min_sentence2_length": 31, + "average_sentence2_length": 179.94140625, + "max_sentence2_length": 917, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74854 + }, + "hns_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 179.94140625, + "max_sentence1_length": 917, + "min_sentence2_length": 24, + "average_sentence2_length": 112.45703125, + "max_sentence2_length": 230, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74854 + }, + "eng_Latn-hop_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.4609375, + "max_sentence1_length": 239, + "min_sentence2_length": 31, + "average_sentence2_length": 145.7421875, + "max_sentence2_length": 568, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65844 + }, + "hop_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 145.7421875, + "max_sentence1_length": 568, + "min_sentence2_length": 24, + "average_sentence2_length": 111.4609375, + "max_sentence2_length": 239, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65844 + }, + "eng_Latn-hot_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 108.5078125, + "max_sentence1_length": 227, + "min_sentence2_length": 35, + "average_sentence2_length": 172.14453125, + "max_sentence2_length": 638, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71847 + }, + "hot_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 172.14453125, + "max_sentence1_length": 638, + "min_sentence2_length": 24, + "average_sentence2_length": 108.5078125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71847 + }, + "eng_Latn-hrv_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.0703125, + "max_sentence1_length": 827, + "min_sentence2_length": 1, + "average_sentence2_length": 88.1328125, + "max_sentence2_length": 232, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 51252 + }, + "hrv_Latn-eng_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 88.1328125, + "max_sentence1_length": 232, + "min_sentence2_length": 24, + "average_sentence2_length": 112.0703125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 51252 + }, + "eng_Latn-hto_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.77734375, + "max_sentence1_length": 827, + "min_sentence2_length": 38, + "average_sentence2_length": 206.19140625, + "max_sentence2_length": 990, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81912 + }, + "hto_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 206.19140625, + "max_sentence1_length": 990, + "min_sentence2_length": 24, + "average_sentence2_length": 113.77734375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81912 + }, + "eng_Latn-hub_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 108.31640625, + "max_sentence1_length": 227, + "min_sentence2_length": 36, + "average_sentence2_length": 189.55859375, + "max_sentence2_length": 946, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76256 + }, + "hub_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 189.55859375, + "max_sentence1_length": 946, + "min_sentence2_length": 24, + "average_sentence2_length": 108.31640625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76256 + }, + "eng_Latn-hui_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 108.33203125, + "max_sentence1_length": 242, + "min_sentence2_length": 46, + "average_sentence2_length": 213.171875, + "max_sentence2_length": 597, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82305 + }, + "hui_Latn-eng_Latn": { + "min_sentence1_length": 46, + "average_sentence1_length": 213.171875, + "max_sentence1_length": 597, + "min_sentence2_length": 23, + "average_sentence2_length": 108.33203125, + "max_sentence2_length": 242, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82305 + }, + "eng_Latn-hun_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.4453125, + "max_sentence1_length": 827, + "min_sentence2_length": 24, + "average_sentence2_length": 109.4375, + "max_sentence2_length": 305, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57058 + }, + "hun_Latn-eng_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.4375, + "max_sentence1_length": 305, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4453125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57058 + }, + "eng_Latn-hus_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "min_sentence2_length": 60, + "average_sentence2_length": 214.11328125, + "max_sentence2_length": 558, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92588 + }, + "hus_Latn-eng_Latn": { + "min_sentence1_length": 60, + "average_sentence1_length": 214.11328125, + "max_sentence1_length": 558, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92588 + }, + "eng_Latn-huu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.05859375, + "max_sentence1_length": 228, + "min_sentence2_length": 27, + "average_sentence2_length": 193.46875, + "max_sentence2_length": 812, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77703 + }, + "huu_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 193.46875, + "max_sentence1_length": 812, + "min_sentence2_length": 24, + "average_sentence2_length": 110.05859375, + "max_sentence2_length": 228, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77703 + }, + "eng_Latn-huv_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 40, + "average_sentence2_length": 173.2578125, + "max_sentence2_length": 400, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73889 + }, + "huv_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 173.2578125, + "max_sentence1_length": 400, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73889 + }, + "eng_Latn-hvn_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 113.0952380952381, + "max_sentence1_length": 257, + "min_sentence2_length": 61, + "average_sentence2_length": 212.65079365079364, + "max_sentence2_length": 763, + "num_samples": 126, + "num_samples_sentence2": 126, + "number_of_characters": 41044 + }, + "hvn_Latn-eng_Latn": { + "min_sentence1_length": 61, + "average_sentence1_length": 212.65079365079364, + "max_sentence1_length": 763, + "min_sentence2_length": 45, + "average_sentence2_length": 113.0952380952381, + "max_sentence2_length": 257, + "num_samples": 126, + "num_samples_sentence2": 126, + "number_of_characters": 41044 + }, + "eng_Latn-ian_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.0234375, + "max_sentence1_length": 227, + "min_sentence2_length": 28, + "average_sentence2_length": 230.6953125, + "max_sentence2_length": 718, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87736 + }, + "ian_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 230.6953125, + "max_sentence1_length": 718, + "min_sentence2_length": 24, + "average_sentence2_length": 112.0234375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87736 + }, + "eng_Latn-ign_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.6796875, + "max_sentence1_length": 271, + "min_sentence2_length": 58, + "average_sentence2_length": 362.24609375, + "max_sentence2_length": 1229, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 122093 + }, + "ign_Latn-eng_Latn": { + "min_sentence1_length": 58, + "average_sentence1_length": 362.24609375, + "max_sentence1_length": 1229, + "min_sentence2_length": 24, + "average_sentence2_length": 114.6796875, + "max_sentence2_length": 271, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 122093 + }, + "eng_Latn-ikk_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 34, + "average_sentence2_length": 130.39453125, + "max_sentence2_length": 338, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62154 + }, + "ikk_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 130.39453125, + "max_sentence1_length": 338, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62154 + }, + "eng_Latn-ikw_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 28, + "average_sentence2_length": 117.47265625, + "max_sentence2_length": 288, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58846 + }, + "ikw_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 117.47265625, + "max_sentence1_length": 288, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58846 + }, + "eng_Latn-ilo_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 25, + "average_sentence2_length": 140.42578125, + "max_sentence2_length": 314, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64722 + }, + "ilo_Latn-eng_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 140.42578125, + "max_sentence1_length": 314, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64722 + }, + "eng_Latn-imo_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.90234375, + "max_sentence1_length": 227, + "min_sentence2_length": 61, + "average_sentence2_length": 304.0078125, + "max_sentence2_length": 1169, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 106217 + }, + "imo_Latn-eng_Latn": { + "min_sentence1_length": 61, + "average_sentence1_length": 304.0078125, + "max_sentence1_length": 1169, + "min_sentence2_length": 24, + "average_sentence2_length": 110.90234375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 106217 + }, + "eng_Latn-inb_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.0390625, + "max_sentence1_length": 827, + "min_sentence2_length": 51, + "average_sentence2_length": 195.54296875, + "max_sentence2_length": 520, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79253 + }, + "inb_Latn-eng_Latn": { + "min_sentence1_length": 51, + "average_sentence1_length": 195.54296875, + "max_sentence1_length": 520, + "min_sentence2_length": 24, + "average_sentence2_length": 114.0390625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79253 + }, + "eng_Latn-ind_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 146.52734375, + "max_sentence1_length": 341, + "min_sentence2_length": 41, + "average_sentence2_length": 181.54296875, + "max_sentence2_length": 504, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83986 + }, + "ind_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 181.54296875, + "max_sentence1_length": 504, + "min_sentence2_length": 35, + "average_sentence2_length": 146.52734375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83986 + }, + "eng_Latn-ino_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.29296875, + "max_sentence1_length": 230, + "min_sentence2_length": 54, + "average_sentence2_length": 299.77734375, + "max_sentence2_length": 940, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 104722 + }, + "ino_Latn-eng_Latn": { + "min_sentence1_length": 54, + "average_sentence1_length": 299.77734375, + "max_sentence1_length": 940, + "min_sentence2_length": 24, + "average_sentence2_length": 109.29296875, + "max_sentence2_length": 230, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 104722 + }, + "eng_Latn-iou_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.2890625, + "max_sentence1_length": 239, + "min_sentence2_length": 45, + "average_sentence2_length": 218.77734375, + "max_sentence2_length": 810, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84753 + }, + "iou_Latn-eng_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 218.77734375, + "max_sentence1_length": 810, + "min_sentence2_length": 24, + "average_sentence2_length": 112.2890625, + "max_sentence2_length": 239, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84753 + }, + "eng_Latn-ipi_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 116.83203125, + "max_sentence1_length": 273, + "min_sentence2_length": 113, + "average_sentence2_length": 594.37109375, + "max_sentence2_length": 1750, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 182068 + }, + "ipi_Latn-eng_Latn": { + "min_sentence1_length": 113, + "average_sentence1_length": 594.37109375, + "max_sentence1_length": 1750, + "min_sentence2_length": 24, + "average_sentence2_length": 116.83203125, + "max_sentence2_length": 273, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 182068 + }, + "eng_Latn-isn_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 27, + "average_sentence2_length": 121.984375, + "max_sentence2_length": 325, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60001 + }, + "isn_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 121.984375, + "max_sentence1_length": 325, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60001 + }, + "eng_Latn-ita_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 146.515625, + "max_sentence1_length": 341, + "min_sentence2_length": 40, + "average_sentence2_length": 146.9140625, + "max_sentence2_length": 381, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75118 + }, + "ita_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 146.9140625, + "max_sentence1_length": 381, + "min_sentence2_length": 56, + "average_sentence2_length": 146.515625, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75118 + }, + "eng_Latn-iws_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 30, + "average_sentence2_length": 296.91015625, + "max_sentence2_length": 854, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 104782 + }, + "iws_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 296.91015625, + "max_sentence1_length": 854, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 104782 + }, + "eng_Latn-ixl_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 146.31640625, + "max_sentence1_length": 341, + "min_sentence2_length": 47, + "average_sentence2_length": 244.671875, + "max_sentence2_length": 565, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 100093 + }, + "ixl_Latn-eng_Latn": { + "min_sentence1_length": 47, + "average_sentence1_length": 244.671875, + "max_sentence1_length": 565, + "min_sentence2_length": 35, + "average_sentence2_length": 146.31640625, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 100093 + }, + "eng_Latn-jac_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 116.2421875, + "max_sentence1_length": 827, + "min_sentence2_length": 34, + "average_sentence2_length": 182.84765625, + "max_sentence2_length": 573, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76567 + }, + "jac_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 182.84765625, + "max_sentence1_length": 573, + "min_sentence2_length": 31, + "average_sentence2_length": 116.2421875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76567 + }, + "eng_Latn-jae_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.0625, + "max_sentence1_length": 827, + "min_sentence2_length": 24, + "average_sentence2_length": 127.1328125, + "max_sentence2_length": 294, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61746 + }, + "jae_Latn-eng_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 127.1328125, + "max_sentence1_length": 294, + "min_sentence2_length": 24, + "average_sentence2_length": 114.0625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61746 + }, + "eng_Latn-jao_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 116.4724409448819, + "max_sentence1_length": 248, + "min_sentence2_length": 59, + "average_sentence2_length": 414.5511811023622, + "max_sentence2_length": 1292, + "num_samples": 127, + "num_samples_sentence2": 127, + "number_of_characters": 67440 + }, + "jao_Latn-eng_Latn": { + "min_sentence1_length": 59, + "average_sentence1_length": 414.5511811023622, + "max_sentence1_length": 1292, + "min_sentence2_length": 34, + "average_sentence2_length": 116.4724409448819, + "max_sentence2_length": 248, + "num_samples": 127, + "num_samples_sentence2": 127, + "number_of_characters": 67440 + }, + "eng_Latn-jic_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.5078125, + "max_sentence1_length": 827, + "min_sentence2_length": 40, + "average_sentence2_length": 236.2578125, + "max_sentence2_length": 691, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89796 + }, + "jic_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 236.2578125, + "max_sentence1_length": 691, + "min_sentence2_length": 24, + "average_sentence2_length": 114.5078125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89796 + }, + "eng_Latn-jid_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.1484375, + "max_sentence1_length": 827, + "min_sentence2_length": 16, + "average_sentence2_length": 96.96484375, + "max_sentence2_length": 269, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 54301 + }, + "jid_Latn-eng_Latn": { + "min_sentence1_length": 16, + "average_sentence1_length": 96.96484375, + "max_sentence1_length": 269, + "min_sentence2_length": 24, + "average_sentence2_length": 115.1484375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 54301 + }, + "eng_Latn-jiv_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.49609375, + "max_sentence1_length": 230, + "min_sentence2_length": 38, + "average_sentence2_length": 175.2890625, + "max_sentence2_length": 584, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73161 + }, + "jiv_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 175.2890625, + "max_sentence1_length": 584, + "min_sentence2_length": 24, + "average_sentence2_length": 110.49609375, + "max_sentence2_length": 230, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73161 + }, + "eng_Latn-jni_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.625, + "max_sentence1_length": 827, + "min_sentence2_length": 25, + "average_sentence2_length": 111.0703125, + "max_sentence2_length": 255, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57522 + }, + "jni_Latn-eng_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 111.0703125, + "max_sentence1_length": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 113.625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57522 + }, + "eng_Latn-jpn_Jpan": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.05859375, + "max_sentence1_length": 227, + "min_sentence2_length": 13, + "average_sentence2_length": 57.05078125, + "max_sentence2_length": 116, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 43036 + }, + "jpn_Jpan-eng_Latn": { + "min_sentence1_length": 13, + "average_sentence1_length": 57.05078125, + "max_sentence1_length": 116, + "min_sentence2_length": 24, + "average_sentence2_length": 111.05859375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 43036 + }, + "eng_Latn-jvn_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 112.91015625, + "max_sentence1_length": 227, + "min_sentence2_length": 39, + "average_sentence2_length": 230.34765625, + "max_sentence2_length": 904, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87874 + }, + "jvn_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 230.34765625, + "max_sentence1_length": 904, + "min_sentence2_length": 31, + "average_sentence2_length": 112.91015625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87874 + }, + "eng_Latn-kan_Knda": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.9609375, + "max_sentence1_length": 827, + "min_sentence2_length": 26, + "average_sentence2_length": 126.96484375, + "max_sentence2_length": 407, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61165 + }, + "kan_Knda-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 126.96484375, + "max_sentence1_length": 407, + "min_sentence2_length": 24, + "average_sentence2_length": 111.9609375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61165 + }, + "eng_Latn-kaq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.203125, + "max_sentence1_length": 227, + "min_sentence2_length": 32, + "average_sentence2_length": 171.16015625, + "max_sentence2_length": 551, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72285 + }, + "kaq_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 171.16015625, + "max_sentence1_length": 551, + "min_sentence2_length": 24, + "average_sentence2_length": 111.203125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72285 + }, + "eng_Latn-kbc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.796875, + "max_sentence1_length": 827, + "min_sentence2_length": 51, + "average_sentence2_length": 264.6015625, + "max_sentence2_length": 948, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97126 + }, + "kbc_Latn-eng_Latn": { + "min_sentence1_length": 51, + "average_sentence1_length": 264.6015625, + "max_sentence1_length": 948, + "min_sentence2_length": 24, + "average_sentence2_length": 114.796875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97126 + }, + "eng_Latn-kbh_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.74609375, + "max_sentence1_length": 227, + "min_sentence2_length": 27, + "average_sentence2_length": 253.4609375, + "max_sentence2_length": 903, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93237 + }, + "kbh_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 253.4609375, + "max_sentence1_length": 903, + "min_sentence2_length": 24, + "average_sentence2_length": 110.74609375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93237 + }, + "eng_Latn-kbm_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.171875, + "max_sentence1_length": 227, + "min_sentence2_length": 38, + "average_sentence2_length": 209.3125, + "max_sentence2_length": 748, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81788 + }, + "kbm_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 209.3125, + "max_sentence1_length": 748, + "min_sentence2_length": 24, + "average_sentence2_length": 110.171875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81788 + }, + "eng_Latn-kbq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.42578125, + "max_sentence1_length": 827, + "min_sentence2_length": 44, + "average_sentence2_length": 151.98046875, + "max_sentence2_length": 381, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68456 + }, + "kbq_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 151.98046875, + "max_sentence1_length": 381, + "min_sentence2_length": 24, + "average_sentence2_length": 115.42578125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68456 + }, + "eng_Latn-kdc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 29, + "average_sentence2_length": 127.23828125, + "max_sentence2_length": 285, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61346 + }, + "kdc_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 127.23828125, + "max_sentence1_length": 285, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61346 + }, + "eng_Latn-kde_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.59375, + "max_sentence1_length": 376, + "min_sentence2_length": 27, + "average_sentence2_length": 155.28125, + "max_sentence2_length": 424, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68320 + }, + "kde_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 155.28125, + "max_sentence1_length": 424, + "min_sentence2_length": 24, + "average_sentence2_length": 111.59375, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68320 + }, + "eng_Latn-kdl_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.4921875, + "max_sentence1_length": 227, + "min_sentence2_length": 28, + "average_sentence2_length": 181.47265625, + "max_sentence2_length": 664, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74999 + }, + "kdl_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 181.47265625, + "max_sentence1_length": 664, + "min_sentence2_length": 24, + "average_sentence2_length": 111.4921875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74999 + }, + "eng_Latn-kek_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.296875, + "max_sentence1_length": 827, + "min_sentence2_length": 40, + "average_sentence2_length": 182.1640625, + "max_sentence2_length": 517, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76150 + }, + "kek_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 182.1640625, + "max_sentence1_length": 517, + "min_sentence2_length": 24, + "average_sentence2_length": 115.296875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76150 + }, + "eng_Latn-ken_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.41796875, + "max_sentence1_length": 227, + "min_sentence2_length": 29, + "average_sentence2_length": 152.58203125, + "max_sentence2_length": 419, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67584 + }, + "ken_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 152.58203125, + "max_sentence1_length": 419, + "min_sentence2_length": 24, + "average_sentence2_length": 111.41796875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67584 + }, + "eng_Latn-kew_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.6328125, + "max_sentence1_length": 227, + "min_sentence2_length": 36, + "average_sentence2_length": 182.625, + "max_sentence2_length": 531, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74818 + }, + "kew_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 182.625, + "max_sentence1_length": 531, + "min_sentence2_length": 24, + "average_sentence2_length": 109.6328125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74818 + }, + "eng_Latn-kgf_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.9609375, + "max_sentence1_length": 227, + "min_sentence2_length": 36, + "average_sentence2_length": 150.54296875, + "max_sentence2_length": 389, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66689 + }, + "kgf_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 150.54296875, + "max_sentence1_length": 389, + "min_sentence2_length": 24, + "average_sentence2_length": 109.9609375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66689 + }, + "eng_Latn-kgk_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.0625, + "max_sentence1_length": 271, + "min_sentence2_length": 46, + "average_sentence2_length": 345.99609375, + "max_sentence2_length": 1240, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 117007 + }, + "kgk_Latn-eng_Latn": { + "min_sentence1_length": 46, + "average_sentence1_length": 345.99609375, + "max_sentence1_length": 1240, + "min_sentence2_length": 24, + "average_sentence2_length": 111.0625, + "max_sentence2_length": 271, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 117007 + }, + "eng_Latn-kgp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.8984375, + "max_sentence1_length": 228, + "min_sentence2_length": 17, + "average_sentence2_length": 180.0546875, + "max_sentence2_length": 529, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74740 + }, + "kgp_Latn-eng_Latn": { + "min_sentence1_length": 17, + "average_sentence1_length": 180.0546875, + "max_sentence1_length": 529, + "min_sentence2_length": 24, + "average_sentence2_length": 111.8984375, + "max_sentence2_length": 228, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74740 + }, + "eng_Latn-khs_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.5859375, + "max_sentence1_length": 227, + "min_sentence2_length": 52, + "average_sentence2_length": 277.28125, + "max_sentence2_length": 953, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99038 + }, + "khs_Latn-eng_Latn": { + "min_sentence1_length": 52, + "average_sentence1_length": 277.28125, + "max_sentence1_length": 953, + "min_sentence2_length": 24, + "average_sentence2_length": 109.5859375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99038 + }, + "eng_Latn-khz_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.4921875, + "max_sentence1_length": 827, + "min_sentence2_length": 26, + "average_sentence2_length": 166.2734375, + "max_sentence2_length": 750, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71876 + }, + "khz_Latn-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 166.2734375, + "max_sentence1_length": 750, + "min_sentence2_length": 24, + "average_sentence2_length": 114.4921875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71876 + }, + "eng_Latn-kik_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.34765625, + "max_sentence1_length": 227, + "min_sentence2_length": 28, + "average_sentence2_length": 132.87890625, + "max_sentence2_length": 353, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62010 + }, + "kik_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 132.87890625, + "max_sentence1_length": 353, + "min_sentence2_length": 24, + "average_sentence2_length": 109.34765625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62010 + }, + "eng_Latn-kiw_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 105.37349397590361, + "max_sentence1_length": 245, + "min_sentence2_length": 38, + "average_sentence2_length": 133.75903614457832, + "max_sentence2_length": 262, + "num_samples": 83, + "num_samples_sentence2": 83, + "number_of_characters": 19848 + }, + "kiw_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 133.75903614457832, + "max_sentence1_length": 262, + "min_sentence2_length": 45, + "average_sentence2_length": 105.37349397590361, + "max_sentence2_length": 245, + "num_samples": 83, + "num_samples_sentence2": 83, + "number_of_characters": 19848 + }, + "eng_Latn-kiz_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.8828125, + "max_sentence1_length": 827, + "min_sentence2_length": 16, + "average_sentence2_length": 131.95703125, + "max_sentence2_length": 386, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62935 + }, + "kiz_Latn-eng_Latn": { + "min_sentence1_length": 16, + "average_sentence1_length": 131.95703125, + "max_sentence1_length": 386, + "min_sentence2_length": 24, + "average_sentence2_length": 113.8828125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62935 + }, + "eng_Latn-kje_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.15234375, + "max_sentence1_length": 227, + "min_sentence2_length": 30, + "average_sentence2_length": 224.1015625, + "max_sentence2_length": 857, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85569 + }, + "kje_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 224.1015625, + "max_sentence1_length": 857, + "min_sentence2_length": 24, + "average_sentence2_length": 110.15234375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85569 + }, + "eng_Latn-kjs_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.6328125, + "max_sentence1_length": 227, + "min_sentence2_length": 38, + "average_sentence2_length": 187.8359375, + "max_sentence2_length": 530, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76152 + }, + "kjs_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 187.8359375, + "max_sentence1_length": 530, + "min_sentence2_length": 24, + "average_sentence2_length": 109.6328125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76152 + }, + "eng_Latn-kkc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.05859375, + "max_sentence1_length": 243, + "min_sentence2_length": 33, + "average_sentence2_length": 231.08984375, + "max_sentence2_length": 836, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88358 + }, + "kkc_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 231.08984375, + "max_sentence1_length": 836, + "min_sentence2_length": 24, + "average_sentence2_length": 114.05859375, + "max_sentence2_length": 243, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88358 + }, + "eng_Latn-kkl_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 111.1640625, + "max_sentence1_length": 243, + "min_sentence2_length": 44, + "average_sentence2_length": 365.1328125, + "max_sentence2_length": 1372, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 121932 + }, + "kkl_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 365.1328125, + "max_sentence1_length": 1372, + "min_sentence2_length": 25, + "average_sentence2_length": 111.1640625, + "max_sentence2_length": 243, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 121932 + }, + "eng_Latn-klt_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.5546875, + "max_sentence1_length": 243, + "min_sentence2_length": 43, + "average_sentence2_length": 203.6015625, + "max_sentence2_length": 765, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81192 + }, + "klt_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 203.6015625, + "max_sentence1_length": 765, + "min_sentence2_length": 24, + "average_sentence2_length": 113.5546875, + "max_sentence2_length": 243, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81192 + }, + "eng_Latn-klv_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.8046875, + "max_sentence1_length": 227, + "min_sentence2_length": 31, + "average_sentence2_length": 168.53125, + "max_sentence2_length": 465, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71510 + }, + "klv_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 168.53125, + "max_sentence1_length": 465, + "min_sentence2_length": 24, + "average_sentence2_length": 110.8046875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71510 + }, + "eng_Latn-kmg_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 28, + "average_sentence2_length": 119.46484375, + "max_sentence2_length": 253, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59356 + }, + "kmg_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 119.46484375, + "max_sentence1_length": 253, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59356 + }, + "eng_Latn-kmh_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 145.40625, + "max_sentence1_length": 341, + "min_sentence2_length": 37, + "average_sentence2_length": 198.69140625, + "max_sentence2_length": 544, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88089 + }, + "kmh_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 198.69140625, + "max_sentence1_length": 544, + "min_sentence2_length": 1, + "average_sentence2_length": 145.40625, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88089 + }, + "eng_Latn-kmk_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.44921875, + "max_sentence1_length": 827, + "min_sentence2_length": 32, + "average_sentence2_length": 166.76171875, + "max_sentence2_length": 763, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72246 + }, + "kmk_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 166.76171875, + "max_sentence1_length": 763, + "min_sentence2_length": 24, + "average_sentence2_length": 115.44921875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72246 + }, + "eng_Latn-kmo_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 112.0859375, + "max_sentence1_length": 248, + "min_sentence2_length": 45, + "average_sentence2_length": 198.87109375, + "max_sentence2_length": 627, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79605 + }, + "kmo_Latn-eng_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 198.87109375, + "max_sentence1_length": 627, + "min_sentence2_length": 31, + "average_sentence2_length": 112.0859375, + "max_sentence2_length": 248, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79605 + }, + "eng_Latn-kms_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.30078125, + "max_sentence1_length": 827, + "min_sentence2_length": 37, + "average_sentence2_length": 203.09375, + "max_sentence2_length": 631, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81253 + }, + "kms_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 203.09375, + "max_sentence1_length": 631, + "min_sentence2_length": 24, + "average_sentence2_length": 114.30078125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81253 + }, + "eng_Latn-kmu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.50390625, + "max_sentence1_length": 227, + "min_sentence2_length": 53, + "average_sentence2_length": 256.25, + "max_sentence2_length": 873, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93633 + }, + "kmu_Latn-eng_Latn": { + "min_sentence1_length": 53, + "average_sentence1_length": 256.25, + "max_sentence1_length": 873, + "min_sentence2_length": 24, + "average_sentence2_length": 109.50390625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93633 + }, + "eng_Latn-kne_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.421875, + "max_sentence1_length": 239, + "min_sentence2_length": 27, + "average_sentence2_length": 187.78125, + "max_sentence2_length": 678, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76340 + }, + "kne_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 187.78125, + "max_sentence1_length": 678, + "min_sentence2_length": 24, + "average_sentence2_length": 110.421875, + "max_sentence2_length": 239, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76340 + }, + "eng_Latn-knf_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.7578125, + "max_sentence1_length": 227, + "min_sentence2_length": 17, + "average_sentence2_length": 122.140625, + "max_sentence2_length": 328, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59366 + }, + "knf_Latn-eng_Latn": { + "min_sentence1_length": 17, + "average_sentence1_length": 122.140625, + "max_sentence1_length": 328, + "min_sentence2_length": 24, + "average_sentence2_length": 109.7578125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59366 + }, + "eng_Latn-knj_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.7578125, + "max_sentence1_length": 227, + "min_sentence2_length": 29, + "average_sentence2_length": 193.25, + "max_sentence2_length": 528, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77826 + }, + "knj_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 193.25, + "max_sentence1_length": 528, + "min_sentence2_length": 24, + "average_sentence2_length": 110.7578125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77826 + }, + "eng_Latn-knv_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.7265625, + "max_sentence1_length": 341, + "min_sentence2_length": 75, + "average_sentence2_length": 310.953125, + "max_sentence2_length": 789, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 117422 + }, + "knv_Latn-eng_Latn": { + "min_sentence1_length": 75, + "average_sentence1_length": 310.953125, + "max_sentence1_length": 789, + "min_sentence2_length": 56, + "average_sentence2_length": 147.7265625, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 117422 + }, + "eng_Latn-kos_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 30, + "average_sentence2_length": 129.9453125, + "max_sentence2_length": 322, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62039 + }, + "kos_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 129.9453125, + "max_sentence1_length": 322, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62039 + }, + "eng_Latn-kpf_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.43359375, + "max_sentence1_length": 227, + "min_sentence2_length": 27, + "average_sentence2_length": 143.1171875, + "max_sentence2_length": 520, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64909 + }, + "kpf_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 143.1171875, + "max_sentence1_length": 520, + "min_sentence2_length": 24, + "average_sentence2_length": 110.43359375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64909 + }, + "eng_Latn-kpg_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.41015625, + "max_sentence1_length": 227, + "min_sentence2_length": 34, + "average_sentence2_length": 165.4765625, + "max_sentence2_length": 502, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70883 + }, + "kpg_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 165.4765625, + "max_sentence1_length": 502, + "min_sentence2_length": 24, + "average_sentence2_length": 111.41015625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70883 + }, + "eng_Latn-kpj_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.46875, + "max_sentence1_length": 249, + "min_sentence2_length": 31, + "average_sentence2_length": 163.3203125, + "max_sentence2_length": 583, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70346 + }, + "kpj_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 163.3203125, + "max_sentence1_length": 583, + "min_sentence2_length": 24, + "average_sentence2_length": 111.46875, + "max_sentence2_length": 249, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70346 + }, + "eng_Latn-kpr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.37109375, + "max_sentence1_length": 227, + "min_sentence2_length": 29, + "average_sentence2_length": 281.9296875, + "max_sentence2_length": 1215, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 100685 + }, + "kpr_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 281.9296875, + "max_sentence1_length": 1215, + "min_sentence2_length": 24, + "average_sentence2_length": 111.37109375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 100685 + }, + "eng_Latn-kpw_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.72265625, + "max_sentence1_length": 227, + "min_sentence2_length": 32, + "average_sentence2_length": 204.51953125, + "max_sentence2_length": 617, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80446 + }, + "kpw_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 204.51953125, + "max_sentence1_length": 617, + "min_sentence2_length": 24, + "average_sentence2_length": 109.72265625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80446 + }, + "eng_Latn-kpx_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.4765625, + "max_sentence1_length": 216, + "min_sentence2_length": 36, + "average_sentence2_length": 183.6015625, + "max_sentence2_length": 549, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75284 + }, + "kpx_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 183.6015625, + "max_sentence1_length": 549, + "min_sentence2_length": 24, + "average_sentence2_length": 110.4765625, + "max_sentence2_length": 216, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75284 + }, + "eng_Latn-kqa_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 105.74603174603175, + "max_sentence1_length": 257, + "min_sentence2_length": 75, + "average_sentence2_length": 229.20634920634922, + "max_sentence2_length": 718, + "num_samples": 63, + "num_samples_sentence2": 63, + "number_of_characters": 21102 + }, + "kqa_Latn-eng_Latn": { + "min_sentence1_length": 75, + "average_sentence1_length": 229.20634920634922, + "max_sentence1_length": 718, + "min_sentence2_length": 42, + "average_sentence2_length": 105.74603174603175, + "max_sentence2_length": 257, + "num_samples": 63, + "num_samples_sentence2": 63, + "number_of_characters": 21102 + }, + "eng_Latn-kqc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.890625, + "max_sentence1_length": 827, + "min_sentence2_length": 30, + "average_sentence2_length": 137.24609375, + "max_sentence2_length": 357, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64803 + }, + "kqc_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 137.24609375, + "max_sentence1_length": 357, + "min_sentence2_length": 24, + "average_sentence2_length": 115.890625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64803 + }, + "eng_Latn-kqf_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 114.31640625, + "max_sentence1_length": 257, + "min_sentence2_length": 40, + "average_sentence2_length": 148.671875, + "max_sentence2_length": 670, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67325 + }, + "kqf_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 148.671875, + "max_sentence1_length": 670, + "min_sentence2_length": 38, + "average_sentence2_length": 114.31640625, + "max_sentence2_length": 257, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67325 + }, + "eng_Latn-kql_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 112.87857142857143, + "max_sentence1_length": 257, + "min_sentence2_length": 34, + "average_sentence2_length": 160.8357142857143, + "max_sentence2_length": 501, + "num_samples": 140, + "num_samples_sentence2": 140, + "number_of_characters": 38320 + }, + "kql_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 160.8357142857143, + "max_sentence1_length": 501, + "min_sentence2_length": 45, + "average_sentence2_length": 112.87857142857143, + "max_sentence2_length": 257, + "num_samples": 140, + "num_samples_sentence2": 140, + "number_of_characters": 38320 + }, + "eng_Latn-kqw_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.9453125, + "max_sentence1_length": 227, + "min_sentence2_length": 34, + "average_sentence2_length": 166.953125, + "max_sentence2_length": 437, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70886 + }, + "kqw_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 166.953125, + "max_sentence1_length": 437, + "min_sentence2_length": 24, + "average_sentence2_length": 109.9453125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70886 + }, + "eng_Latn-ksd_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 138.44140625, + "max_sentence2_length": 387, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64976 + }, + "ksd_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 138.44140625, + "max_sentence1_length": 387, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64976 + }, + "eng_Latn-ksj_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.5078125, + "max_sentence1_length": 245, + "min_sentence2_length": 43, + "average_sentence2_length": 153.34765625, + "max_sentence2_length": 471, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68059 + }, + "ksj_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 153.34765625, + "max_sentence1_length": 471, + "min_sentence2_length": 24, + "average_sentence2_length": 112.5078125, + "max_sentence2_length": 245, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68059 + }, + "eng_Latn-ksr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.6640625, + "max_sentence1_length": 376, + "min_sentence2_length": 39, + "average_sentence2_length": 223.3125, + "max_sentence2_length": 652, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85498 + }, + "ksr_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 223.3125, + "max_sentence1_length": 652, + "min_sentence2_length": 24, + "average_sentence2_length": 110.6640625, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85498 + }, + "eng_Latn-ktm_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 113.71484375, + "max_sentence1_length": 257, + "min_sentence2_length": 36, + "average_sentence2_length": 160.20703125, + "max_sentence2_length": 676, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70124 + }, + "ktm_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 160.20703125, + "max_sentence1_length": 676, + "min_sentence2_length": 23, + "average_sentence2_length": 113.71484375, + "max_sentence2_length": 257, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70124 + }, + "eng_Latn-kto_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.33984375, + "max_sentence1_length": 227, + "min_sentence2_length": 43, + "average_sentence2_length": 250.73828125, + "max_sentence2_length": 1025, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92436 + }, + "kto_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 250.73828125, + "max_sentence1_length": 1025, + "min_sentence2_length": 24, + "average_sentence2_length": 110.33984375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92436 + }, + "eng_Latn-kud_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 116.34765625, + "max_sentence1_length": 827, + "min_sentence2_length": 26, + "average_sentence2_length": 164.359375, + "max_sentence2_length": 449, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71861 + }, + "kud_Latn-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 164.359375, + "max_sentence1_length": 449, + "min_sentence2_length": 24, + "average_sentence2_length": 116.34765625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71861 + }, + "eng_Latn-kue_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 52, + "average_sentence2_length": 171.66796875, + "max_sentence2_length": 416, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72720 + }, + "kue_Latn-eng_Latn": { + "min_sentence1_length": 52, + "average_sentence1_length": 171.66796875, + "max_sentence1_length": 416, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72720 + }, + "eng_Latn-kup_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.52734375, + "max_sentence1_length": 232, + "min_sentence2_length": 72, + "average_sentence2_length": 294.80078125, + "max_sentence2_length": 801, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 103764 + }, + "kup_Latn-eng_Latn": { + "min_sentence1_length": 72, + "average_sentence1_length": 294.80078125, + "max_sentence1_length": 801, + "min_sentence2_length": 24, + "average_sentence2_length": 110.52734375, + "max_sentence2_length": 232, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 103764 + }, + "eng_Latn-kvg_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.64453125, + "max_sentence1_length": 227, + "min_sentence2_length": 43, + "average_sentence2_length": 179.86328125, + "max_sentence2_length": 422, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74370 + }, + "kvg_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 179.86328125, + "max_sentence1_length": 422, + "min_sentence2_length": 24, + "average_sentence2_length": 110.64453125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74370 + }, + "eng_Latn-kvn_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.171875, + "max_sentence1_length": 827, + "min_sentence2_length": 51, + "average_sentence2_length": 182.14453125, + "max_sentence2_length": 451, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75857 + }, + "kvn_Latn-eng_Latn": { + "min_sentence1_length": 51, + "average_sentence1_length": 182.14453125, + "max_sentence1_length": 451, + "min_sentence2_length": 24, + "average_sentence2_length": 114.171875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75857 + }, + "eng_Latn-kwd_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.453125, + "max_sentence1_length": 827, + "min_sentence2_length": 48, + "average_sentence2_length": 221.33984375, + "max_sentence2_length": 826, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85451 + }, + "kwd_Latn-eng_Latn": { + "min_sentence1_length": 48, + "average_sentence1_length": 221.33984375, + "max_sentence1_length": 826, + "min_sentence2_length": 24, + "average_sentence2_length": 112.453125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85451 + }, + "eng_Latn-kwf_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.73046875, + "max_sentence1_length": 827, + "min_sentence2_length": 46, + "average_sentence2_length": 214.3515625, + "max_sentence2_length": 719, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83989 + }, + "kwf_Latn-eng_Latn": { + "min_sentence1_length": 46, + "average_sentence1_length": 214.3515625, + "max_sentence1_length": 719, + "min_sentence2_length": 24, + "average_sentence2_length": 113.73046875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83989 + }, + "eng_Latn-kwi_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 114.2109375, + "max_sentence1_length": 827, + "min_sentence2_length": 33, + "average_sentence2_length": 178.421875, + "max_sentence2_length": 526, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74914 + }, + "kwi_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 178.421875, + "max_sentence1_length": 526, + "min_sentence2_length": 31, + "average_sentence2_length": 114.2109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74914 + }, + "eng_Latn-kwj_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.15234375, + "max_sentence1_length": 227, + "min_sentence2_length": 49, + "average_sentence2_length": 220.26953125, + "max_sentence2_length": 697, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84588 + }, + "kwj_Latn-eng_Latn": { + "min_sentence1_length": 49, + "average_sentence1_length": 220.26953125, + "max_sentence1_length": 697, + "min_sentence2_length": 24, + "average_sentence2_length": 110.15234375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84588 + }, + "eng_Latn-kyc_Latn": { + "min_sentence1_length": 21, + "average_sentence1_length": 114.67578125, + "max_sentence1_length": 827, + "min_sentence2_length": 37, + "average_sentence2_length": 182.01953125, + "max_sentence2_length": 766, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75954 + }, + "kyc_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 182.01953125, + "max_sentence1_length": 766, + "min_sentence2_length": 21, + "average_sentence2_length": 114.67578125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75954 + }, + "eng_Latn-kyf_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.4921875, + "max_sentence1_length": 827, + "min_sentence2_length": 23, + "average_sentence2_length": 143.33203125, + "max_sentence2_length": 476, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65747 + }, + "kyf_Latn-eng_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 143.33203125, + "max_sentence1_length": 476, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4921875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65747 + }, + "eng_Latn-kyg_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.09375, + "max_sentence1_length": 227, + "min_sentence2_length": 56, + "average_sentence2_length": 234.01953125, + "max_sentence2_length": 726, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88349 + }, + "kyg_Latn-eng_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 234.01953125, + "max_sentence1_length": 726, + "min_sentence2_length": 24, + "average_sentence2_length": 111.09375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88349 + }, + "eng_Latn-kyq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.11328125, + "max_sentence1_length": 227, + "min_sentence2_length": 40, + "average_sentence2_length": 166.59375, + "max_sentence2_length": 586, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70837 + }, + "kyq_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 166.59375, + "max_sentence1_length": 586, + "min_sentence2_length": 24, + "average_sentence2_length": 110.11328125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70837 + }, + "eng_Latn-kyz_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 113.44921875, + "max_sentence1_length": 246, + "min_sentence2_length": 50, + "average_sentence2_length": 406.671875, + "max_sentence2_length": 1885, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 133151 + }, + "kyz_Latn-eng_Latn": { + "min_sentence1_length": 50, + "average_sentence1_length": 406.671875, + "max_sentence1_length": 1885, + "min_sentence2_length": 31, + "average_sentence2_length": 113.44921875, + "max_sentence2_length": 246, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 133151 + }, + "eng_Latn-kze_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 108.52734375, + "max_sentence1_length": 227, + "min_sentence2_length": 37, + "average_sentence2_length": 210.296875, + "max_sentence2_length": 632, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81619 + }, + "kze_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 210.296875, + "max_sentence1_length": 632, + "min_sentence2_length": 24, + "average_sentence2_length": 108.52734375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81619 + }, + "eng_Latn-lac_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 112.69921875, + "max_sentence1_length": 827, + "min_sentence2_length": 24, + "average_sentence2_length": 251.0625, + "max_sentence2_length": 821, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93123 + }, + "lac_Latn-eng_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 251.0625, + "max_sentence1_length": 821, + "min_sentence2_length": 31, + "average_sentence2_length": 112.69921875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93123 + }, + "eng_Latn-lat_Latn": { + "min_sentence1_length": 20, + "average_sentence1_length": 122.43359375, + "max_sentence1_length": 422, + "min_sentence2_length": 20, + "average_sentence2_length": 110.3046875, + "max_sentence2_length": 365, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59581 + }, + "lat_Latn-eng_Latn": { + "min_sentence1_length": 20, + "average_sentence1_length": 110.3046875, + "max_sentence1_length": 365, + "min_sentence2_length": 20, + "average_sentence2_length": 122.43359375, + "max_sentence2_length": 422, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59581 + }, + "eng_Latn-lbb_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.8828125, + "max_sentence1_length": 227, + "min_sentence2_length": 36, + "average_sentence2_length": 168.0859375, + "max_sentence2_length": 456, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71416 + }, + "lbb_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 168.0859375, + "max_sentence1_length": 456, + "min_sentence2_length": 24, + "average_sentence2_length": 110.8828125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71416 + }, + "eng_Latn-lbk_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.3125, + "max_sentence1_length": 827, + "min_sentence2_length": 26, + "average_sentence2_length": 171.08984375, + "max_sentence2_length": 529, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72807 + }, + "lbk_Latn-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 171.08984375, + "max_sentence1_length": 529, + "min_sentence2_length": 24, + "average_sentence2_length": 113.3125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72807 + }, + "eng_Latn-lcm_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 37, + "average_sentence2_length": 154.9921875, + "max_sentence2_length": 347, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68451 + }, + "lcm_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 154.9921875, + "max_sentence1_length": 347, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68451 + }, + "eng_Latn-leu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.5546875, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 189.1875, + "max_sentence2_length": 676, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78014 + }, + "leu_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 189.1875, + "max_sentence1_length": 676, + "min_sentence2_length": 24, + "average_sentence2_length": 115.5546875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78014 + }, + "eng_Latn-lex_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.51171875, + "max_sentence1_length": 827, + "min_sentence2_length": 44, + "average_sentence2_length": 247.8828125, + "max_sentence2_length": 675, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92773 + }, + "lex_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 247.8828125, + "max_sentence1_length": 675, + "min_sentence2_length": 24, + "average_sentence2_length": 114.51171875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92773 + }, + "eng_Latn-lgl_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.73046875, + "max_sentence1_length": 827, + "min_sentence2_length": 37, + "average_sentence2_length": 187.2421875, + "max_sentence2_length": 651, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77049 + }, + "lgl_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 187.2421875, + "max_sentence1_length": 651, + "min_sentence2_length": 24, + "average_sentence2_length": 113.73046875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77049 + }, + "eng_Latn-lid_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.03515625, + "max_sentence1_length": 227, + "min_sentence2_length": 30, + "average_sentence2_length": 241.02734375, + "max_sentence2_length": 922, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90128 + }, + "lid_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 241.02734375, + "max_sentence1_length": 922, + "min_sentence2_length": 24, + "average_sentence2_length": 111.03515625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90128 + }, + "eng_Latn-lif_Deva": { + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "min_sentence2_length": 65, + "average_sentence2_length": 243.16015625, + "max_sentence2_length": 598, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99454 + }, + "lif_Deva-eng_Latn": { + "min_sentence1_length": 65, + "average_sentence1_length": 243.16015625, + "max_sentence1_length": 598, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99454 + }, + "eng_Latn-lin_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 21, + "average_sentence2_length": 138.46484375, + "max_sentence2_length": 325, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64220 + }, + "lin_Latn-eng_Latn": { + "min_sentence1_length": 21, + "average_sentence1_length": 138.46484375, + "max_sentence1_length": 325, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64220 + }, + "eng_Latn-lit_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.9921875, + "max_sentence1_length": 827, + "min_sentence2_length": 25, + "average_sentence2_length": 108.26953125, + "max_sentence2_length": 264, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 56899 + }, + "lit_Latn-eng_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 108.26953125, + "max_sentence1_length": 264, + "min_sentence2_length": 24, + "average_sentence2_length": 113.9921875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 56899 + }, + "eng_Latn-llg_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.48046875, + "max_sentence1_length": 251, + "min_sentence2_length": 28, + "average_sentence2_length": 273.5078125, + "max_sentence2_length": 1339, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99325 + }, + "llg_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 273.5078125, + "max_sentence1_length": 1339, + "min_sentence2_length": 24, + "average_sentence2_length": 114.48046875, + "max_sentence2_length": 251, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99325 + }, + "eng_Latn-lug_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 29, + "average_sentence2_length": 118.4453125, + "max_sentence2_length": 288, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59095 + }, + "lug_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 118.4453125, + "max_sentence1_length": 288, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59095 + }, + "eng_Latn-luo_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 23, + "average_sentence2_length": 115.45703125, + "max_sentence2_length": 312, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59092 + }, + "luo_Latn-eng_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 115.45703125, + "max_sentence1_length": 312, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59092 + }, + "eng_Latn-lww_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 114.66015625, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 260.0234375, + "max_sentence2_length": 992, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 95919 + }, + "lww_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 260.0234375, + "max_sentence1_length": 992, + "min_sentence2_length": 31, + "average_sentence2_length": 114.66015625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 95919 + }, + "eng_Latn-maa_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.65234375, + "max_sentence1_length": 827, + "min_sentence2_length": 32, + "average_sentence2_length": 181.85546875, + "max_sentence2_length": 479, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75650 + }, + "maa_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 181.85546875, + "max_sentence1_length": 479, + "min_sentence2_length": 24, + "average_sentence2_length": 113.65234375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75650 + }, + "eng_Latn-maj_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 46, + "average_sentence2_length": 161.359375, + "max_sentence2_length": 353, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70081 + }, + "maj_Latn-eng_Latn": { + "min_sentence1_length": 46, + "average_sentence1_length": 161.359375, + "max_sentence1_length": 353, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70081 + }, + "eng_Latn-mal_Mlym": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.140625, + "max_sentence1_length": 341, + "min_sentence2_length": 52, + "average_sentence2_length": 156.7265625, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77790 + }, + "mal_Mlym-eng_Latn": { + "min_sentence1_length": 52, + "average_sentence1_length": 156.7265625, + "max_sentence1_length": 376, + "min_sentence2_length": 56, + "average_sentence2_length": 147.140625, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77790 + }, + "eng_Latn-mam_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 145.8828125, + "max_sentence1_length": 341, + "min_sentence2_length": 46, + "average_sentence2_length": 176.44140625, + "max_sentence2_length": 399, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82515 + }, + "mam_Latn-eng_Latn": { + "min_sentence1_length": 46, + "average_sentence1_length": 176.44140625, + "max_sentence1_length": 399, + "min_sentence2_length": 35, + "average_sentence2_length": 145.8828125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82515 + }, + "eng_Latn-maq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.89453125, + "max_sentence1_length": 227, + "min_sentence2_length": 43, + "average_sentence2_length": 193.59765625, + "max_sentence2_length": 460, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77694 + }, + "maq_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 193.59765625, + "max_sentence1_length": 460, + "min_sentence2_length": 24, + "average_sentence2_length": 109.89453125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77694 + }, + "eng_Latn-mar_Deva": { + "min_sentence1_length": 24, + "average_sentence1_length": 108.921875, + "max_sentence1_length": 227, + "min_sentence2_length": 23, + "average_sentence2_length": 118.359375, + "max_sentence2_length": 295, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58184 + }, + "mar_Deva-eng_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 118.359375, + "max_sentence1_length": 295, + "min_sentence2_length": 24, + "average_sentence2_length": 108.921875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58184 + }, + "eng_Latn-mau_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 35, + "average_sentence2_length": 169.7890625, + "max_sentence2_length": 442, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73001 + }, + "mau_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 169.7890625, + "max_sentence1_length": 442, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73001 + }, + "eng_Latn-mav_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 45, + "average_sentence2_length": 337.78515625, + "max_sentence2_length": 1352, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 115246 + }, + "mav_Latn-eng_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 337.78515625, + "max_sentence1_length": 1352, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 115246 + }, + "eng_Latn-maz_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.00390625, + "max_sentence1_length": 827, + "min_sentence2_length": 30, + "average_sentence2_length": 224.94140625, + "max_sentence2_length": 949, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86514 + }, + "maz_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 224.94140625, + "max_sentence1_length": 949, + "min_sentence2_length": 24, + "average_sentence2_length": 113.00390625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86514 + }, + "eng_Latn-mbb_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 128.66015625, + "max_sentence1_length": 422, + "min_sentence2_length": 26, + "average_sentence2_length": 247.30078125, + "max_sentence2_length": 815, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 96246 + }, + "mbb_Latn-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 247.30078125, + "max_sentence1_length": 815, + "min_sentence2_length": 25, + "average_sentence2_length": 128.66015625, + "max_sentence2_length": 422, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 96246 + }, + "eng_Latn-mbc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.953125, + "max_sentence1_length": 376, + "min_sentence2_length": 51, + "average_sentence2_length": 235.421875, + "max_sentence2_length": 640, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89184 + }, + "mbc_Latn-eng_Latn": { + "min_sentence1_length": 51, + "average_sentence1_length": 235.421875, + "max_sentence1_length": 640, + "min_sentence2_length": 24, + "average_sentence2_length": 112.953125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89184 + }, + "eng_Latn-mbh_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.6328125, + "max_sentence1_length": 376, + "min_sentence2_length": 39, + "average_sentence2_length": 216.8984375, + "max_sentence2_length": 828, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83848 + }, + "mbh_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 216.8984375, + "max_sentence1_length": 828, + "min_sentence2_length": 24, + "average_sentence2_length": 110.6328125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83848 + }, + "eng_Latn-mbj_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 116.0703125, + "max_sentence1_length": 827, + "min_sentence2_length": 33, + "average_sentence2_length": 219.76953125, + "max_sentence2_length": 796, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85975 + }, + "mbj_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 219.76953125, + "max_sentence1_length": 796, + "min_sentence2_length": 24, + "average_sentence2_length": 116.0703125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85975 + }, + "eng_Latn-mbl_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.20703125, + "max_sentence1_length": 827, + "min_sentence2_length": 36, + "average_sentence2_length": 169.68359375, + "max_sentence2_length": 515, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72932 + }, + "mbl_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 169.68359375, + "max_sentence1_length": 515, + "min_sentence2_length": 24, + "average_sentence2_length": 115.20703125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72932 + }, + "eng_Latn-mbs_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.09765625, + "max_sentence1_length": 376, + "min_sentence2_length": 34, + "average_sentence2_length": 196.75, + "max_sentence2_length": 742, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79065 + }, + "mbs_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 196.75, + "max_sentence1_length": 742, + "min_sentence2_length": 24, + "average_sentence2_length": 112.09765625, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79065 + }, + "eng_Latn-mbt_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.73046875, + "max_sentence1_length": 227, + "min_sentence2_length": 37, + "average_sentence2_length": 168.234375, + "max_sentence2_length": 461, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71671 + }, + "mbt_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 168.234375, + "max_sentence1_length": 461, + "min_sentence2_length": 24, + "average_sentence2_length": 111.73046875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71671 + }, + "eng_Latn-mca_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.296875, + "max_sentence1_length": 827, + "min_sentence2_length": 44, + "average_sentence2_length": 191.4609375, + "max_sentence2_length": 448, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78530 + }, + "mca_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 191.4609375, + "max_sentence1_length": 448, + "min_sentence2_length": 24, + "average_sentence2_length": 115.296875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78530 + }, + "eng_Latn-mcb_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.671875, + "max_sentence1_length": 827, + "min_sentence2_length": 36, + "average_sentence2_length": 250.12109375, + "max_sentence2_length": 799, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93643 + }, + "mcb_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 250.12109375, + "max_sentence1_length": 799, + "min_sentence2_length": 24, + "average_sentence2_length": 115.671875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93643 + }, + "eng_Latn-mcd_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.93359375, + "max_sentence1_length": 271, + "min_sentence2_length": 30, + "average_sentence2_length": 341.234375, + "max_sentence2_length": 1750, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 117035 + }, + "mcd_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 341.234375, + "max_sentence1_length": 1750, + "min_sentence2_length": 24, + "average_sentence2_length": 115.93359375, + "max_sentence2_length": 271, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 117035 + }, + "eng_Latn-mcf_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.546875, + "max_sentence1_length": 827, + "min_sentence2_length": 40, + "average_sentence2_length": 238.4375, + "max_sentence2_length": 842, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89852 + }, + "mcf_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 238.4375, + "max_sentence1_length": 842, + "min_sentence2_length": 24, + "average_sentence2_length": 112.546875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89852 + }, + "eng_Latn-mco_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 112.02734375, + "max_sentence1_length": 227, + "min_sentence2_length": 34, + "average_sentence2_length": 244.5859375, + "max_sentence2_length": 778, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91293 + }, + "mco_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 244.5859375, + "max_sentence1_length": 778, + "min_sentence2_length": 31, + "average_sentence2_length": 112.02734375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91293 + }, + "eng_Latn-mcp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 51, + "average_sentence2_length": 155.43359375, + "max_sentence2_length": 476, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68564 + }, + "mcp_Latn-eng_Latn": { + "min_sentence1_length": 51, + "average_sentence1_length": 155.43359375, + "max_sentence1_length": 476, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68564 + }, + "eng_Latn-mcq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.4375, + "max_sentence1_length": 827, + "min_sentence2_length": 39, + "average_sentence2_length": 142.0390625, + "max_sentence2_length": 379, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65402 + }, + "mcq_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 142.0390625, + "max_sentence1_length": 379, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65402 + }, + "eng_Latn-mcr_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 110.4453125, + "max_sentence1_length": 225, + "min_sentence2_length": 45, + "average_sentence2_length": 245.9921875, + "max_sentence2_length": 845, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91248 + }, + "mcr_Latn-eng_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 245.9921875, + "max_sentence1_length": 845, + "min_sentence2_length": 31, + "average_sentence2_length": 110.4453125, + "max_sentence2_length": 225, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91248 + }, + "eng_Latn-mdy_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.859375, + "max_sentence1_length": 827, + "min_sentence2_length": 17, + "average_sentence2_length": 80.9921875, + "max_sentence2_length": 240, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 50138 + }, + "mdy_Latn-eng_Latn": { + "min_sentence1_length": 17, + "average_sentence1_length": 80.9921875, + "max_sentence1_length": 240, + "min_sentence2_length": 24, + "average_sentence2_length": 114.859375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 50138 + }, + "eng_Latn-med_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 110.2578125, + "max_sentence1_length": 227, + "min_sentence2_length": 37, + "average_sentence2_length": 231.96875, + "max_sentence2_length": 914, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87610 + }, + "med_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 231.96875, + "max_sentence1_length": 914, + "min_sentence2_length": 25, + "average_sentence2_length": 110.2578125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87610 + }, + "eng_Latn-mee_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 114.90234375, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 226.9140625, + "max_sentence2_length": 624, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87505 + }, + "mee_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 226.9140625, + "max_sentence1_length": 624, + "min_sentence2_length": 31, + "average_sentence2_length": 114.90234375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87505 + }, + "eng_Latn-mek_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.62890625, + "max_sentence1_length": 227, + "min_sentence2_length": 34, + "average_sentence2_length": 285.19140625, + "max_sentence2_length": 880, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101586 + }, + "mek_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 285.19140625, + "max_sentence1_length": 880, + "min_sentence2_length": 24, + "average_sentence2_length": 111.62890625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101586 + }, + "eng_Latn-meq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.39453125, + "max_sentence1_length": 827, + "min_sentence2_length": 25, + "average_sentence2_length": 190.38671875, + "max_sentence2_length": 727, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78280 + }, + "meq_Latn-eng_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 190.38671875, + "max_sentence1_length": 727, + "min_sentence2_length": 24, + "average_sentence2_length": 115.39453125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78280 + }, + "eng_Latn-met_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 114.3359375, + "max_sentence1_length": 827, + "min_sentence2_length": 40, + "average_sentence2_length": 165.375, + "max_sentence2_length": 717, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71606 + }, + "met_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 165.375, + "max_sentence1_length": 717, + "min_sentence2_length": 31, + "average_sentence2_length": 114.3359375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71606 + }, + "eng_Latn-meu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.01171875, + "max_sentence1_length": 827, + "min_sentence2_length": 23, + "average_sentence2_length": 123.78125, + "max_sentence2_length": 301, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60875 + }, + "meu_Latn-eng_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 123.78125, + "max_sentence1_length": 301, + "min_sentence2_length": 24, + "average_sentence2_length": 114.01171875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60875 + }, + "eng_Latn-mgc_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 109.9375, + "max_sentence1_length": 243, + "min_sentence2_length": 23, + "average_sentence2_length": 110.7265625, + "max_sentence2_length": 313, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 56490 + }, + "mgc_Latn-eng_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 110.7265625, + "max_sentence1_length": 313, + "min_sentence2_length": 38, + "average_sentence2_length": 109.9375, + "max_sentence2_length": 243, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 56490 + }, + "eng_Latn-mgh_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 108.90234375, + "max_sentence1_length": 210, + "min_sentence2_length": 29, + "average_sentence2_length": 128.78125, + "max_sentence2_length": 284, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60847 + }, + "mgh_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 128.78125, + "max_sentence1_length": 284, + "min_sentence2_length": 28, + "average_sentence2_length": 108.90234375, + "max_sentence2_length": 210, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60847 + }, + "eng_Latn-mgw_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 111.75598086124401, + "max_sentence1_length": 243, + "min_sentence2_length": 39, + "average_sentence2_length": 125.02870813397129, + "max_sentence2_length": 322, + "num_samples": 209, + "num_samples_sentence2": 209, + "number_of_characters": 49488 + }, + "mgw_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 125.02870813397129, + "max_sentence1_length": 322, + "min_sentence2_length": 37, + "average_sentence2_length": 111.75598086124401, + "max_sentence2_length": 243, + "num_samples": 209, + "num_samples_sentence2": 209, + "number_of_characters": 49488 + }, + "eng_Latn-mhl_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.6640625, + "max_sentence1_length": 376, + "min_sentence2_length": 39, + "average_sentence2_length": 211.4609375, + "max_sentence2_length": 653, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82464 + }, + "mhl_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 211.4609375, + "max_sentence1_length": 653, + "min_sentence2_length": 24, + "average_sentence2_length": 110.6640625, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82464 + }, + "eng_Latn-mib_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.32421875, + "max_sentence1_length": 827, + "min_sentence2_length": 42, + "average_sentence2_length": 162.9609375, + "max_sentence2_length": 367, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71241 + }, + "mib_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 162.9609375, + "max_sentence1_length": 367, + "min_sentence2_length": 24, + "average_sentence2_length": 115.32421875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71241 + }, + "eng_Latn-mic_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.13671875, + "max_sentence1_length": 227, + "min_sentence2_length": 38, + "average_sentence2_length": 196.65234375, + "max_sentence2_length": 1958, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78794 + }, + "mic_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 196.65234375, + "max_sentence1_length": 1958, + "min_sentence2_length": 24, + "average_sentence2_length": 111.13671875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78794 + }, + "eng_Latn-mie_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 43, + "average_sentence2_length": 163.33203125, + "max_sentence2_length": 380, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71348 + }, + "mie_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 163.33203125, + "max_sentence1_length": 380, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71348 + }, + "eng_Latn-mig_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 145.109375, + "max_sentence2_length": 302, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66683 + }, + "mig_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 145.109375, + "max_sentence1_length": 302, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66683 + }, + "eng_Latn-mih_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 114.421875, + "max_sentence1_length": 827, + "min_sentence2_length": 38, + "average_sentence2_length": 177.89453125, + "max_sentence2_length": 571, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74833 + }, + "mih_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 177.89453125, + "max_sentence1_length": 571, + "min_sentence2_length": 31, + "average_sentence2_length": 114.421875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74833 + }, + "eng_Latn-mil_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.60546875, + "max_sentence1_length": 227, + "min_sentence2_length": 50, + "average_sentence2_length": 227.28515625, + "max_sentence2_length": 613, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86500 + }, + "mil_Latn-eng_Latn": { + "min_sentence1_length": 50, + "average_sentence1_length": 227.28515625, + "max_sentence1_length": 613, + "min_sentence2_length": 24, + "average_sentence2_length": 110.60546875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86500 + }, + "eng_Latn-mio_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.41015625, + "max_sentence1_length": 227, + "min_sentence2_length": 43, + "average_sentence2_length": 181.44140625, + "max_sentence2_length": 444, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74714 + }, + "mio_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 181.44140625, + "max_sentence1_length": 444, + "min_sentence2_length": 24, + "average_sentence2_length": 110.41015625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74714 + }, + "eng_Latn-mir_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.8046875, + "max_sentence1_length": 246, + "min_sentence2_length": 32, + "average_sentence2_length": 281.359375, + "max_sentence2_length": 1042, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 100394 + }, + "mir_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 281.359375, + "max_sentence1_length": 1042, + "min_sentence2_length": 24, + "average_sentence2_length": 110.8046875, + "max_sentence2_length": 246, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 100394 + }, + "eng_Latn-mit_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.02734375, + "max_sentence1_length": 229, + "min_sentence2_length": 36, + "average_sentence2_length": 220.85546875, + "max_sentence2_length": 664, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84962 + }, + "mit_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 220.85546875, + "max_sentence1_length": 664, + "min_sentence2_length": 24, + "average_sentence2_length": 111.02734375, + "max_sentence2_length": 229, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84962 + }, + "eng_Latn-miz_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 41, + "average_sentence2_length": 150.4453125, + "max_sentence2_length": 340, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68049 + }, + "miz_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 150.4453125, + "max_sentence1_length": 340, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68049 + }, + "eng_Latn-mjc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.140625, + "max_sentence1_length": 827, + "min_sentence2_length": 37, + "average_sentence2_length": 191.84375, + "max_sentence2_length": 545, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78332 + }, + "mjc_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 191.84375, + "max_sentence1_length": 545, + "min_sentence2_length": 24, + "average_sentence2_length": 114.140625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78332 + }, + "eng_Latn-mkj_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 109.86328125, + "max_sentence1_length": 257, + "min_sentence2_length": 39, + "average_sentence2_length": 122.85546875, + "max_sentence2_length": 319, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59576 + }, + "mkj_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 122.85546875, + "max_sentence1_length": 319, + "min_sentence2_length": 37, + "average_sentence2_length": 109.86328125, + "max_sentence2_length": 257, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59576 + }, + "eng_Latn-mkl_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.32421875, + "max_sentence1_length": 827, + "min_sentence2_length": 19, + "average_sentence2_length": 127.73046875, + "max_sentence2_length": 372, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61966 + }, + "mkl_Latn-eng_Latn": { + "min_sentence1_length": 19, + "average_sentence1_length": 127.73046875, + "max_sentence1_length": 372, + "min_sentence2_length": 24, + "average_sentence2_length": 114.32421875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61966 + }, + "eng_Latn-mkn_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.76953125, + "max_sentence1_length": 251, + "min_sentence2_length": 37, + "average_sentence2_length": 279.13671875, + "max_sentence2_length": 1299, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 100840 + }, + "mkn_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 279.13671875, + "max_sentence1_length": 1299, + "min_sentence2_length": 24, + "average_sentence2_length": 114.76953125, + "max_sentence2_length": 251, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 100840 + }, + "eng_Latn-mks_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 36, + "average_sentence2_length": 211.61328125, + "max_sentence2_length": 521, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83708 + }, + "mks_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 211.61328125, + "max_sentence1_length": 521, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83708 + }, + "eng_Latn-mle_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.97265625, + "max_sentence1_length": 239, + "min_sentence2_length": 46, + "average_sentence2_length": 332.56640625, + "max_sentence2_length": 1304, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 113546 + }, + "mle_Latn-eng_Latn": { + "min_sentence1_length": 46, + "average_sentence1_length": 332.56640625, + "max_sentence1_length": 1304, + "min_sentence2_length": 24, + "average_sentence2_length": 110.97265625, + "max_sentence2_length": 239, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 113546 + }, + "eng_Latn-mlh_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.6640625, + "max_sentence1_length": 376, + "min_sentence2_length": 39, + "average_sentence2_length": 211.4609375, + "max_sentence2_length": 653, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82464 + }, + "mlh_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 211.4609375, + "max_sentence1_length": 653, + "min_sentence2_length": 24, + "average_sentence2_length": 110.6640625, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82464 + }, + "eng_Latn-mlp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.23828125, + "max_sentence1_length": 827, + "min_sentence2_length": 50, + "average_sentence2_length": 232.94921875, + "max_sentence2_length": 748, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89136 + }, + "mlp_Latn-eng_Latn": { + "min_sentence1_length": 50, + "average_sentence1_length": 232.94921875, + "max_sentence1_length": 748, + "min_sentence2_length": 24, + "average_sentence2_length": 115.23828125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89136 + }, + "eng_Latn-mmo_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 53, + "average_sentence2_length": 200.91796875, + "max_sentence2_length": 528, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80208 + }, + "mmo_Latn-eng_Latn": { + "min_sentence1_length": 53, + "average_sentence1_length": 200.91796875, + "max_sentence1_length": 528, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80208 + }, + "eng_Latn-mmx_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.87890625, + "max_sentence1_length": 227, + "min_sentence2_length": 37, + "average_sentence2_length": 180.81640625, + "max_sentence2_length": 520, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74674 + }, + "mmx_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 180.81640625, + "max_sentence1_length": 520, + "min_sentence2_length": 24, + "average_sentence2_length": 110.87890625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74674 + }, + "eng_Latn-mna_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 110.01953125, + "max_sentence1_length": 231, + "min_sentence2_length": 49, + "average_sentence2_length": 234.32421875, + "max_sentence2_length": 760, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88152 + }, + "mna_Latn-eng_Latn": { + "min_sentence1_length": 49, + "average_sentence1_length": 234.32421875, + "max_sentence1_length": 760, + "min_sentence2_length": 31, + "average_sentence2_length": 110.01953125, + "max_sentence2_length": 231, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88152 + }, + "eng_Latn-mop_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.30078125, + "max_sentence1_length": 827, + "min_sentence2_length": 38, + "average_sentence2_length": 200.06640625, + "max_sentence2_length": 757, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80222 + }, + "mop_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 200.06640625, + "max_sentence1_length": 757, + "min_sentence2_length": 24, + "average_sentence2_length": 113.30078125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80222 + }, + "eng_Latn-mox_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.58984375, + "max_sentence1_length": 232, + "min_sentence2_length": 34, + "average_sentence2_length": 225.30859375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85990 + }, + "mox_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 225.30859375, + "max_sentence1_length": 827, + "min_sentence2_length": 24, + "average_sentence2_length": 110.58984375, + "max_sentence2_length": 232, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85990 + }, + "eng_Latn-mph_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 104.98795180722891, + "max_sentence1_length": 207, + "min_sentence2_length": 117, + "average_sentence2_length": 378.1807228915663, + "max_sentence2_length": 1106, + "num_samples": 83, + "num_samples_sentence2": 83, + "number_of_characters": 40103 + }, + "mph_Latn-eng_Latn": { + "min_sentence1_length": 117, + "average_sentence1_length": 378.1807228915663, + "max_sentence1_length": 1106, + "min_sentence2_length": 42, + "average_sentence2_length": 104.98795180722891, + "max_sentence2_length": 207, + "num_samples": 83, + "num_samples_sentence2": 83, + "number_of_characters": 40103 + }, + "eng_Latn-mpj_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.93359375, + "max_sentence1_length": 246, + "min_sentence2_length": 57, + "average_sentence2_length": 339.09765625, + "max_sentence2_length": 1893, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 115208 + }, + "mpj_Latn-eng_Latn": { + "min_sentence1_length": 57, + "average_sentence1_length": 339.09765625, + "max_sentence1_length": 1893, + "min_sentence2_length": 24, + "average_sentence2_length": 110.93359375, + "max_sentence2_length": 246, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 115208 + }, + "eng_Latn-mpm_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.17578125, + "max_sentence1_length": 227, + "min_sentence2_length": 42, + "average_sentence2_length": 234.5546875, + "max_sentence2_length": 727, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87995 + }, + "mpm_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 234.5546875, + "max_sentence1_length": 727, + "min_sentence2_length": 24, + "average_sentence2_length": 109.17578125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87995 + }, + "eng_Latn-mpp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.265625, + "max_sentence1_length": 230, + "min_sentence2_length": 33, + "average_sentence2_length": 223.0390625, + "max_sentence2_length": 978, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85326 + }, + "mpp_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 223.0390625, + "max_sentence1_length": 978, + "min_sentence2_length": 24, + "average_sentence2_length": 110.265625, + "max_sentence2_length": 230, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85326 + }, + "eng_Latn-mps_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.17578125, + "max_sentence1_length": 230, + "min_sentence2_length": 43, + "average_sentence2_length": 305.2109375, + "max_sentence2_length": 1272, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 106083 + }, + "mps_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 305.2109375, + "max_sentence1_length": 1272, + "min_sentence2_length": 24, + "average_sentence2_length": 109.17578125, + "max_sentence2_length": 230, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 106083 + }, + "eng_Latn-mpt_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.9140625, + "max_sentence1_length": 227, + "min_sentence2_length": 39, + "average_sentence2_length": 276.9375, + "max_sentence2_length": 1274, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99802 + }, + "mpt_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 276.9375, + "max_sentence1_length": 1274, + "min_sentence2_length": 24, + "average_sentence2_length": 112.9140625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99802 + }, + "eng_Latn-mpx_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.59375, + "max_sentence1_length": 827, + "min_sentence2_length": 38, + "average_sentence2_length": 175.19140625, + "max_sentence2_length": 568, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74185 + }, + "mpx_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 175.19140625, + "max_sentence1_length": 568, + "min_sentence2_length": 24, + "average_sentence2_length": 114.59375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74185 + }, + "eng_Latn-mqb_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.796875, + "max_sentence1_length": 827, + "min_sentence2_length": 42, + "average_sentence2_length": 194.33984375, + "max_sentence2_length": 670, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79139 + }, + "mqb_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 194.33984375, + "max_sentence1_length": 670, + "min_sentence2_length": 24, + "average_sentence2_length": 114.796875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79139 + }, + "eng_Latn-mqj_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.84375, + "max_sentence1_length": 227, + "min_sentence2_length": 39, + "average_sentence2_length": 176.8671875, + "max_sentence2_length": 452, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73654 + }, + "mqj_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 176.8671875, + "max_sentence1_length": 452, + "min_sentence2_length": 24, + "average_sentence2_length": 110.84375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73654 + }, + "eng_Latn-msb_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.91015625, + "max_sentence1_length": 827, + "min_sentence2_length": 29, + "average_sentence2_length": 157.33984375, + "max_sentence2_length": 394, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69696 + }, + "msb_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 157.33984375, + "max_sentence1_length": 394, + "min_sentence2_length": 24, + "average_sentence2_length": 114.91015625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69696 + }, + "eng_Latn-msc_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 117.7265625, + "max_sentence1_length": 248, + "min_sentence2_length": 20, + "average_sentence2_length": 127.48828125, + "max_sentence2_length": 336, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62775 + }, + "msc_Latn-eng_Latn": { + "min_sentence1_length": 20, + "average_sentence1_length": 127.48828125, + "max_sentence1_length": 336, + "min_sentence2_length": 34, + "average_sentence2_length": 117.7265625, + "max_sentence2_length": 248, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62775 + }, + "eng_Latn-msk_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.06640625, + "max_sentence1_length": 227, + "min_sentence2_length": 29, + "average_sentence2_length": 232.93359375, + "max_sentence2_length": 837, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87552 + }, + "msk_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 232.93359375, + "max_sentence1_length": 837, + "min_sentence2_length": 24, + "average_sentence2_length": 109.06640625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87552 + }, + "eng_Latn-msm_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.375, + "max_sentence1_length": 827, + "min_sentence2_length": 23, + "average_sentence2_length": 180.16015625, + "max_sentence2_length": 673, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74633 + }, + "msm_Latn-eng_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 180.16015625, + "max_sentence1_length": 673, + "min_sentence2_length": 24, + "average_sentence2_length": 111.375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74633 + }, + "eng_Latn-msy_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 146.0, + "max_sentence1_length": 341, + "min_sentence2_length": 31, + "average_sentence2_length": 227.23828125, + "max_sentence2_length": 585, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 95549 + }, + "msy_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 227.23828125, + "max_sentence1_length": 585, + "min_sentence2_length": 1, + "average_sentence2_length": 146.0, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 95549 + }, + "eng_Latn-mti_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.96875, + "max_sentence1_length": 827, + "min_sentence2_length": 3, + "average_sentence2_length": 126.86328125, + "max_sentence2_length": 327, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61653 + }, + "mti_Latn-eng_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 126.86328125, + "max_sentence1_length": 327, + "min_sentence2_length": 24, + "average_sentence2_length": 113.96875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61653 + }, + "eng_Latn-mto_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.54296875, + "max_sentence1_length": 827, + "min_sentence2_length": 57, + "average_sentence2_length": 224.1171875, + "max_sentence2_length": 552, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86441 + }, + "mto_Latn-eng_Latn": { + "min_sentence1_length": 57, + "average_sentence1_length": 224.1171875, + "max_sentence1_length": 552, + "min_sentence2_length": 24, + "average_sentence2_length": 113.54296875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86441 + }, + "eng_Latn-mux_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.97265625, + "max_sentence1_length": 227, + "min_sentence2_length": 47, + "average_sentence2_length": 372.21484375, + "max_sentence2_length": 1454, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 123440 + }, + "mux_Latn-eng_Latn": { + "min_sentence1_length": 47, + "average_sentence1_length": 372.21484375, + "max_sentence1_length": 1454, + "min_sentence2_length": 24, + "average_sentence2_length": 109.97265625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 123440 + }, + "eng_Latn-muy_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.53125, + "max_sentence1_length": 827, + "min_sentence2_length": 33, + "average_sentence2_length": 184.390625, + "max_sentence2_length": 416, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76268 + }, + "muy_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 184.390625, + "max_sentence1_length": 416, + "min_sentence2_length": 24, + "average_sentence2_length": 113.53125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76268 + }, + "eng_Latn-mva_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.2109375, + "max_sentence1_length": 376, + "min_sentence2_length": 33, + "average_sentence2_length": 189.0390625, + "max_sentence2_length": 523, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76608 + }, + "mva_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 189.0390625, + "max_sentence1_length": 523, + "min_sentence2_length": 24, + "average_sentence2_length": 110.2109375, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76608 + }, + "eng_Latn-mvn_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.421875, + "max_sentence1_length": 227, + "min_sentence2_length": 42, + "average_sentence2_length": 255.61328125, + "max_sentence2_length": 1123, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93961 + }, + "mvn_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 255.61328125, + "max_sentence1_length": 1123, + "min_sentence2_length": 24, + "average_sentence2_length": 111.421875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93961 + }, + "eng_Latn-mwc_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 112.6875, + "max_sentence1_length": 245, + "min_sentence2_length": 38, + "average_sentence2_length": 129.9453125, + "max_sentence2_length": 273, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62114 + }, + "mwc_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 129.9453125, + "max_sentence1_length": 273, + "min_sentence2_length": 37, + "average_sentence2_length": 112.6875, + "max_sentence2_length": 245, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62114 + }, + "eng_Latn-mwe_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 37, + "average_sentence2_length": 139.8359375, + "max_sentence2_length": 343, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64571 + }, + "mwe_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 139.8359375, + "max_sentence1_length": 343, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64571 + }, + "eng_Latn-mwf_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 105.94140625, + "max_sentence1_length": 245, + "min_sentence2_length": 53, + "average_sentence2_length": 367.21484375, + "max_sentence2_length": 1738, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 121128 + }, + "mwf_Latn-eng_Latn": { + "min_sentence1_length": 53, + "average_sentence1_length": 367.21484375, + "max_sentence1_length": 1738, + "min_sentence2_length": 24, + "average_sentence2_length": 105.94140625, + "max_sentence2_length": 245, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 121128 + }, + "eng_Latn-mwp_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 111.61328125, + "max_sentence1_length": 827, + "min_sentence2_length": 47, + "average_sentence2_length": 192.2890625, + "max_sentence2_length": 634, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77799 + }, + "mwp_Latn-eng_Latn": { + "min_sentence1_length": 47, + "average_sentence1_length": 192.2890625, + "max_sentence1_length": 634, + "min_sentence2_length": 31, + "average_sentence2_length": 111.61328125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77799 + }, + "eng_Latn-mxb_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 42, + "average_sentence2_length": 186.51171875, + "max_sentence2_length": 503, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77282 + }, + "mxb_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 186.51171875, + "max_sentence1_length": 503, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77282 + }, + "eng_Latn-mxp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.4375, + "max_sentence1_length": 827, + "min_sentence2_length": 26, + "average_sentence2_length": 178.78515625, + "max_sentence2_length": 465, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74553 + }, + "mxp_Latn-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 178.78515625, + "max_sentence1_length": 465, + "min_sentence2_length": 24, + "average_sentence2_length": 112.4375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74553 + }, + "eng_Latn-mxq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 36, + "average_sentence2_length": 162.6640625, + "max_sentence2_length": 395, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71177 + }, + "mxq_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 162.6640625, + "max_sentence1_length": 395, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71177 + }, + "eng_Latn-mxt_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.390625, + "max_sentence1_length": 827, + "min_sentence2_length": 32, + "average_sentence2_length": 185.3046875, + "max_sentence2_length": 540, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76978 + }, + "mxt_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 185.3046875, + "max_sentence1_length": 540, + "min_sentence2_length": 24, + "average_sentence2_length": 115.390625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76978 + }, + "eng_Latn-mya_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 146.3203125, + "max_sentence1_length": 341, + "min_sentence2_length": 1, + "average_sentence2_length": 167.9140625, + "max_sentence2_length": 392, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80444 + }, + "mya_Latn-eng_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 167.9140625, + "max_sentence1_length": 392, + "min_sentence2_length": 56, + "average_sentence2_length": 146.3203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80444 + }, + "eng_Latn-myk_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 41, + "average_sentence2_length": 155.09375, + "max_sentence2_length": 372, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68477 + }, + "myk_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 155.09375, + "max_sentence1_length": 372, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68477 + }, + "eng_Latn-myu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.890625, + "max_sentence1_length": 230, + "min_sentence2_length": 32, + "average_sentence2_length": 254.5390625, + "max_sentence2_length": 1078, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93550 + }, + "myu_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 254.5390625, + "max_sentence1_length": 1078, + "min_sentence2_length": 24, + "average_sentence2_length": 110.890625, + "max_sentence2_length": 230, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93550 + }, + "eng_Latn-myw_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 116.5234375, + "max_sentence1_length": 232, + "min_sentence2_length": 30, + "average_sentence2_length": 246.9453125, + "max_sentence2_length": 663, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93048 + }, + "myw_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 246.9453125, + "max_sentence1_length": 663, + "min_sentence2_length": 24, + "average_sentence2_length": 116.5234375, + "max_sentence2_length": 232, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93048 + }, + "eng_Latn-myy_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.23828125, + "max_sentence1_length": 827, + "min_sentence2_length": 37, + "average_sentence2_length": 241.66796875, + "max_sentence2_length": 1002, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90856 + }, + "myy_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 241.66796875, + "max_sentence1_length": 1002, + "min_sentence2_length": 24, + "average_sentence2_length": 113.23828125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90856 + }, + "eng_Latn-mzz_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 113.664, + "max_sentence1_length": 257, + "min_sentence2_length": 43, + "average_sentence2_length": 168.8, + "max_sentence2_length": 597, + "num_samples": 125, + "num_samples_sentence2": 125, + "number_of_characters": 35308 + }, + "mzz_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 168.8, + "max_sentence1_length": 597, + "min_sentence2_length": 45, + "average_sentence2_length": 113.664, + "max_sentence2_length": 257, + "num_samples": 125, + "num_samples_sentence2": 125, + "number_of_characters": 35308 + }, + "eng_Latn-nab_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.921875, + "max_sentence1_length": 227, + "min_sentence2_length": 51, + "average_sentence2_length": 389.17578125, + "max_sentence2_length": 1402, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 128025 + }, + "nab_Latn-eng_Latn": { + "min_sentence1_length": 51, + "average_sentence1_length": 389.17578125, + "max_sentence1_length": 1402, + "min_sentence2_length": 24, + "average_sentence2_length": 110.921875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 128025 + }, + "eng_Latn-naf_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.5390625, + "max_sentence1_length": 227, + "min_sentence2_length": 44, + "average_sentence2_length": 223.32421875, + "max_sentence2_length": 882, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85981 + }, + "naf_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 223.32421875, + "max_sentence1_length": 882, + "min_sentence2_length": 24, + "average_sentence2_length": 112.5390625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85981 + }, + "eng_Latn-nak_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.59765625, + "max_sentence1_length": 227, + "min_sentence2_length": 53, + "average_sentence2_length": 254.71875, + "max_sentence2_length": 1236, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93521 + }, + "nak_Latn-eng_Latn": { + "min_sentence1_length": 53, + "average_sentence1_length": 254.71875, + "max_sentence1_length": 1236, + "min_sentence2_length": 24, + "average_sentence2_length": 110.59765625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93521 + }, + "eng_Latn-nas_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.05078125, + "max_sentence1_length": 227, + "min_sentence2_length": 28, + "average_sentence2_length": 175.453125, + "max_sentence2_length": 465, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73089 + }, + "nas_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 175.453125, + "max_sentence1_length": 465, + "min_sentence2_length": 24, + "average_sentence2_length": 110.05078125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73089 + }, + "eng_Latn-nbq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.86328125, + "max_sentence1_length": 243, + "min_sentence2_length": 33, + "average_sentence2_length": 255.109375, + "max_sentence2_length": 973, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 94201 + }, + "nbq_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 255.109375, + "max_sentence1_length": 973, + "min_sentence2_length": 24, + "average_sentence2_length": 112.86328125, + "max_sentence2_length": 243, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 94201 + }, + "eng_Latn-nca_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.53125, + "max_sentence1_length": 376, + "min_sentence2_length": 26, + "average_sentence2_length": 157.6171875, + "max_sentence2_length": 395, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68902 + }, + "nca_Latn-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 157.6171875, + "max_sentence1_length": 395, + "min_sentence2_length": 24, + "average_sentence2_length": 111.53125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68902 + }, + "eng_Latn-nch_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.34375, + "max_sentence1_length": 827, + "min_sentence2_length": 46, + "average_sentence2_length": 207.09375, + "max_sentence2_length": 519, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82032 + }, + "nch_Latn-eng_Latn": { + "min_sentence1_length": 46, + "average_sentence1_length": 207.09375, + "max_sentence1_length": 519, + "min_sentence2_length": 24, + "average_sentence2_length": 113.34375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82032 + }, + "eng_Latn-ncj_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.16015625, + "max_sentence1_length": 227, + "min_sentence2_length": 38, + "average_sentence2_length": 173.796875, + "max_sentence2_length": 460, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72693 + }, + "ncj_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 173.796875, + "max_sentence1_length": 460, + "min_sentence2_length": 24, + "average_sentence2_length": 110.16015625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72693 + }, + "eng_Latn-ncl_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 115.09375, + "max_sentence1_length": 827, + "min_sentence2_length": 28, + "average_sentence2_length": 290.17578125, + "max_sentence2_length": 794, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 103749 + }, + "ncl_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 290.17578125, + "max_sentence1_length": 794, + "min_sentence2_length": 31, + "average_sentence2_length": 115.09375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 103749 + }, + "eng_Latn-ncu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.92578125, + "max_sentence1_length": 227, + "min_sentence2_length": 57, + "average_sentence2_length": 269.1171875, + "max_sentence2_length": 1008, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97291 + }, + "ncu_Latn-eng_Latn": { + "min_sentence1_length": 57, + "average_sentence1_length": 269.1171875, + "max_sentence1_length": 1008, + "min_sentence2_length": 24, + "average_sentence2_length": 110.92578125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97291 + }, + "eng_Latn-ndg_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.41796875, + "max_sentence1_length": 227, + "min_sentence2_length": 21, + "average_sentence2_length": 116.5703125, + "max_sentence2_length": 277, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58109 + }, + "ndg_Latn-eng_Latn": { + "min_sentence1_length": 21, + "average_sentence1_length": 116.5703125, + "max_sentence1_length": 277, + "min_sentence2_length": 24, + "average_sentence2_length": 110.41796875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58109 + }, + "eng_Latn-ndj_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 32, + "average_sentence2_length": 131.58984375, + "max_sentence2_length": 321, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62460 + }, + "ndj_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 131.58984375, + "max_sentence1_length": 321, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62460 + }, + "eng_Latn-nfa_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.23046875, + "max_sentence1_length": 227, + "min_sentence2_length": 31, + "average_sentence2_length": 268.37890625, + "max_sentence2_length": 1222, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97436 + }, + "nfa_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 268.37890625, + "max_sentence1_length": 1222, + "min_sentence2_length": 24, + "average_sentence2_length": 112.23046875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97436 + }, + "eng_Latn-ngp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 27, + "average_sentence2_length": 128.859375, + "max_sentence2_length": 319, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61761 + }, + "ngp_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 128.859375, + "max_sentence1_length": 319, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61761 + }, + "eng_Latn-ngu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.703125, + "max_sentence1_length": 227, + "min_sentence2_length": 37, + "average_sentence2_length": 193.3515625, + "max_sentence2_length": 601, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77838 + }, + "ngu_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 193.3515625, + "max_sentence1_length": 601, + "min_sentence2_length": 24, + "average_sentence2_length": 110.703125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77838 + }, + "eng_Latn-nhe_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "min_sentence2_length": 80, + "average_sentence2_length": 253.1953125, + "max_sentence2_length": 615, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 102593 + }, + "nhe_Latn-eng_Latn": { + "min_sentence1_length": 80, + "average_sentence1_length": 253.1953125, + "max_sentence1_length": 615, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 102593 + }, + "eng_Latn-nhg_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 114.359375, + "max_sentence1_length": 827, + "min_sentence2_length": 35, + "average_sentence2_length": 177.26171875, + "max_sentence2_length": 664, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74655 + }, + "nhg_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 177.26171875, + "max_sentence1_length": 664, + "min_sentence2_length": 31, + "average_sentence2_length": 114.359375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74655 + }, + "eng_Latn-nhi_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 39, + "average_sentence2_length": 171.62109375, + "max_sentence2_length": 418, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72708 + }, + "nhi_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 171.62109375, + "max_sentence1_length": 418, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72708 + }, + "eng_Latn-nho_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 113.66796875, + "max_sentence1_length": 827, + "min_sentence2_length": 32, + "average_sentence2_length": 188.51953125, + "max_sentence2_length": 474, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77360 + }, + "nho_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 188.51953125, + "max_sentence1_length": 474, + "min_sentence2_length": 31, + "average_sentence2_length": 113.66796875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77360 + }, + "eng_Latn-nhr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.19140625, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 156.34765625, + "max_sentence2_length": 465, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69514 + }, + "nhr_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 156.34765625, + "max_sentence1_length": 465, + "min_sentence2_length": 24, + "average_sentence2_length": 115.19140625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69514 + }, + "eng_Latn-nhu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 32, + "average_sentence2_length": 157.3671875, + "max_sentence2_length": 507, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69059 + }, + "nhu_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 157.3671875, + "max_sentence1_length": 507, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69059 + }, + "eng_Latn-nhw_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.34375, + "max_sentence1_length": 827, + "min_sentence2_length": 50, + "average_sentence2_length": 212.4296875, + "max_sentence2_length": 531, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83398 + }, + "nhw_Latn-eng_Latn": { + "min_sentence1_length": 50, + "average_sentence1_length": 212.4296875, + "max_sentence1_length": 531, + "min_sentence2_length": 24, + "average_sentence2_length": 113.34375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83398 + }, + "eng_Latn-nhy_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.60546875, + "max_sentence1_length": 827, + "min_sentence2_length": 47, + "average_sentence2_length": 190.59375, + "max_sentence2_length": 450, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78387 + }, + "nhy_Latn-eng_Latn": { + "min_sentence1_length": 47, + "average_sentence1_length": 190.59375, + "max_sentence1_length": 450, + "min_sentence2_length": 24, + "average_sentence2_length": 115.60546875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78387 + }, + "eng_Latn-nif_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 106.3046875, + "max_sentence1_length": 245, + "min_sentence2_length": 48, + "average_sentence2_length": 165.8671875, + "max_sentence2_length": 641, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69676 + }, + "nif_Latn-eng_Latn": { + "min_sentence1_length": 48, + "average_sentence1_length": 165.8671875, + "max_sentence1_length": 641, + "min_sentence2_length": 37, + "average_sentence2_length": 106.3046875, + "max_sentence2_length": 245, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69676 + }, + "eng_Latn-nii_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.26953125, + "max_sentence1_length": 273, + "min_sentence2_length": 59, + "average_sentence2_length": 255.140625, + "max_sentence2_length": 984, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93801 + }, + "nii_Latn-eng_Latn": { + "min_sentence1_length": 59, + "average_sentence1_length": 255.140625, + "max_sentence1_length": 984, + "min_sentence2_length": 24, + "average_sentence2_length": 111.26953125, + "max_sentence2_length": 273, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93801 + }, + "eng_Latn-nin_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 41, + "average_sentence2_length": 157.5703125, + "max_sentence2_length": 411, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69111 + }, + "nin_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 157.5703125, + "max_sentence1_length": 411, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69111 + }, + "eng_Latn-nko_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.01171875, + "max_sentence1_length": 227, + "min_sentence2_length": 23, + "average_sentence2_length": 130.33203125, + "max_sentence2_length": 394, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62040 + }, + "nko_Latn-eng_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 130.33203125, + "max_sentence1_length": 394, + "min_sentence2_length": 24, + "average_sentence2_length": 112.01171875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62040 + }, + "eng_Latn-nld_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.88671875, + "max_sentence1_length": 827, + "min_sentence2_length": 25, + "average_sentence2_length": 119.83203125, + "max_sentence2_length": 226, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60088 + }, + "nld_Latn-eng_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 119.83203125, + "max_sentence1_length": 226, + "min_sentence2_length": 24, + "average_sentence2_length": 114.88671875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60088 + }, + "eng_Latn-nlg_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.23046875, + "max_sentence1_length": 376, + "min_sentence2_length": 40, + "average_sentence2_length": 182.6015625, + "max_sentence2_length": 525, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75477 + }, + "nlg_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 182.6015625, + "max_sentence1_length": 525, + "min_sentence2_length": 24, + "average_sentence2_length": 112.23046875, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75477 + }, + "eng_Latn-nna_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 117.91796875, + "max_sentence1_length": 257, + "min_sentence2_length": 34, + "average_sentence2_length": 318.4765625, + "max_sentence2_length": 2306, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 111717 + }, + "nna_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 318.4765625, + "max_sentence1_length": 2306, + "min_sentence2_length": 23, + "average_sentence2_length": 117.91796875, + "max_sentence2_length": 257, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 111717 + }, + "eng_Latn-nnq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 25, + "average_sentence2_length": 125.62109375, + "max_sentence2_length": 323, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60932 + }, + "nnq_Latn-eng_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 125.62109375, + "max_sentence1_length": 323, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60932 + }, + "eng_Latn-noa_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 147.40625, + "max_sentence1_length": 341, + "min_sentence2_length": 42, + "average_sentence2_length": 267.203125, + "max_sentence2_length": 709, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 106140 + }, + "noa_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 267.203125, + "max_sentence1_length": 709, + "min_sentence2_length": 35, + "average_sentence2_length": 147.40625, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 106140 + }, + "eng_Latn-nop_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.6640625, + "max_sentence1_length": 827, + "min_sentence2_length": 42, + "average_sentence2_length": 176.57421875, + "max_sentence2_length": 511, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74301 + }, + "nop_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 176.57421875, + "max_sentence1_length": 511, + "min_sentence2_length": 24, + "average_sentence2_length": 113.6640625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74301 + }, + "eng_Latn-not_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.58984375, + "max_sentence1_length": 227, + "min_sentence2_length": 34, + "average_sentence2_length": 228.4375, + "max_sentence2_length": 807, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86791 + }, + "not_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 228.4375, + "max_sentence1_length": 807, + "min_sentence2_length": 24, + "average_sentence2_length": 110.58984375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86791 + }, + "eng_Latn-nou_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.13671875, + "max_sentence1_length": 228, + "min_sentence2_length": 3, + "average_sentence2_length": 214.30859375, + "max_sentence2_length": 811, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83058 + }, + "nou_Latn-eng_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 214.30859375, + "max_sentence1_length": 811, + "min_sentence2_length": 24, + "average_sentence2_length": 110.13671875, + "max_sentence2_length": 228, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83058 + }, + "eng_Latn-npi_Deva": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 24, + "average_sentence2_length": 120.40234375, + "max_sentence2_length": 313, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59596 + }, + "npi_Deva-eng_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 120.40234375, + "max_sentence1_length": 313, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59596 + }, + "eng_Latn-npl_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 31, + "average_sentence2_length": 146.73828125, + "max_sentence2_length": 337, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66338 + }, + "npl_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 146.73828125, + "max_sentence1_length": 337, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66338 + }, + "eng_Latn-nsn_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.8046875, + "max_sentence1_length": 227, + "min_sentence2_length": 36, + "average_sentence2_length": 192.5078125, + "max_sentence2_length": 683, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77648 + }, + "nsn_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 192.5078125, + "max_sentence1_length": 683, + "min_sentence2_length": 24, + "average_sentence2_length": 110.8046875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77648 + }, + "eng_Latn-nss_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 112.18359375, + "max_sentence1_length": 245, + "min_sentence2_length": 44, + "average_sentence2_length": 167.26171875, + "max_sentence2_length": 449, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71538 + }, + "nss_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 167.26171875, + "max_sentence1_length": 449, + "min_sentence2_length": 37, + "average_sentence2_length": 112.18359375, + "max_sentence2_length": 245, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71538 + }, + "eng_Latn-ntj_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.59765625, + "max_sentence1_length": 227, + "min_sentence2_length": 56, + "average_sentence2_length": 258.41796875, + "max_sentence2_length": 857, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 94468 + }, + "ntj_Latn-eng_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 258.41796875, + "max_sentence1_length": 857, + "min_sentence2_length": 24, + "average_sentence2_length": 110.59765625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 94468 + }, + "eng_Latn-ntp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.97265625, + "max_sentence1_length": 230, + "min_sentence2_length": 31, + "average_sentence2_length": 239.52734375, + "max_sentence2_length": 1474, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89472 + }, + "ntp_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 239.52734375, + "max_sentence1_length": 1474, + "min_sentence2_length": 24, + "average_sentence2_length": 109.97265625, + "max_sentence2_length": 230, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89472 + }, + "eng_Latn-ntu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.15625, + "max_sentence1_length": 273, + "min_sentence2_length": 26, + "average_sentence2_length": 241.875, + "max_sentence2_length": 1550, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91144 + }, + "ntu_Latn-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 241.875, + "max_sentence1_length": 1550, + "min_sentence2_length": 24, + "average_sentence2_length": 114.15625, + "max_sentence2_length": 273, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91144 + }, + "eng_Latn-nuy_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 112.17578125, + "max_sentence1_length": 227, + "min_sentence2_length": 86, + "average_sentence2_length": 313.828125, + "max_sentence2_length": 1006, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 109057 + }, + "nuy_Latn-eng_Latn": { + "min_sentence1_length": 86, + "average_sentence1_length": 313.828125, + "max_sentence1_length": 1006, + "min_sentence2_length": 32, + "average_sentence2_length": 112.17578125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 109057 + }, + "eng_Latn-nvm_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.984375, + "max_sentence1_length": 232, + "min_sentence2_length": 43, + "average_sentence2_length": 257.30859375, + "max_sentence2_length": 979, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 94027 + }, + "nvm_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 257.30859375, + "max_sentence1_length": 979, + "min_sentence2_length": 24, + "average_sentence2_length": 109.984375, + "max_sentence2_length": 232, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 94027 + }, + "eng_Latn-nwi_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.6796875, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 219.140625, + "max_sentence2_length": 738, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84946 + }, + "nwi_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 219.140625, + "max_sentence1_length": 738, + "min_sentence2_length": 24, + "average_sentence2_length": 112.6796875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84946 + }, + "eng_Latn-nya_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 25, + "average_sentence2_length": 127.3203125, + "max_sentence2_length": 328, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61367 + }, + "nya_Latn-eng_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 127.3203125, + "max_sentence1_length": 328, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61367 + }, + "eng_Latn-nys_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 114.35245901639344, + "max_sentence1_length": 268, + "min_sentence2_length": 50, + "average_sentence2_length": 230.327868852459, + "max_sentence2_length": 1366, + "num_samples": 122, + "num_samples_sentence2": 122, + "number_of_characters": 42051 + }, + "nys_Latn-eng_Latn": { + "min_sentence1_length": 50, + "average_sentence1_length": 230.327868852459, + "max_sentence1_length": 1366, + "min_sentence2_length": 37, + "average_sentence2_length": 114.35245901639344, + "max_sentence2_length": 268, + "num_samples": 122, + "num_samples_sentence2": 122, + "number_of_characters": 42051 + }, + "eng_Latn-nyu_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 110.2890625, + "max_sentence1_length": 257, + "min_sentence2_length": 34, + "average_sentence2_length": 126.015625, + "max_sentence2_length": 288, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60494 + }, + "nyu_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 126.015625, + "max_sentence1_length": 288, + "min_sentence2_length": 37, + "average_sentence2_length": 110.2890625, + "max_sentence2_length": 257, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60494 + }, + "eng_Latn-obo_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 115.52734375, + "max_sentence1_length": 827, + "min_sentence2_length": 42, + "average_sentence2_length": 196.0859375, + "max_sentence2_length": 668, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79773 + }, + "obo_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 196.0859375, + "max_sentence1_length": 668, + "min_sentence2_length": 31, + "average_sentence2_length": 115.52734375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79773 + }, + "eng_Latn-okv_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 146.515625, + "max_sentence1_length": 341, + "min_sentence2_length": 50, + "average_sentence2_length": 174.48046875, + "max_sentence2_length": 807, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82175 + }, + "okv_Latn-eng_Latn": { + "min_sentence1_length": 50, + "average_sentence1_length": 174.48046875, + "max_sentence1_length": 807, + "min_sentence2_length": 35, + "average_sentence2_length": 146.515625, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82175 + }, + "eng_Latn-omw_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 122.4375, + "max_sentence1_length": 290, + "min_sentence2_length": 58, + "average_sentence2_length": 202.26171875, + "max_sentence2_length": 460, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83123 + }, + "omw_Latn-eng_Latn": { + "min_sentence1_length": 58, + "average_sentence1_length": 202.26171875, + "max_sentence1_length": 460, + "min_sentence2_length": 37, + "average_sentence2_length": 122.4375, + "max_sentence2_length": 290, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83123 + }, + "eng_Latn-ong_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 114.62890625, + "max_sentence1_length": 827, + "min_sentence2_length": 27, + "average_sentence2_length": 222.56640625, + "max_sentence2_length": 864, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86322 + }, + "ong_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 222.56640625, + "max_sentence1_length": 864, + "min_sentence2_length": 31, + "average_sentence2_length": 114.62890625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86322 + }, + "eng_Latn-ons_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.984375, + "max_sentence1_length": 827, + "min_sentence2_length": 38, + "average_sentence2_length": 188.4609375, + "max_sentence2_length": 580, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77426 + }, + "ons_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 188.4609375, + "max_sentence1_length": 580, + "min_sentence2_length": 24, + "average_sentence2_length": 113.984375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77426 + }, + "eng_Latn-ood_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.00390625, + "max_sentence1_length": 239, + "min_sentence2_length": 41, + "average_sentence2_length": 199.0078125, + "max_sentence2_length": 631, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79363 + }, + "ood_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 199.0078125, + "max_sentence1_length": 631, + "min_sentence2_length": 24, + "average_sentence2_length": 111.00390625, + "max_sentence2_length": 239, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79363 + }, + "eng_Latn-opm_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.70703125, + "max_sentence1_length": 227, + "min_sentence2_length": 59, + "average_sentence2_length": 298.8671875, + "max_sentence2_length": 1026, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 104595 + }, + "opm_Latn-eng_Latn": { + "min_sentence1_length": 59, + "average_sentence1_length": 298.8671875, + "max_sentence1_length": 1026, + "min_sentence2_length": 24, + "average_sentence2_length": 109.70703125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 104595 + }, + "eng_Latn-ory_Orya": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.76171875, + "max_sentence1_length": 827, + "min_sentence2_length": 23, + "average_sentence2_length": 120.41015625, + "max_sentence2_length": 279, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60460 + }, + "ory_Orya-eng_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 120.41015625, + "max_sentence1_length": 279, + "min_sentence2_length": 24, + "average_sentence2_length": 115.76171875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60460 + }, + "eng_Latn-ote_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.55078125, + "max_sentence1_length": 827, + "min_sentence2_length": 30, + "average_sentence2_length": 167.05078125, + "max_sentence2_length": 424, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72346 + }, + "ote_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 167.05078125, + "max_sentence1_length": 424, + "min_sentence2_length": 24, + "average_sentence2_length": 115.55078125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72346 + }, + "eng_Latn-otm_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.12890625, + "max_sentence1_length": 827, + "min_sentence2_length": 34, + "average_sentence2_length": 164.359375, + "max_sentence2_length": 500, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71293 + }, + "otm_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 164.359375, + "max_sentence1_length": 500, + "min_sentence2_length": 24, + "average_sentence2_length": 114.12890625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71293 + }, + "eng_Latn-otn_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.01953125, + "max_sentence1_length": 827, + "min_sentence2_length": 41, + "average_sentence2_length": 199.9765625, + "max_sentence2_length": 741, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80639 + }, + "otn_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 199.9765625, + "max_sentence1_length": 741, + "min_sentence2_length": 24, + "average_sentence2_length": 115.01953125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80639 + }, + "eng_Latn-otq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 32, + "average_sentence2_length": 130.1953125, + "max_sentence2_length": 328, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62865 + }, + "otq_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 130.1953125, + "max_sentence1_length": 328, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62865 + }, + "eng_Latn-ots_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.46875, + "max_sentence1_length": 827, + "min_sentence2_length": 38, + "average_sentence2_length": 265.98828125, + "max_sentence2_length": 818, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97141 + }, + "ots_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 265.98828125, + "max_sentence1_length": 818, + "min_sentence2_length": 24, + "average_sentence2_length": 113.46875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97141 + }, + "eng_Latn-pab_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 40, + "average_sentence2_length": 164.875, + "max_sentence2_length": 422, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70981 + }, + "pab_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 164.875, + "max_sentence1_length": 422, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70981 + }, + "eng_Latn-pad_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 110.1796875, + "max_sentence1_length": 230, + "min_sentence2_length": 35, + "average_sentence2_length": 313.1875, + "max_sentence2_length": 1173, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 108382 + }, + "pad_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 313.1875, + "max_sentence1_length": 1173, + "min_sentence2_length": 31, + "average_sentence2_length": 110.1796875, + "max_sentence2_length": 230, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 108382 + }, + "eng_Latn-pah_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.34375, + "max_sentence1_length": 216, + "min_sentence2_length": 40, + "average_sentence2_length": 293.7109375, + "max_sentence2_length": 1340, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 103694 + }, + "pah_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 293.7109375, + "max_sentence1_length": 1340, + "min_sentence2_length": 24, + "average_sentence2_length": 111.34375, + "max_sentence2_length": 216, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 103694 + }, + "eng_Latn-pan_Guru": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 18, + "average_sentence2_length": 109.73046875, + "max_sentence2_length": 287, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 56864 + }, + "pan_Guru-eng_Latn": { + "min_sentence1_length": 18, + "average_sentence1_length": 109.73046875, + "max_sentence1_length": 287, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 56864 + }, + "eng_Latn-pao_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 116.69140625, + "max_sentence1_length": 254, + "min_sentence2_length": 27, + "average_sentence2_length": 266.83984375, + "max_sentence2_length": 1493, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 98184 + }, + "pao_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 266.83984375, + "max_sentence1_length": 1493, + "min_sentence2_length": 24, + "average_sentence2_length": 116.69140625, + "max_sentence2_length": 254, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 98184 + }, + "eng_Latn-pes_Arab": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.51953125, + "max_sentence1_length": 827, + "min_sentence2_length": 17, + "average_sentence2_length": 87.43359375, + "max_sentence2_length": 190, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 51444 + }, + "pes_Arab-eng_Latn": { + "min_sentence1_length": 17, + "average_sentence1_length": 87.43359375, + "max_sentence1_length": 190, + "min_sentence2_length": 24, + "average_sentence2_length": 113.51953125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 51444 + }, + "eng_Latn-pib_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.3046875, + "max_sentence1_length": 827, + "min_sentence2_length": 27, + "average_sentence2_length": 130.75390625, + "max_sentence2_length": 409, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62991 + }, + "pib_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 130.75390625, + "max_sentence1_length": 409, + "min_sentence2_length": 24, + "average_sentence2_length": 115.3046875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62991 + }, + "eng_Latn-pio_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.78515625, + "max_sentence1_length": 227, + "min_sentence2_length": 44, + "average_sentence2_length": 292.265625, + "max_sentence2_length": 997, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 102925 + }, + "pio_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 292.265625, + "max_sentence1_length": 997, + "min_sentence2_length": 24, + "average_sentence2_length": 109.78515625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 102925 + }, + "eng_Latn-pir_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 111.71484375, + "max_sentence1_length": 227, + "min_sentence2_length": 31, + "average_sentence2_length": 273.71484375, + "max_sentence2_length": 1050, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 98670 + }, + "pir_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 273.71484375, + "max_sentence1_length": 1050, + "min_sentence2_length": 31, + "average_sentence2_length": 111.71484375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 98670 + }, + "eng_Latn-piu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.66015625, + "max_sentence1_length": 233, + "min_sentence2_length": 62, + "average_sentence2_length": 426.9921875, + "max_sentence2_length": 1444, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 138919 + }, + "piu_Latn-eng_Latn": { + "min_sentence1_length": 62, + "average_sentence1_length": 426.9921875, + "max_sentence1_length": 1444, + "min_sentence2_length": 24, + "average_sentence2_length": 115.66015625, + "max_sentence2_length": 233, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 138919 + }, + "eng_Latn-pjt_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.1953125, + "max_sentence1_length": 235, + "min_sentence2_length": 50, + "average_sentence2_length": 404.37109375, + "max_sentence2_length": 1358, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 131729 + }, + "pjt_Latn-eng_Latn": { + "min_sentence1_length": 50, + "average_sentence1_length": 404.37109375, + "max_sentence1_length": 1358, + "min_sentence2_length": 24, + "average_sentence2_length": 110.1953125, + "max_sentence2_length": 235, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 131729 + }, + "eng_Latn-pls_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.71484375, + "max_sentence1_length": 827, + "min_sentence2_length": 40, + "average_sentence2_length": 190.99609375, + "max_sentence2_length": 497, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78006 + }, + "pls_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 190.99609375, + "max_sentence1_length": 497, + "min_sentence2_length": 24, + "average_sentence2_length": 113.71484375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78006 + }, + "eng_Latn-plu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.98828125, + "max_sentence1_length": 376, + "min_sentence2_length": 44, + "average_sentence2_length": 212.9375, + "max_sentence2_length": 1292, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82925 + }, + "plu_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 212.9375, + "max_sentence1_length": 1292, + "min_sentence2_length": 24, + "average_sentence2_length": 110.98828125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82925 + }, + "eng_Latn-pma_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.41015625, + "max_sentence1_length": 827, + "min_sentence2_length": 28, + "average_sentence2_length": 188.00390625, + "max_sentence2_length": 549, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77418 + }, + "pma_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 188.00390625, + "max_sentence1_length": 549, + "min_sentence2_length": 24, + "average_sentence2_length": 114.41015625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77418 + }, + "eng_Latn-poe_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.53515625, + "max_sentence1_length": 827, + "min_sentence2_length": 39, + "average_sentence2_length": 171.234375, + "max_sentence2_length": 435, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72901 + }, + "poe_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 171.234375, + "max_sentence1_length": 435, + "min_sentence2_length": 24, + "average_sentence2_length": 113.53515625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72901 + }, + "eng_Latn-poh_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 113, + "average_sentence2_length": 352.9453125, + "max_sentence2_length": 889, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 119889 + }, + "poh_Latn-eng_Latn": { + "min_sentence1_length": 113, + "average_sentence1_length": 352.9453125, + "max_sentence1_length": 889, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 119889 + }, + "eng_Latn-poi_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.55078125, + "max_sentence1_length": 827, + "min_sentence2_length": 40, + "average_sentence2_length": 169.44921875, + "max_sentence2_length": 390, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72960 + }, + "poi_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 169.44921875, + "max_sentence1_length": 390, + "min_sentence2_length": 24, + "average_sentence2_length": 115.55078125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72960 + }, + "eng_Latn-pol_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 146.67578125, + "max_sentence1_length": 341, + "min_sentence2_length": 33, + "average_sentence2_length": 133.08203125, + "max_sentence2_length": 307, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71618 + }, + "pol_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 133.08203125, + "max_sentence1_length": 307, + "min_sentence2_length": 35, + "average_sentence2_length": 146.67578125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71618 + }, + "eng_Latn-pon_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "min_sentence2_length": 37, + "average_sentence2_length": 148.0234375, + "max_sentence2_length": 401, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75669 + }, + "pon_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 148.0234375, + "max_sentence1_length": 401, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75669 + }, + "eng_Latn-por_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "min_sentence2_length": 53, + "average_sentence2_length": 161.03125, + "max_sentence2_length": 398, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78999 + }, + "por_Latn-eng_Latn": { + "min_sentence1_length": 53, + "average_sentence1_length": 161.03125, + "max_sentence1_length": 398, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78999 + }, + "eng_Latn-poy_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 25, + "average_sentence2_length": 142.625, + "max_sentence2_length": 360, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65285 + }, + "poy_Latn-eng_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 142.625, + "max_sentence1_length": 360, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65285 + }, + "eng_Latn-ppo_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 108.71875, + "max_sentence1_length": 227, + "min_sentence2_length": 59, + "average_sentence2_length": 275.375, + "max_sentence2_length": 1155, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 98328 + }, + "ppo_Latn-eng_Latn": { + "min_sentence1_length": 59, + "average_sentence1_length": 275.375, + "max_sentence1_length": 1155, + "min_sentence2_length": 25, + "average_sentence2_length": 108.71875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 98328 + }, + "eng_Latn-prf_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.51953125, + "max_sentence1_length": 827, + "min_sentence2_length": 33, + "average_sentence2_length": 141.7890625, + "max_sentence2_length": 318, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65871 + }, + "prf_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 141.7890625, + "max_sentence1_length": 318, + "min_sentence2_length": 24, + "average_sentence2_length": 115.51953125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65871 + }, + "eng_Latn-pri_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.42578125, + "max_sentence1_length": 232, + "min_sentence2_length": 23, + "average_sentence2_length": 234.76171875, + "max_sentence2_length": 691, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88880 + }, + "pri_Latn-eng_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 234.76171875, + "max_sentence1_length": 691, + "min_sentence2_length": 24, + "average_sentence2_length": 112.42578125, + "max_sentence2_length": 232, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88880 + }, + "eng_Latn-ptp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.2421875, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 184.25390625, + "max_sentence2_length": 592, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76415 + }, + "ptp_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 184.25390625, + "max_sentence1_length": 592, + "min_sentence2_length": 24, + "average_sentence2_length": 114.2421875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76415 + }, + "eng_Latn-ptu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.14453125, + "max_sentence1_length": 227, + "min_sentence2_length": 31, + "average_sentence2_length": 186.8203125, + "max_sentence2_length": 566, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76023 + }, + "ptu_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 186.8203125, + "max_sentence1_length": 566, + "min_sentence2_length": 24, + "average_sentence2_length": 110.14453125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76023 + }, + "eng_Latn-pwg_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.4921875, + "max_sentence1_length": 376, + "min_sentence2_length": 32, + "average_sentence2_length": 168.41015625, + "max_sentence2_length": 546, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71655 + }, + "pwg_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 168.41015625, + "max_sentence1_length": 546, + "min_sentence2_length": 24, + "average_sentence2_length": 111.4921875, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71655 + }, + "eng_Latn-qub_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 116.26953125, + "max_sentence1_length": 827, + "min_sentence2_length": 34, + "average_sentence2_length": 162.5703125, + "max_sentence2_length": 540, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71383 + }, + "qub_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 162.5703125, + "max_sentence1_length": 540, + "min_sentence2_length": 24, + "average_sentence2_length": 116.26953125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71383 + }, + "eng_Latn-quc_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.15234375, + "max_sentence1_length": 341, + "min_sentence2_length": 61, + "average_sentence2_length": 217.57421875, + "max_sentence2_length": 504, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93370 + }, + "quc_Latn-eng_Latn": { + "min_sentence1_length": 61, + "average_sentence1_length": 217.57421875, + "max_sentence1_length": 504, + "min_sentence2_length": 56, + "average_sentence2_length": 147.15234375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93370 + }, + "eng_Latn-quf_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 110.26953125, + "max_sentence1_length": 227, + "min_sentence2_length": 61, + "average_sentence2_length": 236.96875, + "max_sentence2_length": 657, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88893 + }, + "quf_Latn-eng_Latn": { + "min_sentence1_length": 61, + "average_sentence1_length": 236.96875, + "max_sentence1_length": 657, + "min_sentence2_length": 31, + "average_sentence2_length": 110.26953125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88893 + }, + "eng_Latn-quh_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.81640625, + "max_sentence1_length": 827, + "min_sentence2_length": 38, + "average_sentence2_length": 163.36328125, + "max_sentence2_length": 600, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71214 + }, + "quh_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 163.36328125, + "max_sentence1_length": 600, + "min_sentence2_length": 24, + "average_sentence2_length": 114.81640625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71214 + }, + "eng_Latn-qul_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.6328125, + "max_sentence1_length": 827, + "min_sentence2_length": 27, + "average_sentence2_length": 130.6796875, + "max_sentence2_length": 286, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63056 + }, + "qul_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 130.6796875, + "max_sentence1_length": 286, + "min_sentence2_length": 24, + "average_sentence2_length": 115.6328125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63056 + }, + "eng_Latn-qup_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.0, + "max_sentence1_length": 227, + "min_sentence2_length": 44, + "average_sentence2_length": 286.640625, + "max_sentence2_length": 955, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101796 + }, + "qup_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 286.640625, + "max_sentence1_length": 955, + "min_sentence2_length": 24, + "average_sentence2_length": 111.0, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101796 + }, + "eng_Latn-qvc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.984375, + "max_sentence1_length": 230, + "min_sentence2_length": 54, + "average_sentence2_length": 229.4765625, + "max_sentence2_length": 807, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87158 + }, + "qvc_Latn-eng_Latn": { + "min_sentence1_length": 54, + "average_sentence1_length": 229.4765625, + "max_sentence1_length": 807, + "min_sentence2_length": 24, + "average_sentence2_length": 110.984375, + "max_sentence2_length": 230, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87158 + }, + "eng_Latn-qve_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 63, + "average_sentence2_length": 217.6484375, + "max_sentence2_length": 630, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84491 + }, + "qve_Latn-eng_Latn": { + "min_sentence1_length": 63, + "average_sentence1_length": 217.6484375, + "max_sentence1_length": 630, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84491 + }, + "eng_Latn-qvh_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.80078125, + "max_sentence1_length": 271, + "min_sentence2_length": 32, + "average_sentence2_length": 230.640625, + "max_sentence2_length": 759, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88177 + }, + "qvh_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 230.640625, + "max_sentence1_length": 759, + "min_sentence2_length": 24, + "average_sentence2_length": 113.80078125, + "max_sentence2_length": 271, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88177 + }, + "eng_Latn-qvm_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.80078125, + "max_sentence1_length": 271, + "min_sentence2_length": 36, + "average_sentence2_length": 238.71484375, + "max_sentence2_length": 762, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90244 + }, + "qvm_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 238.71484375, + "max_sentence1_length": 762, + "min_sentence2_length": 24, + "average_sentence2_length": 113.80078125, + "max_sentence2_length": 271, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90244 + }, + "eng_Latn-qvn_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.42578125, + "max_sentence1_length": 827, + "min_sentence2_length": 38, + "average_sentence2_length": 175.89453125, + "max_sentence2_length": 523, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74578 + }, + "qvn_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 175.89453125, + "max_sentence1_length": 523, + "min_sentence2_length": 24, + "average_sentence2_length": 115.42578125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74578 + }, + "eng_Latn-qvs_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.2265625, + "max_sentence1_length": 827, + "min_sentence2_length": 47, + "average_sentence2_length": 201.8828125, + "max_sentence2_length": 637, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80924 + }, + "qvs_Latn-eng_Latn": { + "min_sentence1_length": 47, + "average_sentence1_length": 201.8828125, + "max_sentence1_length": 637, + "min_sentence2_length": 24, + "average_sentence2_length": 114.2265625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80924 + }, + "eng_Latn-qvw_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.88671875, + "max_sentence1_length": 227, + "min_sentence2_length": 41, + "average_sentence2_length": 174.16015625, + "max_sentence2_length": 539, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73228 + }, + "qvw_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 174.16015625, + "max_sentence1_length": 539, + "min_sentence2_length": 24, + "average_sentence2_length": 111.88671875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73228 + }, + "eng_Latn-qvz_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.54296875, + "max_sentence1_length": 827, + "min_sentence2_length": 35, + "average_sentence2_length": 202.98046875, + "max_sentence2_length": 625, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81286 + }, + "qvz_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 202.98046875, + "max_sentence1_length": 625, + "min_sentence2_length": 24, + "average_sentence2_length": 114.54296875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81286 + }, + "eng_Latn-qwh_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.5078125, + "max_sentence1_length": 227, + "min_sentence2_length": 38, + "average_sentence2_length": 173.35546875, + "max_sentence2_length": 490, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72925 + }, + "qwh_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 173.35546875, + "max_sentence1_length": 490, + "min_sentence2_length": 24, + "average_sentence2_length": 111.5078125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72925 + }, + "eng_Latn-qxh_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 108.95703125, + "max_sentence1_length": 227, + "min_sentence2_length": 31, + "average_sentence2_length": 188.90625, + "max_sentence2_length": 569, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76253 + }, + "qxh_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 188.90625, + "max_sentence1_length": 569, + "min_sentence2_length": 24, + "average_sentence2_length": 108.95703125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76253 + }, + "eng_Latn-qxn_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.30078125, + "max_sentence1_length": 827, + "min_sentence2_length": 48, + "average_sentence2_length": 196.8671875, + "max_sentence2_length": 528, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79403 + }, + "qxn_Latn-eng_Latn": { + "min_sentence1_length": 48, + "average_sentence1_length": 196.8671875, + "max_sentence1_length": 528, + "min_sentence2_length": 24, + "average_sentence2_length": 113.30078125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79403 + }, + "eng_Latn-qxo_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.046875, + "max_sentence1_length": 227, + "min_sentence2_length": 43, + "average_sentence2_length": 246.4609375, + "max_sentence2_length": 910, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91522 + }, + "qxo_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 246.4609375, + "max_sentence1_length": 910, + "min_sentence2_length": 24, + "average_sentence2_length": 111.046875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91522 + }, + "eng_Latn-rai_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.71875, + "max_sentence1_length": 827, + "min_sentence2_length": 35, + "average_sentence2_length": 177.04296875, + "max_sentence2_length": 445, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74435 + }, + "rai_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 177.04296875, + "max_sentence1_length": 445, + "min_sentence2_length": 24, + "average_sentence2_length": 113.71875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74435 + }, + "eng_Latn-reg_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.75390625, + "max_sentence1_length": 227, + "min_sentence2_length": 24, + "average_sentence2_length": 134.6328125, + "max_sentence2_length": 342, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62819 + }, + "reg_Latn-eng_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 134.6328125, + "max_sentence1_length": 342, + "min_sentence2_length": 24, + "average_sentence2_length": 110.75390625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62819 + }, + "eng_Latn-rgu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.55859375, + "max_sentence1_length": 251, + "min_sentence2_length": 38, + "average_sentence2_length": 265.78125, + "max_sentence2_length": 1233, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97367 + }, + "rgu_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 265.78125, + "max_sentence1_length": 1233, + "min_sentence2_length": 24, + "average_sentence2_length": 114.55859375, + "max_sentence2_length": 251, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97367 + }, + "eng_Latn-rkb_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 109.2421875, + "max_sentence1_length": 251, + "min_sentence2_length": 35, + "average_sentence2_length": 305.28125, + "max_sentence2_length": 1129, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 106118 + }, + "rkb_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 305.28125, + "max_sentence1_length": 1129, + "min_sentence2_length": 25, + "average_sentence2_length": 109.2421875, + "max_sentence2_length": 251, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 106118 + }, + "eng_Latn-rmc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.87109375, + "max_sentence1_length": 827, + "min_sentence2_length": 27, + "average_sentence2_length": 126.6015625, + "max_sentence2_length": 288, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61817 + }, + "rmc_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 126.6015625, + "max_sentence1_length": 288, + "min_sentence2_length": 24, + "average_sentence2_length": 114.87109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61817 + }, + "eng_Latn-rmy_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 100.94140625, + "max_sentence1_length": 218, + "min_sentence2_length": 26, + "average_sentence2_length": 106.82421875, + "max_sentence2_length": 247, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 53188 + }, + "rmy_Latn-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 106.82421875, + "max_sentence1_length": 247, + "min_sentence2_length": 26, + "average_sentence2_length": 100.94140625, + "max_sentence2_length": 218, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 53188 + }, + "eng_Latn-ron_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "min_sentence2_length": 54, + "average_sentence2_length": 143.33203125, + "max_sentence2_length": 339, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74468 + }, + "ron_Latn-eng_Latn": { + "min_sentence1_length": 54, + "average_sentence1_length": 143.33203125, + "max_sentence1_length": 339, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74468 + }, + "eng_Latn-roo_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.71484375, + "max_sentence1_length": 227, + "min_sentence2_length": 37, + "average_sentence2_length": 285.2890625, + "max_sentence2_length": 1183, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101889 + }, + "roo_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 285.2890625, + "max_sentence1_length": 1183, + "min_sentence2_length": 24, + "average_sentence2_length": 112.71484375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101889 + }, + "eng_Latn-rop_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.8125, + "max_sentence1_length": 231, + "min_sentence2_length": 43, + "average_sentence2_length": 269.47265625, + "max_sentence2_length": 844, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97353 + }, + "rop_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 269.47265625, + "max_sentence1_length": 844, + "min_sentence2_length": 24, + "average_sentence2_length": 110.8125, + "max_sentence2_length": 231, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97353 + }, + "eng_Latn-row_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.76953125, + "max_sentence1_length": 251, + "min_sentence2_length": 33, + "average_sentence2_length": 257.19140625, + "max_sentence2_length": 1241, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 95222 + }, + "row_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 257.19140625, + "max_sentence1_length": 1241, + "min_sentence2_length": 24, + "average_sentence2_length": 114.76953125, + "max_sentence2_length": 251, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 95222 + }, + "eng_Latn-rro_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 35, + "average_sentence2_length": 150.671875, + "max_sentence2_length": 352, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68107 + }, + "rro_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 150.671875, + "max_sentence1_length": 352, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68107 + }, + "eng_Latn-ruf_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 30, + "average_sentence2_length": 130.6015625, + "max_sentence2_length": 316, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62207 + }, + "ruf_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 130.6015625, + "max_sentence1_length": 316, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62207 + }, + "eng_Latn-rug_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.87109375, + "max_sentence1_length": 827, + "min_sentence2_length": 35, + "average_sentence2_length": 174.76171875, + "max_sentence2_length": 480, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74146 + }, + "rug_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 174.76171875, + "max_sentence1_length": 480, + "min_sentence2_length": 24, + "average_sentence2_length": 114.87109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74146 + }, + "eng_Latn-rus_Cyrl": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.4375, + "max_sentence1_length": 827, + "min_sentence2_length": 17, + "average_sentence2_length": 92.38671875, + "max_sentence2_length": 233, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 52947 + }, + "rus_Cyrl-eng_Latn": { + "min_sentence1_length": 17, + "average_sentence1_length": 92.38671875, + "max_sentence1_length": 233, + "min_sentence2_length": 24, + "average_sentence2_length": 114.4375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 52947 + }, + "eng_Latn-rwo_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 145.0625, + "max_sentence1_length": 341, + "min_sentence2_length": 85, + "average_sentence2_length": 306.8515625, + "max_sentence2_length": 871, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 115690 + }, + "rwo_Latn-eng_Latn": { + "min_sentence1_length": 85, + "average_sentence1_length": 306.8515625, + "max_sentence1_length": 871, + "min_sentence2_length": 35, + "average_sentence2_length": 145.0625, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 115690 + }, + "eng_Latn-sab_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.7109375, + "max_sentence1_length": 246, + "min_sentence2_length": 29, + "average_sentence2_length": 446.453125, + "max_sentence2_length": 1781, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 143658 + }, + "sab_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 446.453125, + "max_sentence1_length": 1781, + "min_sentence2_length": 24, + "average_sentence2_length": 114.7109375, + "max_sentence2_length": 246, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 143658 + }, + "eng_Latn-san_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "min_sentence2_length": 2, + "average_sentence2_length": 159.125, + "max_sentence2_length": 429, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77941 + }, + "san_Latn-eng_Latn": { + "min_sentence1_length": 2, + "average_sentence1_length": 159.125, + "max_sentence1_length": 429, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77941 + }, + "eng_Latn-sbe_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.0546875, + "max_sentence1_length": 239, + "min_sentence2_length": 31, + "average_sentence2_length": 179.20703125, + "max_sentence2_length": 666, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74051 + }, + "sbe_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 179.20703125, + "max_sentence1_length": 666, + "min_sentence2_length": 24, + "average_sentence2_length": 110.0546875, + "max_sentence2_length": 239, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74051 + }, + "eng_Latn-sbk_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.32421875, + "max_sentence1_length": 827, + "min_sentence2_length": 17, + "average_sentence2_length": 112.703125, + "max_sentence2_length": 250, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58375 + }, + "sbk_Latn-eng_Latn": { + "min_sentence1_length": 17, + "average_sentence1_length": 112.703125, + "max_sentence1_length": 250, + "min_sentence2_length": 24, + "average_sentence2_length": 115.32421875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58375 + }, + "eng_Latn-sbs_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.984375, + "max_sentence1_length": 237, + "min_sentence2_length": 23, + "average_sentence2_length": 117.47265625, + "max_sentence2_length": 434, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58485 + }, + "sbs_Latn-eng_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 117.47265625, + "max_sentence1_length": 434, + "min_sentence2_length": 24, + "average_sentence2_length": 110.984375, + "max_sentence2_length": 237, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58485 + }, + "eng_Latn-seh_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 109.953125, + "max_sentence1_length": 257, + "min_sentence2_length": 28, + "average_sentence2_length": 128.59765625, + "max_sentence2_length": 298, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61069 + }, + "seh_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 128.59765625, + "max_sentence1_length": 298, + "min_sentence2_length": 37, + "average_sentence2_length": 109.953125, + "max_sentence2_length": 257, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61069 + }, + "eng_Latn-sey_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.765625, + "max_sentence1_length": 827, + "min_sentence2_length": 34, + "average_sentence2_length": 168.8046875, + "max_sentence2_length": 483, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72594 + }, + "sey_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 168.8046875, + "max_sentence1_length": 483, + "min_sentence2_length": 24, + "average_sentence2_length": 114.765625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72594 + }, + "eng_Latn-sgb_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.53515625, + "max_sentence1_length": 227, + "min_sentence2_length": 38, + "average_sentence2_length": 173.15234375, + "max_sentence2_length": 574, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72880 + }, + "sgb_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 173.15234375, + "max_sentence1_length": 574, + "min_sentence2_length": 24, + "average_sentence2_length": 111.53515625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72880 + }, + "eng_Latn-sgz_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.1171875, + "max_sentence1_length": 227, + "min_sentence2_length": 36, + "average_sentence2_length": 245.6015625, + "max_sentence2_length": 964, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90808 + }, + "sgz_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 245.6015625, + "max_sentence1_length": 964, + "min_sentence2_length": 24, + "average_sentence2_length": 109.1171875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90808 + }, + "eng_Latn-shj_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 108.34065934065934, + "max_sentence1_length": 245, + "min_sentence2_length": 25, + "average_sentence2_length": 114.28571428571429, + "max_sentence2_length": 254, + "num_samples": 91, + "num_samples_sentence2": 91, + "number_of_characters": 20259 + }, + "shj_Latn-eng_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 114.28571428571429, + "max_sentence1_length": 254, + "min_sentence2_length": 37, + "average_sentence2_length": 108.34065934065934, + "max_sentence2_length": 245, + "num_samples": 91, + "num_samples_sentence2": 91, + "number_of_characters": 20259 + }, + "eng_Latn-shp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 35, + "average_sentence2_length": 150.4765625, + "max_sentence2_length": 322, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67295 + }, + "shp_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 150.4765625, + "max_sentence1_length": 322, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67295 + }, + "eng_Latn-sim_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 114.4453125, + "max_sentence1_length": 238, + "min_sentence2_length": 47, + "average_sentence2_length": 246.4921875, + "max_sentence2_length": 975, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92400 + }, + "sim_Latn-eng_Latn": { + "min_sentence1_length": 47, + "average_sentence1_length": 246.4921875, + "max_sentence1_length": 975, + "min_sentence2_length": 31, + "average_sentence2_length": 114.4453125, + "max_sentence2_length": 238, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92400 + }, + "eng_Latn-sja_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.34765625, + "max_sentence1_length": 827, + "min_sentence2_length": 30, + "average_sentence2_length": 213.08984375, + "max_sentence2_length": 513, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84080 + }, + "sja_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 213.08984375, + "max_sentence1_length": 513, + "min_sentence2_length": 24, + "average_sentence2_length": 115.34765625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84080 + }, + "eng_Latn-sll_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.078125, + "max_sentence1_length": 227, + "min_sentence2_length": 33, + "average_sentence2_length": 220.609375, + "max_sentence2_length": 1080, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84656 + }, + "sll_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 220.609375, + "max_sentence1_length": 1080, + "min_sentence2_length": 24, + "average_sentence2_length": 110.078125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84656 + }, + "eng_Latn-smk_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.5078125, + "max_sentence1_length": 827, + "min_sentence2_length": 42, + "average_sentence2_length": 203.859375, + "max_sentence2_length": 567, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81502 + }, + "smk_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 203.859375, + "max_sentence1_length": 567, + "min_sentence2_length": 24, + "average_sentence2_length": 114.5078125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81502 + }, + "eng_Latn-snc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.4921875, + "max_sentence1_length": 827, + "min_sentence2_length": 30, + "average_sentence2_length": 179.6328125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75296 + }, + "snc_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 179.6328125, + "max_sentence1_length": 827, + "min_sentence2_length": 24, + "average_sentence2_length": 114.4921875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75296 + }, + "eng_Latn-snn_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.234375, + "max_sentence1_length": 233, + "min_sentence2_length": 36, + "average_sentence2_length": 254.91796875, + "max_sentence2_length": 841, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93479 + }, + "snn_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 254.91796875, + "max_sentence1_length": 841, + "min_sentence2_length": 24, + "average_sentence2_length": 110.234375, + "max_sentence2_length": 233, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93479 + }, + "eng_Latn-snp_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "min_sentence2_length": 49, + "average_sentence2_length": 219.875, + "max_sentence2_length": 619, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93493 + }, + "snp_Latn-eng_Latn": { + "min_sentence1_length": 49, + "average_sentence1_length": 219.875, + "max_sentence1_length": 619, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93493 + }, + "eng_Latn-snx_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 108.87857142857143, + "max_sentence1_length": 257, + "min_sentence2_length": 42, + "average_sentence2_length": 132.65, + "max_sentence2_length": 313, + "num_samples": 140, + "num_samples_sentence2": 140, + "number_of_characters": 33814 + }, + "snx_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 132.65, + "max_sentence1_length": 313, + "min_sentence2_length": 39, + "average_sentence2_length": 108.87857142857143, + "max_sentence2_length": 257, + "num_samples": 140, + "num_samples_sentence2": 140, + "number_of_characters": 33814 + }, + "eng_Latn-sny_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.15625, + "max_sentence1_length": 227, + "min_sentence2_length": 44, + "average_sentence2_length": 317.09765625, + "max_sentence2_length": 1158, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 109633 + }, + "sny_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 317.09765625, + "max_sentence1_length": 1158, + "min_sentence2_length": 24, + "average_sentence2_length": 111.15625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 109633 + }, + "eng_Latn-som_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 18, + "average_sentence2_length": 125.13671875, + "max_sentence2_length": 311, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61570 + }, + "som_Latn-eng_Latn": { + "min_sentence1_length": 18, + "average_sentence1_length": 125.13671875, + "max_sentence1_length": 311, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61570 + }, + "eng_Latn-soq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.28515625, + "max_sentence1_length": 227, + "min_sentence2_length": 29, + "average_sentence2_length": 215.83203125, + "max_sentence2_length": 727, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83230 + }, + "soq_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 215.83203125, + "max_sentence1_length": 727, + "min_sentence2_length": 24, + "average_sentence2_length": 109.28515625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83230 + }, + "eng_Latn-soy_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.36328125, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 136.3359375, + "max_sentence2_length": 317, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64435 + }, + "soy_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 136.3359375, + "max_sentence1_length": 317, + "min_sentence2_length": 24, + "average_sentence2_length": 115.36328125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64435 + }, + "eng_Latn-spa_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "min_sentence2_length": 50, + "average_sentence2_length": 146.875, + "max_sentence2_length": 371, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75375 + }, + "spa_Latn-eng_Latn": { + "min_sentence1_length": 50, + "average_sentence1_length": 146.875, + "max_sentence1_length": 371, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75375 + }, + "eng_Latn-spl_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 112.36328125, + "max_sentence1_length": 265, + "min_sentence2_length": 47, + "average_sentence2_length": 401.6015625, + "max_sentence2_length": 1260, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 131575 + }, + "spl_Latn-eng_Latn": { + "min_sentence1_length": 47, + "average_sentence1_length": 401.6015625, + "max_sentence1_length": 1260, + "min_sentence2_length": 25, + "average_sentence2_length": 112.36328125, + "max_sentence2_length": 265, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 131575 + }, + "eng_Latn-spm_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 114.765625, + "max_sentence1_length": 257, + "min_sentence2_length": 41, + "average_sentence2_length": 219.171875, + "max_sentence2_length": 596, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85488 + }, + "spm_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 219.171875, + "max_sentence1_length": 596, + "min_sentence2_length": 38, + "average_sentence2_length": 114.765625, + "max_sentence2_length": 257, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85488 + }, + "eng_Latn-spp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.95703125, + "max_sentence1_length": 227, + "min_sentence2_length": 31, + "average_sentence2_length": 167.06640625, + "max_sentence2_length": 601, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71174 + }, + "spp_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 167.06640625, + "max_sentence1_length": 601, + "min_sentence2_length": 24, + "average_sentence2_length": 110.95703125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71174 + }, + "eng_Latn-sps_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.609375, + "max_sentence1_length": 827, + "min_sentence2_length": 38, + "average_sentence2_length": 242.5859375, + "max_sentence2_length": 789, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91698 + }, + "sps_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 242.5859375, + "max_sentence1_length": 789, + "min_sentence2_length": 24, + "average_sentence2_length": 115.609375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91698 + }, + "eng_Latn-spy_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.4140625, + "max_sentence1_length": 827, + "min_sentence2_length": 32, + "average_sentence2_length": 145.1640625, + "max_sentence2_length": 398, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66196 + }, + "spy_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 145.1640625, + "max_sentence1_length": 398, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4140625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66196 + }, + "eng_Latn-sri_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.62109375, + "max_sentence1_length": 227, + "min_sentence2_length": 29, + "average_sentence2_length": 208.41015625, + "max_sentence2_length": 543, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81672 + }, + "sri_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 208.41015625, + "max_sentence1_length": 543, + "min_sentence2_length": 24, + "average_sentence2_length": 110.62109375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81672 + }, + "eng_Latn-srm_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.73046875, + "max_sentence1_length": 227, + "min_sentence2_length": 39, + "average_sentence2_length": 250.09765625, + "max_sentence2_length": 997, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92372 + }, + "srm_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 250.09765625, + "max_sentence1_length": 997, + "min_sentence2_length": 24, + "average_sentence2_length": 110.73046875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92372 + }, + "eng_Latn-srn_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.71484375, + "max_sentence1_length": 827, + "min_sentence2_length": 34, + "average_sentence2_length": 131.99609375, + "max_sentence2_length": 315, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63158 + }, + "srn_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 131.99609375, + "max_sentence1_length": 315, + "min_sentence2_length": 24, + "average_sentence2_length": 114.71484375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63158 + }, + "eng_Latn-srp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.67578125, + "max_sentence1_length": 827, + "min_sentence2_length": 27, + "average_sentence2_length": 89.53515625, + "max_sentence2_length": 220, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 52022 + }, + "srp_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 89.53515625, + "max_sentence1_length": 220, + "min_sentence2_length": 24, + "average_sentence2_length": 113.67578125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 52022 + }, + "eng_Latn-srq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.078125, + "max_sentence1_length": 827, + "min_sentence2_length": 35, + "average_sentence2_length": 165.5703125, + "max_sentence2_length": 532, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71590 + }, + "srq_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 165.5703125, + "max_sentence1_length": 532, + "min_sentence2_length": 24, + "average_sentence2_length": 114.078125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71590 + }, + "eng_Latn-ssd_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 108.75, + "max_sentence1_length": 227, + "min_sentence2_length": 28, + "average_sentence2_length": 174.31640625, + "max_sentence2_length": 502, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72465 + }, + "ssd_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 174.31640625, + "max_sentence1_length": 502, + "min_sentence2_length": 24, + "average_sentence2_length": 108.75, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72465 + }, + "eng_Latn-ssg_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.9296875, + "max_sentence1_length": 827, + "min_sentence2_length": 32, + "average_sentence2_length": 164.51171875, + "max_sentence2_length": 461, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71281 + }, + "ssg_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 164.51171875, + "max_sentence1_length": 461, + "min_sentence2_length": 24, + "average_sentence2_length": 113.9296875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71281 + }, + "eng_Latn-ssx_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 116.21875, + "max_sentence1_length": 827, + "min_sentence2_length": 36, + "average_sentence2_length": 213.1640625, + "max_sentence2_length": 493, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84322 + }, + "ssx_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 213.1640625, + "max_sentence1_length": 493, + "min_sentence2_length": 24, + "average_sentence2_length": 116.21875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84322 + }, + "eng_Latn-stp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.640625, + "max_sentence1_length": 251, + "min_sentence2_length": 46, + "average_sentence2_length": 279.2734375, + "max_sentence2_length": 743, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99562 + }, + "stp_Latn-eng_Latn": { + "min_sentence1_length": 46, + "average_sentence1_length": 279.2734375, + "max_sentence1_length": 743, + "min_sentence2_length": 24, + "average_sentence2_length": 109.640625, + "max_sentence2_length": 251, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99562 + }, + "eng_Latn-sua_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 109.33984375, + "max_sentence1_length": 227, + "min_sentence2_length": 53, + "average_sentence2_length": 425.78515625, + "max_sentence2_length": 1832, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 136992 + }, + "sua_Latn-eng_Latn": { + "min_sentence1_length": 53, + "average_sentence1_length": 425.78515625, + "max_sentence1_length": 1832, + "min_sentence2_length": 25, + "average_sentence2_length": 109.33984375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 136992 + }, + "eng_Latn-sue_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.70703125, + "max_sentence1_length": 827, + "min_sentence2_length": 25, + "average_sentence2_length": 170.77734375, + "max_sentence2_length": 777, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73084 + }, + "sue_Latn-eng_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 170.77734375, + "max_sentence1_length": 777, + "min_sentence2_length": 24, + "average_sentence2_length": 114.70703125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73084 + }, + "eng_Latn-sus_Arab": { + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "min_sentence2_length": 27, + "average_sentence2_length": 147.64453125, + "max_sentence2_length": 435, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75002 + }, + "sus_Arab-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 147.64453125, + "max_sentence1_length": 435, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75002 + }, + "eng_Latn-suz_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 26, + "average_sentence2_length": 150.5703125, + "max_sentence2_length": 456, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67319 + }, + "suz_Latn-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 150.5703125, + "max_sentence1_length": 456, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67319 + }, + "eng_Latn-swe_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 33, + "average_sentence2_length": 274.54296875, + "max_sentence2_length": 1575, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99056 + }, + "swe_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 274.54296875, + "max_sentence1_length": 1575, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99056 + }, + "eng_Latn-swh_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "min_sentence2_length": 43, + "average_sentence2_length": 152.96484375, + "max_sentence2_length": 378, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76934 + }, + "swh_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 152.96484375, + "max_sentence1_length": 378, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76934 + }, + "eng_Latn-swp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.3125, + "max_sentence1_length": 227, + "min_sentence2_length": 24, + "average_sentence2_length": 129.51953125, + "max_sentence2_length": 378, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61397 + }, + "swp_Latn-eng_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 129.51953125, + "max_sentence1_length": 378, + "min_sentence2_length": 24, + "average_sentence2_length": 110.3125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61397 + }, + "eng_Latn-sxb_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.4453125, + "max_sentence1_length": 227, + "min_sentence2_length": 27, + "average_sentence2_length": 154.4921875, + "max_sentence2_length": 393, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67824 + }, + "sxb_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 154.4921875, + "max_sentence1_length": 393, + "min_sentence2_length": 24, + "average_sentence2_length": 110.4453125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67824 + }, + "eng_Latn-tac_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.203125, + "max_sentence1_length": 227, + "min_sentence2_length": 43, + "average_sentence2_length": 224.921875, + "max_sentence2_length": 673, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85792 + }, + "tac_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 224.921875, + "max_sentence1_length": 673, + "min_sentence2_length": 24, + "average_sentence2_length": 110.203125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85792 + }, + "eng_Latn-taj_Deva": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.34375, + "max_sentence1_length": 827, + "min_sentence2_length": 35, + "average_sentence2_length": 151.29296875, + "max_sentence2_length": 392, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67747 + }, + "taj_Deva-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 151.29296875, + "max_sentence1_length": 392, + "min_sentence2_length": 24, + "average_sentence2_length": 113.34375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67747 + }, + "eng_Latn-tam_Taml": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.296875, + "max_sentence1_length": 827, + "min_sentence2_length": 30, + "average_sentence2_length": 135.61328125, + "max_sentence2_length": 304, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64233 + }, + "tam_Taml-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 135.61328125, + "max_sentence1_length": 304, + "min_sentence2_length": 24, + "average_sentence2_length": 115.296875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64233 + }, + "eng_Latn-tav_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.55859375, + "max_sentence1_length": 230, + "min_sentence2_length": 37, + "average_sentence2_length": 326.8125, + "max_sentence2_length": 1637, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 112479 + }, + "tav_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 326.8125, + "max_sentence1_length": 1637, + "min_sentence2_length": 24, + "average_sentence2_length": 112.55859375, + "max_sentence2_length": 230, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 112479 + }, + "eng_Latn-taw_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 111.49609375, + "max_sentence1_length": 227, + "min_sentence2_length": 41, + "average_sentence2_length": 227.09375, + "max_sentence2_length": 647, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86679 + }, + "taw_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 227.09375, + "max_sentence1_length": 647, + "min_sentence2_length": 31, + "average_sentence2_length": 111.49609375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86679 + }, + "eng_Latn-tbc_Latn": { + "min_sentence1_length": 21, + "average_sentence1_length": 113.28515625, + "max_sentence1_length": 827, + "min_sentence2_length": 36, + "average_sentence2_length": 195.09765625, + "max_sentence2_length": 552, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78946 + }, + "tbc_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 195.09765625, + "max_sentence1_length": 552, + "min_sentence2_length": 21, + "average_sentence2_length": 113.28515625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78946 + }, + "eng_Latn-tbf_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.50390625, + "max_sentence1_length": 227, + "min_sentence2_length": 35, + "average_sentence2_length": 170.6328125, + "max_sentence2_length": 527, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71971 + }, + "tbf_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 170.6328125, + "max_sentence1_length": 527, + "min_sentence2_length": 24, + "average_sentence2_length": 110.50390625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71971 + }, + "eng_Latn-tbg_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 105.53515625, + "max_sentence1_length": 217, + "min_sentence2_length": 18, + "average_sentence2_length": 176.765625, + "max_sentence2_length": 557, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72269 + }, + "tbg_Latn-eng_Latn": { + "min_sentence1_length": 18, + "average_sentence1_length": 176.765625, + "max_sentence1_length": 557, + "min_sentence2_length": 29, + "average_sentence2_length": 105.53515625, + "max_sentence2_length": 217, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72269 + }, + "eng_Latn-tbo_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 114.03125, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 147.125, + "max_sentence2_length": 495, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66856 + }, + "tbo_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 147.125, + "max_sentence1_length": 495, + "min_sentence2_length": 31, + "average_sentence2_length": 114.03125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66856 + }, + "eng_Latn-tbz_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.125, + "max_sentence1_length": 227, + "min_sentence2_length": 30, + "average_sentence2_length": 107.83203125, + "max_sentence2_length": 275, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 55797 + }, + "tbz_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 107.83203125, + "max_sentence1_length": 275, + "min_sentence2_length": 24, + "average_sentence2_length": 110.125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 55797 + }, + "eng_Latn-tca_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.3046875, + "max_sentence1_length": 227, + "min_sentence2_length": 40, + "average_sentence2_length": 274.484375, + "max_sentence2_length": 1003, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99018 + }, + "tca_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 274.484375, + "max_sentence1_length": 1003, + "min_sentence2_length": 24, + "average_sentence2_length": 112.3046875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99018 + }, + "eng_Latn-tcs_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.69921875, + "max_sentence1_length": 227, + "min_sentence2_length": 36, + "average_sentence2_length": 175.640625, + "max_sentence2_length": 559, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73303 + }, + "tcs_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 175.640625, + "max_sentence1_length": 559, + "min_sentence2_length": 24, + "average_sentence2_length": 110.69921875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73303 + }, + "eng_Latn-tcz_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 30, + "average_sentence2_length": 141.09375, + "max_sentence2_length": 363, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64893 + }, + "tcz_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 141.09375, + "max_sentence1_length": 363, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64893 + }, + "eng_Latn-tdt_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 115.015625, + "max_sentence1_length": 827, + "min_sentence2_length": 33, + "average_sentence2_length": 168.8203125, + "max_sentence2_length": 617, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72662 + }, + "tdt_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 168.8203125, + "max_sentence1_length": 617, + "min_sentence2_length": 31, + "average_sentence2_length": 115.015625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72662 + }, + "eng_Latn-tee_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.37890625, + "max_sentence1_length": 827, + "min_sentence2_length": 46, + "average_sentence2_length": 231.21484375, + "max_sentence2_length": 533, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88472 + }, + "tee_Latn-eng_Latn": { + "min_sentence1_length": 46, + "average_sentence1_length": 231.21484375, + "max_sentence1_length": 533, + "min_sentence2_length": 24, + "average_sentence2_length": 114.37890625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88472 + }, + "eng_Latn-tel_Telu": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.12109375, + "max_sentence1_length": 227, + "min_sentence2_length": 24, + "average_sentence2_length": 120.203125, + "max_sentence2_length": 369, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58963 + }, + "tel_Telu-eng_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 120.203125, + "max_sentence1_length": 369, + "min_sentence2_length": 24, + "average_sentence2_length": 110.12109375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58963 + }, + "eng_Latn-ter_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 54, + "average_sentence2_length": 202.0625, + "max_sentence2_length": 599, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80501 + }, + "ter_Latn-eng_Latn": { + "min_sentence1_length": 54, + "average_sentence1_length": 202.0625, + "max_sentence1_length": 599, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80501 + }, + "eng_Latn-tet_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 113.78125, + "max_sentence1_length": 251, + "min_sentence2_length": 36, + "average_sentence2_length": 232.72265625, + "max_sentence2_length": 1185, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88705 + }, + "tet_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 232.72265625, + "max_sentence1_length": 1185, + "min_sentence2_length": 31, + "average_sentence2_length": 113.78125, + "max_sentence2_length": 251, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 88705 + }, + "eng_Latn-tew_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.140625, + "max_sentence1_length": 239, + "min_sentence2_length": 39, + "average_sentence2_length": 210.421875, + "max_sentence2_length": 585, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82320 + }, + "tew_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 210.421875, + "max_sentence1_length": 585, + "min_sentence2_length": 24, + "average_sentence2_length": 111.140625, + "max_sentence2_length": 239, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82320 + }, + "eng_Latn-tfr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.41796875, + "max_sentence1_length": 235, + "min_sentence2_length": 32, + "average_sentence2_length": 224.50390625, + "max_sentence2_length": 717, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85996 + }, + "tfr_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 224.50390625, + "max_sentence1_length": 717, + "min_sentence2_length": 24, + "average_sentence2_length": 111.41796875, + "max_sentence2_length": 235, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85996 + }, + "eng_Latn-tgk_Cyrl": { + "min_sentence1_length": 32, + "average_sentence1_length": 104.37109375, + "max_sentence1_length": 245, + "min_sentence2_length": 31, + "average_sentence2_length": 121.40234375, + "max_sentence2_length": 360, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57798 + }, + "tgk_Cyrl-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 121.40234375, + "max_sentence1_length": 360, + "min_sentence2_length": 32, + "average_sentence2_length": 104.37109375, + "max_sentence2_length": 245, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57798 + }, + "eng_Latn-tgl_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 29, + "average_sentence2_length": 145.71875, + "max_sentence2_length": 357, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66077 + }, + "tgl_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 145.71875, + "max_sentence1_length": 357, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66077 + }, + "eng_Latn-tgo_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.2109375, + "max_sentence1_length": 227, + "min_sentence2_length": 36, + "average_sentence2_length": 177.0, + "max_sentence2_length": 472, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73526 + }, + "tgo_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 177.0, + "max_sentence1_length": 472, + "min_sentence2_length": 24, + "average_sentence2_length": 110.2109375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73526 + }, + "eng_Latn-tgp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.50390625, + "max_sentence1_length": 227, + "min_sentence2_length": 33, + "average_sentence2_length": 157.234375, + "max_sentence2_length": 609, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68797 + }, + "tgp_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 157.234375, + "max_sentence1_length": 609, + "min_sentence2_length": 24, + "average_sentence2_length": 111.50390625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 68797 + }, + "eng_Latn-tha_Thai": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.4375, + "max_sentence1_length": 827, + "min_sentence2_length": 16, + "average_sentence2_length": 123.1015625, + "max_sentence2_length": 312, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60554 + }, + "tha_Thai-eng_Latn": { + "min_sentence1_length": 16, + "average_sentence1_length": 123.1015625, + "max_sentence1_length": 312, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60554 + }, + "eng_Latn-tif_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.62890625, + "max_sentence1_length": 230, + "min_sentence2_length": 42, + "average_sentence2_length": 404.69921875, + "max_sentence2_length": 1636, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 132436 + }, + "tif_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 404.69921875, + "max_sentence1_length": 1636, + "min_sentence2_length": 24, + "average_sentence2_length": 112.62890625, + "max_sentence2_length": 230, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 132436 + }, + "eng_Latn-tim_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 108.66796875, + "max_sentence1_length": 227, + "min_sentence2_length": 42, + "average_sentence2_length": 202.26953125, + "max_sentence2_length": 868, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79600 + }, + "tim_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 202.26953125, + "max_sentence1_length": 868, + "min_sentence2_length": 24, + "average_sentence2_length": 108.66796875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79600 + }, + "eng_Latn-tiw_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 103.73023255813953, + "max_sentence1_length": 257, + "min_sentence2_length": 95, + "average_sentence2_length": 703.9720930232559, + "max_sentence2_length": 2787, + "num_samples": 215, + "num_samples_sentence2": 215, + "number_of_characters": 173656 + }, + "tiw_Latn-eng_Latn": { + "min_sentence1_length": 95, + "average_sentence1_length": 703.9720930232559, + "max_sentence1_length": 2787, + "min_sentence2_length": 40, + "average_sentence2_length": 103.73023255813953, + "max_sentence2_length": 257, + "num_samples": 215, + "num_samples_sentence2": 215, + "number_of_characters": 173656 + }, + "eng_Latn-tiy_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.0859375, + "max_sentence1_length": 827, + "min_sentence2_length": 36, + "average_sentence2_length": 173.453125, + "max_sentence2_length": 679, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73610 + }, + "tiy_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 173.453125, + "max_sentence1_length": 679, + "min_sentence2_length": 24, + "average_sentence2_length": 114.0859375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73610 + }, + "eng_Latn-tke_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 107.67391304347827, + "max_sentence1_length": 245, + "min_sentence2_length": 31, + "average_sentence2_length": 110.21739130434783, + "max_sentence2_length": 212, + "num_samples": 92, + "num_samples_sentence2": 92, + "number_of_characters": 20046 + }, + "tke_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 110.21739130434783, + "max_sentence1_length": 212, + "min_sentence2_length": 37, + "average_sentence2_length": 107.67391304347827, + "max_sentence2_length": 245, + "num_samples": 92, + "num_samples_sentence2": 92, + "number_of_characters": 20046 + }, + "eng_Latn-tku_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.55078125, + "max_sentence1_length": 827, + "min_sentence2_length": 28, + "average_sentence2_length": 174.171875, + "max_sentence2_length": 415, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74169 + }, + "tku_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 174.171875, + "max_sentence1_length": 415, + "min_sentence2_length": 24, + "average_sentence2_length": 115.55078125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74169 + }, + "eng_Latn-tlf_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.07421875, + "max_sentence1_length": 230, + "min_sentence2_length": 42, + "average_sentence2_length": 383.2734375, + "max_sentence2_length": 1527, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 126553 + }, + "tlf_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 383.2734375, + "max_sentence1_length": 1527, + "min_sentence2_length": 24, + "average_sentence2_length": 111.07421875, + "max_sentence2_length": 230, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 126553 + }, + "eng_Latn-tmd_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.16015625, + "max_sentence1_length": 227, + "min_sentence2_length": 27, + "average_sentence2_length": 218.640625, + "max_sentence2_length": 702, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83917 + }, + "tmd_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 218.640625, + "max_sentence1_length": 702, + "min_sentence2_length": 24, + "average_sentence2_length": 109.16015625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 83917 + }, + "eng_Latn-tna_Latn": { + "min_sentence1_length": 21, + "average_sentence1_length": 109.80859375, + "max_sentence1_length": 227, + "min_sentence2_length": 37, + "average_sentence2_length": 208.0234375, + "max_sentence2_length": 741, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81365 + }, + "tna_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 208.0234375, + "max_sentence1_length": 741, + "min_sentence2_length": 21, + "average_sentence2_length": 109.80859375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81365 + }, + "eng_Latn-tnc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.6015625, + "max_sentence1_length": 263, + "min_sentence2_length": 46, + "average_sentence2_length": 302.96484375, + "max_sentence2_length": 899, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 106641 + }, + "tnc_Latn-eng_Latn": { + "min_sentence1_length": 46, + "average_sentence1_length": 302.96484375, + "max_sentence1_length": 899, + "min_sentence2_length": 24, + "average_sentence2_length": 113.6015625, + "max_sentence2_length": 263, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 106641 + }, + "eng_Latn-tnk_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.58203125, + "max_sentence1_length": 227, + "min_sentence2_length": 35, + "average_sentence2_length": 195.19140625, + "max_sentence2_length": 547, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78022 + }, + "tnk_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 195.19140625, + "max_sentence1_length": 547, + "min_sentence2_length": 24, + "average_sentence2_length": 109.58203125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78022 + }, + "eng_Latn-tnn_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.359375, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 203.25390625, + "max_sentence2_length": 1039, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81309 + }, + "tnn_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 203.25390625, + "max_sentence1_length": 1039, + "min_sentence2_length": 24, + "average_sentence2_length": 114.359375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81309 + }, + "eng_Latn-tnp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.2734375, + "max_sentence1_length": 827, + "min_sentence2_length": 33, + "average_sentence2_length": 214.32421875, + "max_sentence2_length": 588, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84121 + }, + "tnp_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 214.32421875, + "max_sentence1_length": 588, + "min_sentence2_length": 24, + "average_sentence2_length": 114.2734375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84121 + }, + "eng_Latn-toc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.421875, + "max_sentence1_length": 227, + "min_sentence2_length": 54, + "average_sentence2_length": 287.953125, + "max_sentence2_length": 808, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101728 + }, + "toc_Latn-eng_Latn": { + "min_sentence1_length": 54, + "average_sentence1_length": 287.953125, + "max_sentence1_length": 808, + "min_sentence2_length": 24, + "average_sentence2_length": 109.421875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101728 + }, + "eng_Latn-tod_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 123.76171875, + "max_sentence1_length": 306, + "min_sentence2_length": 17, + "average_sentence2_length": 124.19140625, + "max_sentence2_length": 293, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63476 + }, + "tod_Latn-eng_Latn": { + "min_sentence1_length": 17, + "average_sentence1_length": 124.19140625, + "max_sentence1_length": 293, + "min_sentence2_length": 24, + "average_sentence2_length": 123.76171875, + "max_sentence2_length": 306, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63476 + }, + "eng_Latn-tof_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.203125, + "max_sentence1_length": 827, + "min_sentence2_length": 46, + "average_sentence2_length": 199.18359375, + "max_sentence2_length": 546, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80483 + }, + "tof_Latn-eng_Latn": { + "min_sentence1_length": 46, + "average_sentence1_length": 199.18359375, + "max_sentence1_length": 546, + "min_sentence2_length": 24, + "average_sentence2_length": 115.203125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80483 + }, + "eng_Latn-toj_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 108.55078125, + "max_sentence1_length": 227, + "min_sentence2_length": 32, + "average_sentence2_length": 245.6171875, + "max_sentence2_length": 816, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90667 + }, + "toj_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 245.6171875, + "max_sentence1_length": 816, + "min_sentence2_length": 31, + "average_sentence2_length": 108.55078125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90667 + }, + "eng_Latn-ton_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.4375, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 134.453125, + "max_sentence2_length": 302, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63460 + }, + "ton_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 134.453125, + "max_sentence1_length": 302, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63460 + }, + "eng_Latn-too_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.55078125, + "max_sentence1_length": 827, + "min_sentence2_length": 33, + "average_sentence2_length": 196.1015625, + "max_sentence2_length": 458, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79783 + }, + "too_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 196.1015625, + "max_sentence1_length": 458, + "min_sentence2_length": 24, + "average_sentence2_length": 115.55078125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79783 + }, + "eng_Latn-top_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.54296875, + "max_sentence1_length": 827, + "min_sentence2_length": 41, + "average_sentence2_length": 182.8671875, + "max_sentence2_length": 628, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75881 + }, + "top_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 182.8671875, + "max_sentence1_length": 628, + "min_sentence2_length": 24, + "average_sentence2_length": 113.54296875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75881 + }, + "eng_Latn-tos_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.44140625, + "max_sentence1_length": 827, + "min_sentence2_length": 60, + "average_sentence2_length": 288.21484375, + "max_sentence2_length": 845, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 102824 + }, + "tos_Latn-eng_Latn": { + "min_sentence1_length": 60, + "average_sentence1_length": 288.21484375, + "max_sentence1_length": 845, + "min_sentence2_length": 24, + "average_sentence2_length": 113.44140625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 102824 + }, + "eng_Latn-tpa_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 110.51063829787235, + "max_sentence1_length": 257, + "min_sentence2_length": 47, + "average_sentence2_length": 150.54609929078015, + "max_sentence2_length": 540, + "num_samples": 141, + "num_samples_sentence2": 141, + "number_of_characters": 36809 + }, + "tpa_Latn-eng_Latn": { + "min_sentence1_length": 47, + "average_sentence1_length": 150.54609929078015, + "max_sentence1_length": 540, + "min_sentence2_length": 39, + "average_sentence2_length": 110.51063829787235, + "max_sentence2_length": 257, + "num_samples": 141, + "num_samples_sentence2": 141, + "number_of_characters": 36809 + }, + "eng_Latn-tpi_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 138.0859375, + "max_sentence1_length": 341, + "min_sentence2_length": 46, + "average_sentence2_length": 244.546875, + "max_sentence2_length": 2947, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97954 + }, + "tpi_Latn-eng_Latn": { + "min_sentence1_length": 46, + "average_sentence1_length": 244.546875, + "max_sentence1_length": 2947, + "min_sentence2_length": 1, + "average_sentence2_length": 138.0859375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97954 + }, + "eng_Latn-tpt_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 46, + "average_sentence2_length": 163.7890625, + "max_sentence2_length": 441, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71465 + }, + "tpt_Latn-eng_Latn": { + "min_sentence1_length": 46, + "average_sentence1_length": 163.7890625, + "max_sentence1_length": 441, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71465 + }, + "eng_Latn-tpz_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.09765625, + "max_sentence1_length": 227, + "min_sentence2_length": 36, + "average_sentence2_length": 222.984375, + "max_sentence2_length": 780, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85269 + }, + "tpz_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 222.984375, + "max_sentence1_length": 780, + "min_sentence2_length": 24, + "average_sentence2_length": 110.09765625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85269 + }, + "eng_Latn-trc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.23046875, + "max_sentence1_length": 376, + "min_sentence2_length": 46, + "average_sentence2_length": 270.44921875, + "max_sentence2_length": 859, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97710 + }, + "trc_Latn-eng_Latn": { + "min_sentence1_length": 46, + "average_sentence1_length": 270.44921875, + "max_sentence1_length": 859, + "min_sentence2_length": 24, + "average_sentence2_length": 111.23046875, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 97710 + }, + "eng_Latn-tsw_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.4765625, + "max_sentence1_length": 227, + "min_sentence2_length": 27, + "average_sentence2_length": 184.984375, + "max_sentence2_length": 1078, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75638 + }, + "tsw_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 184.984375, + "max_sentence1_length": 1078, + "min_sentence2_length": 24, + "average_sentence2_length": 110.4765625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75638 + }, + "eng_Latn-ttc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 30, + "average_sentence2_length": 172.53515625, + "max_sentence2_length": 410, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72942 + }, + "ttc_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 172.53515625, + "max_sentence1_length": 410, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72942 + }, + "eng_Latn-tte_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 108.7734375, + "max_sentence1_length": 239, + "min_sentence2_length": 25, + "average_sentence2_length": 233.83203125, + "max_sentence2_length": 973, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87707 + }, + "tte_Latn-eng_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 233.83203125, + "max_sentence1_length": 973, + "min_sentence2_length": 24, + "average_sentence2_length": 108.7734375, + "max_sentence2_length": 239, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87707 + }, + "eng_Latn-tuc_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 146.65625, + "max_sentence1_length": 341, + "min_sentence2_length": 32, + "average_sentence2_length": 221.24609375, + "max_sentence2_length": 533, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 94183 + }, + "tuc_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 221.24609375, + "max_sentence1_length": 533, + "min_sentence2_length": 1, + "average_sentence2_length": 146.65625, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 94183 + }, + "eng_Latn-tue_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.07421875, + "max_sentence1_length": 827, + "min_sentence2_length": 27, + "average_sentence2_length": 179.30859375, + "max_sentence2_length": 980, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74850 + }, + "tue_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 179.30859375, + "max_sentence1_length": 980, + "min_sentence2_length": 24, + "average_sentence2_length": 113.07421875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74850 + }, + "eng_Latn-tuf_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 114.47265625, + "max_sentence1_length": 827, + "min_sentence2_length": 23, + "average_sentence2_length": 208.703125, + "max_sentence2_length": 628, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82733 + }, + "tuf_Latn-eng_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 208.703125, + "max_sentence1_length": 628, + "min_sentence2_length": 31, + "average_sentence2_length": 114.47265625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82733 + }, + "eng_Latn-tuo_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.75390625, + "max_sentence1_length": 230, + "min_sentence2_length": 29, + "average_sentence2_length": 221.515625, + "max_sentence2_length": 945, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85317 + }, + "tuo_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 221.515625, + "max_sentence1_length": 945, + "min_sentence2_length": 24, + "average_sentence2_length": 111.75390625, + "max_sentence2_length": 230, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85317 + }, + "eng_Latn-tur_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 108.5703125, + "max_sentence1_length": 225, + "min_sentence2_length": 18, + "average_sentence2_length": 156.30859375, + "max_sentence2_length": 448, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67809 + }, + "tur_Latn-eng_Latn": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.30859375, + "max_sentence1_length": 448, + "min_sentence2_length": 24, + "average_sentence2_length": 108.5703125, + "max_sentence2_length": 225, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67809 + }, + "eng_Latn-tvk_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.1484375, + "max_sentence1_length": 827, + "min_sentence2_length": 25, + "average_sentence2_length": 193.80859375, + "max_sentence2_length": 500, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79093 + }, + "tvk_Latn-eng_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 193.80859375, + "max_sentence1_length": 500, + "min_sentence2_length": 24, + "average_sentence2_length": 115.1484375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79093 + }, + "eng_Latn-twi_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 21, + "average_sentence2_length": 108.4921875, + "max_sentence2_length": 240, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 56547 + }, + "twi_Latn-eng_Latn": { + "min_sentence1_length": 21, + "average_sentence1_length": 108.4921875, + "max_sentence1_length": 240, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 56547 + }, + "eng_Latn-txq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.76953125, + "max_sentence1_length": 251, + "min_sentence2_length": 37, + "average_sentence2_length": 293.578125, + "max_sentence2_length": 1375, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 104537 + }, + "txq_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 293.578125, + "max_sentence1_length": 1375, + "min_sentence2_length": 24, + "average_sentence2_length": 114.76953125, + "max_sentence2_length": 251, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 104537 + }, + "eng_Latn-txu_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 111.9375, + "max_sentence1_length": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 362.140625, + "max_sentence2_length": 1373, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 121364 + }, + "txu_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 362.140625, + "max_sentence1_length": 1373, + "min_sentence2_length": 31, + "average_sentence2_length": 111.9375, + "max_sentence2_length": 256, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 121364 + }, + "eng_Latn-tzj_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 146.40625, + "max_sentence1_length": 341, + "min_sentence2_length": 58, + "average_sentence2_length": 246.171875, + "max_sentence2_length": 687, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 100500 + }, + "tzj_Latn-eng_Latn": { + "min_sentence1_length": 58, + "average_sentence1_length": 246.171875, + "max_sentence1_length": 687, + "min_sentence2_length": 35, + "average_sentence2_length": 146.40625, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 100500 + }, + "eng_Latn-tzo_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 148.25390625, + "max_sentence1_length": 341, + "min_sentence2_length": 54, + "average_sentence2_length": 223.5625, + "max_sentence2_length": 965, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 95185 + }, + "tzo_Latn-eng_Latn": { + "min_sentence1_length": 54, + "average_sentence1_length": 223.5625, + "max_sentence1_length": 965, + "min_sentence2_length": 35, + "average_sentence2_length": 148.25390625, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 95185 + }, + "eng_Latn-ubr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.6875, + "max_sentence1_length": 227, + "min_sentence2_length": 27, + "average_sentence2_length": 179.2109375, + "max_sentence2_length": 462, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73958 + }, + "ubr_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 179.2109375, + "max_sentence1_length": 462, + "min_sentence2_length": 24, + "average_sentence2_length": 109.6875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73958 + }, + "eng_Latn-ubu_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 146.6953125, + "max_sentence1_length": 341, + "min_sentence2_length": 47, + "average_sentence2_length": 266.8984375, + "max_sentence2_length": 858, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 105880 + }, + "ubu_Latn-eng_Latn": { + "min_sentence1_length": 47, + "average_sentence1_length": 266.8984375, + "max_sentence1_length": 858, + "min_sentence2_length": 1, + "average_sentence2_length": 146.6953125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 105880 + }, + "eng_Latn-udu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 24, + "average_sentence2_length": 174.4296875, + "max_sentence2_length": 525, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74189 + }, + "udu_Latn-eng_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 174.4296875, + "max_sentence1_length": 525, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74189 + }, + "eng_Latn-uig_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.046875, + "max_sentence1_length": 341, + "min_sentence2_length": 49, + "average_sentence2_length": 158.07421875, + "max_sentence2_length": 374, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78111 + }, + "uig_Latn-eng_Latn": { + "min_sentence1_length": 49, + "average_sentence1_length": 158.07421875, + "max_sentence1_length": 374, + "min_sentence2_length": 56, + "average_sentence2_length": 147.046875, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78111 + }, + "eng_Latn-ukr_Cyrl": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.78125, + "max_sentence1_length": 227, + "min_sentence2_length": 17, + "average_sentence2_length": 87.7109375, + "max_sentence2_length": 234, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 50558 + }, + "ukr_Cyrl-eng_Latn": { + "min_sentence1_length": 17, + "average_sentence1_length": 87.7109375, + "max_sentence1_length": 234, + "min_sentence2_length": 24, + "average_sentence2_length": 109.78125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 50558 + }, + "eng_Latn-uli_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 113.765625, + "max_sentence1_length": 827, + "min_sentence2_length": 36, + "average_sentence2_length": 156.32421875, + "max_sentence2_length": 429, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69143 + }, + "uli_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 156.32421875, + "max_sentence1_length": 429, + "min_sentence2_length": 25, + "average_sentence2_length": 113.765625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 69143 + }, + "eng_Latn-ulk_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 110.41361256544502, + "max_sentence1_length": 244, + "min_sentence2_length": 41, + "average_sentence2_length": 147.36649214659687, + "max_sentence2_length": 858, + "num_samples": 191, + "num_samples_sentence2": 191, + "number_of_characters": 49236 + }, + "ulk_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 147.36649214659687, + "max_sentence1_length": 858, + "min_sentence2_length": 39, + "average_sentence2_length": 110.41361256544502, + "max_sentence2_length": 244, + "num_samples": 191, + "num_samples_sentence2": 191, + "number_of_characters": 49236 + }, + "eng_Latn-upv_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.0546875, + "max_sentence1_length": 376, + "min_sentence2_length": 36, + "average_sentence2_length": 181.41796875, + "max_sentence2_length": 584, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75129 + }, + "upv_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 181.41796875, + "max_sentence1_length": 584, + "min_sentence2_length": 24, + "average_sentence2_length": 112.0546875, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75129 + }, + "eng_Latn-ura_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.66015625, + "max_sentence1_length": 227, + "min_sentence2_length": 31, + "average_sentence2_length": 231.44140625, + "max_sentence2_length": 990, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87834 + }, + "ura_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 231.44140625, + "max_sentence1_length": 990, + "min_sentence2_length": 24, + "average_sentence2_length": 111.66015625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 87834 + }, + "eng_Latn-urb_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.109375, + "max_sentence1_length": 227, + "min_sentence2_length": 31, + "average_sentence2_length": 299.421875, + "max_sentence2_length": 1080, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 104840 + }, + "urb_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 299.421875, + "max_sentence1_length": 1080, + "min_sentence2_length": 24, + "average_sentence2_length": 110.109375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 104840 + }, + "eng_Latn-urd_Arab": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "min_sentence2_length": 35, + "average_sentence2_length": 156.125, + "max_sentence2_length": 420, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77743 + }, + "urd_Arab-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 156.125, + "max_sentence1_length": 420, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77743 + }, + "eng_Latn-uri_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 112.38888888888889, + "max_sentence1_length": 245, + "min_sentence2_length": 60, + "average_sentence2_length": 212.01190476190476, + "max_sentence2_length": 618, + "num_samples": 252, + "num_samples_sentence2": 252, + "number_of_characters": 81749 + }, + "uri_Latn-eng_Latn": { + "min_sentence1_length": 60, + "average_sentence1_length": 212.01190476190476, + "max_sentence1_length": 618, + "min_sentence2_length": 38, + "average_sentence2_length": 112.38888888888889, + "max_sentence2_length": 245, + "num_samples": 252, + "num_samples_sentence2": 252, + "number_of_characters": 81749 + }, + "eng_Latn-urt_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.5703125, + "max_sentence1_length": 827, + "min_sentence2_length": 40, + "average_sentence2_length": 203.98046875, + "max_sentence2_length": 882, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81805 + }, + "urt_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 203.98046875, + "max_sentence1_length": 882, + "min_sentence2_length": 24, + "average_sentence2_length": 115.5703125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81805 + }, + "eng_Latn-urw_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 112.38888888888889, + "max_sentence1_length": 245, + "min_sentence2_length": 47, + "average_sentence2_length": 180.75555555555556, + "max_sentence2_length": 597, + "num_samples": 90, + "num_samples_sentence2": 90, + "number_of_characters": 26383 + }, + "urw_Latn-eng_Latn": { + "min_sentence1_length": 47, + "average_sentence1_length": 180.75555555555556, + "max_sentence1_length": 597, + "min_sentence2_length": 37, + "average_sentence2_length": 112.38888888888889, + "max_sentence2_length": 245, + "num_samples": 90, + "num_samples_sentence2": 90, + "number_of_characters": 26383 + }, + "eng_Latn-usa_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.59375, + "max_sentence1_length": 251, + "min_sentence2_length": 8, + "average_sentence2_length": 200.51171875, + "max_sentence2_length": 896, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79899 + }, + "usa_Latn-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 200.51171875, + "max_sentence1_length": 896, + "min_sentence2_length": 24, + "average_sentence2_length": 111.59375, + "max_sentence2_length": 251, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79899 + }, + "eng_Latn-usp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.296875, + "max_sentence1_length": 827, + "min_sentence2_length": 26, + "average_sentence2_length": 190.515625, + "max_sentence2_length": 480, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78288 + }, + "usp_Latn-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 190.515625, + "max_sentence1_length": 480, + "min_sentence2_length": 24, + "average_sentence2_length": 115.296875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78288 + }, + "eng_Latn-uvh_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 109.5390625, + "max_sentence1_length": 271, + "min_sentence2_length": 54, + "average_sentence2_length": 372.28515625, + "max_sentence2_length": 1232, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 123347 + }, + "uvh_Latn-eng_Latn": { + "min_sentence1_length": 54, + "average_sentence1_length": 372.28515625, + "max_sentence1_length": 1232, + "min_sentence2_length": 31, + "average_sentence2_length": 109.5390625, + "max_sentence2_length": 271, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 123347 + }, + "eng_Latn-uvl_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.4609375, + "max_sentence1_length": 227, + "min_sentence2_length": 45, + "average_sentence2_length": 192.77734375, + "max_sentence2_length": 702, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77885 + }, + "uvl_Latn-eng_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 192.77734375, + "max_sentence1_length": 702, + "min_sentence2_length": 24, + "average_sentence2_length": 111.4609375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77885 + }, + "eng_Latn-vid_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 38, + "average_sentence2_length": 138.671875, + "max_sentence2_length": 305, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64273 + }, + "vid_Latn-eng_Latn": { + "min_sentence1_length": 38, + "average_sentence1_length": 138.671875, + "max_sentence1_length": 305, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64273 + }, + "eng_Latn-vie_Latn": { + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "min_sentence2_length": 32, + "average_sentence2_length": 127.79296875, + "max_sentence2_length": 355, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70490 + }, + "vie_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 127.79296875, + "max_sentence1_length": 355, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70490 + }, + "eng_Latn-viv_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.82421875, + "max_sentence1_length": 227, + "min_sentence2_length": 50, + "average_sentence2_length": 319.16015625, + "max_sentence2_length": 1077, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 110076 + }, + "viv_Latn-eng_Latn": { + "min_sentence1_length": 50, + "average_sentence1_length": 319.16015625, + "max_sentence1_length": 1077, + "min_sentence2_length": 24, + "average_sentence2_length": 110.82421875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 110076 + }, + "eng_Latn-vmy_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 42, + "average_sentence2_length": 149.4921875, + "max_sentence2_length": 337, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67043 + }, + "vmy_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 149.4921875, + "max_sentence1_length": 337, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67043 + }, + "eng_Latn-waj_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.875, + "max_sentence1_length": 827, + "min_sentence2_length": 43, + "average_sentence2_length": 248.9609375, + "max_sentence2_length": 1148, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92630 + }, + "waj_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 248.9609375, + "max_sentence1_length": 1148, + "min_sentence2_length": 24, + "average_sentence2_length": 112.875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92630 + }, + "eng_Latn-wal_Ethi": { + "min_sentence1_length": 24, + "average_sentence1_length": 106.609375, + "max_sentence1_length": 207, + "min_sentence2_length": 28, + "average_sentence2_length": 146.234375, + "max_sentence2_length": 451, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64728 + }, + "wal_Ethi-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 146.234375, + "max_sentence1_length": 451, + "min_sentence2_length": 24, + "average_sentence2_length": 106.609375, + "max_sentence2_length": 207, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64728 + }, + "eng_Latn-wap_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.37890625, + "max_sentence1_length": 827, + "min_sentence2_length": 41, + "average_sentence2_length": 218.2421875, + "max_sentence2_length": 566, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84895 + }, + "wap_Latn-eng_Latn": { + "min_sentence1_length": 41, + "average_sentence1_length": 218.2421875, + "max_sentence1_length": 566, + "min_sentence2_length": 24, + "average_sentence2_length": 113.37890625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84895 + }, + "eng_Latn-wat_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 114.1875, + "max_sentence1_length": 257, + "min_sentence2_length": 50, + "average_sentence2_length": 172.70703125, + "max_sentence2_length": 748, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73445 + }, + "wat_Latn-eng_Latn": { + "min_sentence1_length": 50, + "average_sentence1_length": 172.70703125, + "max_sentence1_length": 748, + "min_sentence2_length": 39, + "average_sentence2_length": 114.1875, + "max_sentence2_length": 257, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73445 + }, + "eng_Latn-wbi_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.48046875, + "max_sentence1_length": 227, + "min_sentence2_length": 4, + "average_sentence2_length": 132.11328125, + "max_sentence2_length": 359, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62104 + }, + "wbi_Latn-eng_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 132.11328125, + "max_sentence1_length": 359, + "min_sentence2_length": 24, + "average_sentence2_length": 110.48046875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62104 + }, + "eng_Latn-wbp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.17578125, + "max_sentence1_length": 230, + "min_sentence2_length": 99, + "average_sentence2_length": 460.8671875, + "max_sentence2_length": 1904, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 146443 + }, + "wbp_Latn-eng_Latn": { + "min_sentence1_length": 99, + "average_sentence1_length": 460.8671875, + "max_sentence1_length": 1904, + "min_sentence2_length": 24, + "average_sentence2_length": 111.17578125, + "max_sentence2_length": 230, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 146443 + }, + "eng_Latn-wed_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 111.1953125, + "max_sentence1_length": 215, + "min_sentence2_length": 30, + "average_sentence2_length": 113.93359375, + "max_sentence2_length": 298, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57633 + }, + "wed_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 113.93359375, + "max_sentence1_length": 298, + "min_sentence2_length": 28, + "average_sentence2_length": 111.1953125, + "max_sentence2_length": 215, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57633 + }, + "eng_Latn-wer_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.5859375, + "max_sentence1_length": 827, + "min_sentence2_length": 19, + "average_sentence2_length": 165.30859375, + "max_sentence2_length": 590, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71653 + }, + "wer_Latn-eng_Latn": { + "min_sentence1_length": 19, + "average_sentence1_length": 165.30859375, + "max_sentence1_length": 590, + "min_sentence2_length": 24, + "average_sentence2_length": 114.5859375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71653 + }, + "eng_Latn-wim_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.02734375, + "max_sentence1_length": 232, + "min_sentence2_length": 64, + "average_sentence2_length": 350.8046875, + "max_sentence2_length": 1563, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 117973 + }, + "wim_Latn-eng_Latn": { + "min_sentence1_length": 64, + "average_sentence1_length": 350.8046875, + "max_sentence1_length": 1563, + "min_sentence2_length": 24, + "average_sentence2_length": 110.02734375, + "max_sentence2_length": 232, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 117973 + }, + "eng_Latn-wiu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.5078125, + "max_sentence1_length": 827, + "min_sentence2_length": 55, + "average_sentence2_length": 200.34765625, + "max_sentence2_length": 590, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80347 + }, + "wiu_Latn-eng_Latn": { + "min_sentence1_length": 55, + "average_sentence1_length": 200.34765625, + "max_sentence1_length": 590, + "min_sentence2_length": 24, + "average_sentence2_length": 113.5078125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80347 + }, + "eng_Latn-wiv_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.328125, + "max_sentence1_length": 827, + "min_sentence2_length": 44, + "average_sentence2_length": 192.15625, + "max_sentence2_length": 518, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78716 + }, + "wiv_Latn-eng_Latn": { + "min_sentence1_length": 44, + "average_sentence1_length": 192.15625, + "max_sentence1_length": 518, + "min_sentence2_length": 24, + "average_sentence2_length": 115.328125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 78716 + }, + "eng_Latn-wmt_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 116.83203125, + "max_sentence1_length": 288, + "min_sentence2_length": 60, + "average_sentence2_length": 793.13671875, + "max_sentence2_length": 4949, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 232952 + }, + "wmt_Latn-eng_Latn": { + "min_sentence1_length": 60, + "average_sentence1_length": 793.13671875, + "max_sentence1_length": 4949, + "min_sentence2_length": 33, + "average_sentence2_length": 116.83203125, + "max_sentence2_length": 288, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 232952 + }, + "eng_Latn-wmw_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 33, + "average_sentence2_length": 125.3671875, + "max_sentence2_length": 276, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60867 + }, + "wmw_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 125.3671875, + "max_sentence1_length": 276, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60867 + }, + "eng_Latn-wnc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.4375, + "max_sentence1_length": 251, + "min_sentence2_length": 58, + "average_sentence2_length": 314.421875, + "max_sentence2_length": 1344, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 108764 + }, + "wnc_Latn-eng_Latn": { + "min_sentence1_length": 58, + "average_sentence1_length": 314.421875, + "max_sentence1_length": 1344, + "min_sentence2_length": 24, + "average_sentence2_length": 110.4375, + "max_sentence2_length": 251, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 108764 + }, + "eng_Latn-wnu_Latn": { + "min_sentence1_length": 21, + "average_sentence1_length": 110.328125, + "max_sentence1_length": 271, + "min_sentence2_length": 32, + "average_sentence2_length": 180.5078125, + "max_sentence2_length": 904, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74454 + }, + "wnu_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 180.5078125, + "max_sentence1_length": 904, + "min_sentence2_length": 21, + "average_sentence2_length": 110.328125, + "max_sentence2_length": 271, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74454 + }, + "eng_Latn-wol_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "min_sentence2_length": 31, + "average_sentence2_length": 131.58984375, + "max_sentence2_length": 322, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70892 + }, + "wol_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 131.58984375, + "max_sentence1_length": 322, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70892 + }, + "eng_Latn-wos_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.16796875, + "max_sentence1_length": 227, + "min_sentence2_length": 23, + "average_sentence2_length": 256.5078125, + "max_sentence2_length": 878, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93613 + }, + "wos_Latn-eng_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 256.5078125, + "max_sentence1_length": 878, + "min_sentence2_length": 24, + "average_sentence2_length": 109.16796875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93613 + }, + "eng_Latn-wrk_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 109.18359375, + "max_sentence1_length": 256, + "min_sentence2_length": 85, + "average_sentence2_length": 712.43359375, + "max_sentence2_length": 2886, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 210334 + }, + "wrk_Latn-eng_Latn": { + "min_sentence1_length": 85, + "average_sentence1_length": 712.43359375, + "max_sentence1_length": 2886, + "min_sentence2_length": 31, + "average_sentence2_length": 109.18359375, + "max_sentence2_length": 256, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 210334 + }, + "eng_Latn-wro_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 110.109375, + "max_sentence1_length": 257, + "min_sentence2_length": 28, + "average_sentence2_length": 113.57421875, + "max_sentence2_length": 323, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57263 + }, + "wro_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 113.57421875, + "max_sentence1_length": 323, + "min_sentence2_length": 37, + "average_sentence2_length": 110.109375, + "max_sentence2_length": 257, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 57263 + }, + "eng_Latn-wrs_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.2578125, + "max_sentence1_length": 827, + "min_sentence2_length": 43, + "average_sentence2_length": 216.0, + "max_sentence2_length": 761, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84290 + }, + "wrs_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 216.0, + "max_sentence1_length": 761, + "min_sentence2_length": 24, + "average_sentence2_length": 113.2578125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84290 + }, + "eng_Latn-wsk_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.75, + "max_sentence1_length": 827, + "min_sentence2_length": 45, + "average_sentence2_length": 172.875, + "max_sentence2_length": 362, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73376 + }, + "wsk_Latn-eng_Latn": { + "min_sentence1_length": 45, + "average_sentence1_length": 172.875, + "max_sentence1_length": 362, + "min_sentence2_length": 24, + "average_sentence2_length": 113.75, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73376 + }, + "eng_Latn-wuv_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.4140625, + "max_sentence1_length": 827, + "min_sentence2_length": 30, + "average_sentence2_length": 131.328125, + "max_sentence2_length": 284, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62654 + }, + "wuv_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 131.328125, + "max_sentence1_length": 284, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4140625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 62654 + }, + "eng_Latn-xav_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.640625, + "max_sentence1_length": 227, + "min_sentence2_length": 65, + "average_sentence2_length": 432.13671875, + "max_sentence2_length": 1467, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 138695 + }, + "xav_Latn-eng_Latn": { + "min_sentence1_length": 65, + "average_sentence1_length": 432.13671875, + "max_sentence1_length": 1467, + "min_sentence2_length": 24, + "average_sentence2_length": 109.640625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 138695 + }, + "eng_Latn-xbi_Latn": { + "min_sentence1_length": 22, + "average_sentence1_length": 108.2265625, + "max_sentence1_length": 232, + "min_sentence2_length": 32, + "average_sentence2_length": 215.0703125, + "max_sentence2_length": 2517, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82764 + }, + "xbi_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 215.0703125, + "max_sentence1_length": 2517, + "min_sentence2_length": 22, + "average_sentence2_length": 108.2265625, + "max_sentence2_length": 232, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 82764 + }, + "eng_Latn-xed_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.484375, + "max_sentence1_length": 376, + "min_sentence2_length": 30, + "average_sentence2_length": 152.98046875, + "max_sentence2_length": 361, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67959 + }, + "xed_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 152.98046875, + "max_sentence1_length": 361, + "min_sentence2_length": 24, + "average_sentence2_length": 112.484375, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67959 + }, + "eng_Latn-xla_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.15234375, + "max_sentence1_length": 259, + "min_sentence2_length": 29, + "average_sentence2_length": 242.9375, + "max_sentence2_length": 775, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90647 + }, + "xla_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 242.9375, + "max_sentence1_length": 775, + "min_sentence2_length": 24, + "average_sentence2_length": 111.15234375, + "max_sentence2_length": 259, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90647 + }, + "eng_Latn-xnn_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.05078125, + "max_sentence1_length": 239, + "min_sentence2_length": 29, + "average_sentence2_length": 239.5703125, + "max_sentence2_length": 944, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89247 + }, + "xnn_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 239.5703125, + "max_sentence1_length": 944, + "min_sentence2_length": 24, + "average_sentence2_length": 109.05078125, + "max_sentence2_length": 239, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89247 + }, + "eng_Latn-xon_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 112.515625, + "max_sentence1_length": 376, + "min_sentence2_length": 35, + "average_sentence2_length": 141.6640625, + "max_sentence2_length": 776, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65070 + }, + "xon_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 141.6640625, + "max_sentence1_length": 776, + "min_sentence2_length": 31, + "average_sentence2_length": 112.515625, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65070 + }, + "eng_Latn-xsi_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.8046875, + "max_sentence1_length": 827, + "min_sentence2_length": 39, + "average_sentence2_length": 245.1796875, + "max_sentence2_length": 735, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91644 + }, + "xsi_Latn-eng_Latn": { + "min_sentence1_length": 39, + "average_sentence1_length": 245.1796875, + "max_sentence1_length": 735, + "min_sentence2_length": 24, + "average_sentence2_length": 112.8046875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91644 + }, + "eng_Latn-xtd_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.34375, + "max_sentence1_length": 227, + "min_sentence2_length": 48, + "average_sentence2_length": 201.03125, + "max_sentence2_length": 707, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79968 + }, + "xtd_Latn-eng_Latn": { + "min_sentence1_length": 48, + "average_sentence1_length": 201.03125, + "max_sentence1_length": 707, + "min_sentence2_length": 24, + "average_sentence2_length": 111.34375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79968 + }, + "eng_Latn-xtm_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.54296875, + "max_sentence1_length": 827, + "min_sentence2_length": 32, + "average_sentence2_length": 197.6171875, + "max_sentence2_length": 544, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79657 + }, + "xtm_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 197.6171875, + "max_sentence1_length": 544, + "min_sentence2_length": 24, + "average_sentence2_length": 113.54296875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79657 + }, + "eng_Latn-yaa_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 114.7109375, + "max_sentence1_length": 227, + "min_sentence2_length": 27, + "average_sentence2_length": 283.4921875, + "max_sentence2_length": 1230, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101940 + }, + "yaa_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 283.4921875, + "max_sentence1_length": 1230, + "min_sentence2_length": 31, + "average_sentence2_length": 114.7109375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 101940 + }, + "eng_Latn-yad_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.2421875, + "max_sentence1_length": 233, + "min_sentence2_length": 31, + "average_sentence2_length": 249.9453125, + "max_sentence2_length": 650, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93232 + }, + "yad_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 249.9453125, + "max_sentence1_length": 650, + "min_sentence2_length": 24, + "average_sentence2_length": 114.2421875, + "max_sentence2_length": 233, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 93232 + }, + "eng_Latn-yal_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 28, + "average_sentence2_length": 122.8515625, + "max_sentence2_length": 308, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60223 + }, + "yal_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 122.8515625, + "max_sentence1_length": 308, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60223 + }, + "eng_Latn-yap_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.4921875, + "max_sentence1_length": 827, + "min_sentence2_length": 28, + "average_sentence2_length": 176.61328125, + "max_sentence2_length": 491, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74267 + }, + "yap_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 176.61328125, + "max_sentence1_length": 491, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4921875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74267 + }, + "eng_Latn-yaq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 109.8359375, + "max_sentence1_length": 227, + "min_sentence2_length": 60, + "average_sentence2_length": 220.11328125, + "max_sentence2_length": 476, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84467 + }, + "yaq_Latn-eng_Latn": { + "min_sentence1_length": 60, + "average_sentence1_length": 220.11328125, + "max_sentence1_length": 476, + "min_sentence2_length": 24, + "average_sentence2_length": 109.8359375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 84467 + }, + "eng_Latn-yby_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.140625, + "max_sentence1_length": 227, + "min_sentence2_length": 49, + "average_sentence2_length": 201.703125, + "max_sentence2_length": 611, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79832 + }, + "yby_Latn-eng_Latn": { + "min_sentence1_length": 49, + "average_sentence1_length": 201.703125, + "max_sentence1_length": 611, + "min_sentence2_length": 24, + "average_sentence2_length": 110.140625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79832 + }, + "eng_Latn-ycn_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.47265625, + "max_sentence1_length": 827, + "min_sentence2_length": 67, + "average_sentence2_length": 272.796875, + "max_sentence2_length": 793, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99397 + }, + "ycn_Latn-eng_Latn": { + "min_sentence1_length": 67, + "average_sentence1_length": 272.796875, + "max_sentence1_length": 793, + "min_sentence2_length": 24, + "average_sentence2_length": 115.47265625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 99397 + }, + "eng_Latn-yka_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 112.5, + "max_sentence1_length": 827, + "min_sentence2_length": 33, + "average_sentence2_length": 183.38671875, + "max_sentence2_length": 494, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75747 + }, + "yka_Latn-eng_Latn": { + "min_sentence1_length": 33, + "average_sentence1_length": 183.38671875, + "max_sentence1_length": 494, + "min_sentence2_length": 31, + "average_sentence2_length": 112.5, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75747 + }, + "eng_Latn-yle_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 111.36328125, + "max_sentence1_length": 233, + "min_sentence2_length": 29, + "average_sentence2_length": 205.69140625, + "max_sentence2_length": 587, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81166 + }, + "yle_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 205.69140625, + "max_sentence1_length": 587, + "min_sentence2_length": 31, + "average_sentence2_length": 111.36328125, + "max_sentence2_length": 233, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 81166 + }, + "eng_Latn-yml_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 112.3203125, + "max_sentence1_length": 227, + "min_sentence2_length": 42, + "average_sentence2_length": 296.03515625, + "max_sentence2_length": 1111, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 104539 + }, + "yml_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 296.03515625, + "max_sentence1_length": 1111, + "min_sentence2_length": 25, + "average_sentence2_length": 112.3203125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 104539 + }, + "eng_Latn-yon_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.66015625, + "max_sentence1_length": 827, + "min_sentence2_length": 32, + "average_sentence2_length": 183.55859375, + "max_sentence2_length": 597, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76088 + }, + "yon_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.55859375, + "max_sentence1_length": 597, + "min_sentence2_length": 24, + "average_sentence2_length": 113.66015625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76088 + }, + "eng_Latn-yor_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.421875, + "max_sentence1_length": 827, + "min_sentence2_length": 26, + "average_sentence2_length": 114.8984375, + "max_sentence2_length": 287, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58962 + }, + "yor_Latn-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 114.8984375, + "max_sentence1_length": 287, + "min_sentence2_length": 24, + "average_sentence2_length": 115.421875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 58962 + }, + "eng_Latn-yrb_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.19921875, + "max_sentence1_length": 232, + "min_sentence2_length": 40, + "average_sentence2_length": 234.0625, + "max_sentence2_length": 765, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89411 + }, + "yrb_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 234.0625, + "max_sentence1_length": 765, + "min_sentence2_length": 24, + "average_sentence2_length": 115.19921875, + "max_sentence2_length": 232, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89411 + }, + "eng_Latn-yre_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.09375, + "max_sentence1_length": 827, + "min_sentence2_length": 43, + "average_sentence2_length": 172.7109375, + "max_sentence2_length": 461, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73678 + }, + "yre_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 172.7109375, + "max_sentence1_length": 461, + "min_sentence2_length": 24, + "average_sentence2_length": 115.09375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73678 + }, + "eng_Latn-yss_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 145.40625, + "max_sentence1_length": 341, + "min_sentence2_length": 23, + "average_sentence2_length": 214.68359375, + "max_sentence2_length": 524, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92183 + }, + "yss_Latn-eng_Latn": { + "min_sentence1_length": 23, + "average_sentence1_length": 214.68359375, + "max_sentence1_length": 524, + "min_sentence2_length": 1, + "average_sentence2_length": 145.40625, + "max_sentence2_length": 341, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 92183 + }, + "eng_Latn-yuj_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.8125, + "max_sentence1_length": 827, + "min_sentence2_length": 48, + "average_sentence2_length": 221.23046875, + "max_sentence2_length": 747, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86283 + }, + "yuj_Latn-eng_Latn": { + "min_sentence1_length": 48, + "average_sentence1_length": 221.23046875, + "max_sentence1_length": 747, + "min_sentence2_length": 24, + "average_sentence2_length": 115.8125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 86283 + }, + "eng_Latn-yut_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.35546875, + "max_sentence1_length": 827, + "min_sentence2_length": 27, + "average_sentence2_length": 172.515625, + "max_sentence2_length": 549, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73439 + }, + "yut_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 172.515625, + "max_sentence1_length": 549, + "min_sentence2_length": 24, + "average_sentence2_length": 114.35546875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73439 + }, + "eng_Latn-yuw_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.0703125, + "max_sentence1_length": 227, + "min_sentence2_length": 43, + "average_sentence2_length": 221.84765625, + "max_sentence2_length": 809, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85227 + }, + "yuw_Latn-eng_Latn": { + "min_sentence1_length": 43, + "average_sentence1_length": 221.84765625, + "max_sentence1_length": 809, + "min_sentence2_length": 24, + "average_sentence2_length": 111.0703125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 85227 + }, + "eng_Latn-yva_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.35546875, + "max_sentence1_length": 230, + "min_sentence2_length": 37, + "average_sentence2_length": 241.4609375, + "max_sentence2_length": 1386, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90577 + }, + "yva_Latn-eng_Latn": { + "min_sentence1_length": 37, + "average_sentence1_length": 241.4609375, + "max_sentence1_length": 1386, + "min_sentence2_length": 24, + "average_sentence2_length": 112.35546875, + "max_sentence2_length": 230, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 90577 + }, + "eng_Latn-zaa_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.95703125, + "max_sentence1_length": 827, + "min_sentence2_length": 58, + "average_sentence2_length": 243.12890625, + "max_sentence2_length": 823, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91414 + }, + "zaa_Latn-eng_Latn": { + "min_sentence1_length": 58, + "average_sentence1_length": 243.12890625, + "max_sentence1_length": 823, + "min_sentence2_length": 24, + "average_sentence2_length": 113.95703125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 91414 + }, + "eng_Latn-zab_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.93359375, + "max_sentence1_length": 827, + "min_sentence2_length": 26, + "average_sentence2_length": 140.60546875, + "max_sentence2_length": 323, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65162 + }, + "zab_Latn-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 140.60546875, + "max_sentence1_length": 323, + "min_sentence2_length": 24, + "average_sentence2_length": 113.93359375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 65162 + }, + "eng_Latn-zac_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.5078125, + "max_sentence1_length": 827, + "min_sentence2_length": 26, + "average_sentence2_length": 173.734375, + "max_sentence2_length": 379, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73790 + }, + "zac_Latn-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 173.734375, + "max_sentence1_length": 379, + "min_sentence2_length": 24, + "average_sentence2_length": 114.5078125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 73790 + }, + "eng_Latn-zad_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.41796875, + "max_sentence1_length": 227, + "min_sentence2_length": 29, + "average_sentence2_length": 148.8125, + "max_sentence2_length": 371, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66619 + }, + "zad_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 148.8125, + "max_sentence1_length": 371, + "min_sentence2_length": 24, + "average_sentence2_length": 111.41796875, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66619 + }, + "eng_Latn-zai_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.6328125, + "max_sentence1_length": 227, + "min_sentence2_length": 35, + "average_sentence2_length": 136.72265625, + "max_sentence2_length": 399, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63323 + }, + "zai_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 136.72265625, + "max_sentence1_length": 399, + "min_sentence2_length": 24, + "average_sentence2_length": 110.6328125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63323 + }, + "eng_Latn-zaj_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 111.21484375, + "max_sentence1_length": 227, + "min_sentence2_length": 28, + "average_sentence2_length": 134.9921875, + "max_sentence2_length": 340, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63029 + }, + "zaj_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 134.9921875, + "max_sentence1_length": 340, + "min_sentence2_length": 24, + "average_sentence2_length": 111.21484375, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63029 + }, + "eng_Latn-zam_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.8046875, + "max_sentence1_length": 827, + "min_sentence2_length": 32, + "average_sentence2_length": 285.17578125, + "max_sentence2_length": 829, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 102139 + }, + "zam_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 285.17578125, + "max_sentence1_length": 829, + "min_sentence2_length": 24, + "average_sentence2_length": 113.8046875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 102139 + }, + "eng_Latn-zao_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.84375, + "max_sentence1_length": 827, + "min_sentence2_length": 36, + "average_sentence2_length": 149.453125, + "max_sentence2_length": 447, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67404 + }, + "zao_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 149.453125, + "max_sentence1_length": 447, + "min_sentence2_length": 24, + "average_sentence2_length": 113.84375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67404 + }, + "eng_Latn-zap_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.22265625, + "max_sentence1_length": 827, + "min_sentence2_length": 27, + "average_sentence2_length": 139.109375, + "max_sentence2_length": 358, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64597 + }, + "zap_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 139.109375, + "max_sentence1_length": 358, + "min_sentence2_length": 24, + "average_sentence2_length": 113.22265625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 64597 + }, + "eng_Latn-zar_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.078125, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 175.34375, + "max_sentence2_length": 531, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74092 + }, + "zar_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 175.34375, + "max_sentence1_length": 531, + "min_sentence2_length": 24, + "average_sentence2_length": 114.078125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74092 + }, + "eng_Latn-zas_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.93359375, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 146.23828125, + "max_sentence2_length": 329, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66604 + }, + "zas_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 146.23828125, + "max_sentence1_length": 329, + "min_sentence2_length": 24, + "average_sentence2_length": 113.93359375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 66604 + }, + "eng_Latn-zat_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.67578125, + "max_sentence1_length": 827, + "min_sentence2_length": 35, + "average_sentence2_length": 164.9765625, + "max_sentence2_length": 418, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71335 + }, + "zat_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 164.9765625, + "max_sentence1_length": 418, + "min_sentence2_length": 24, + "average_sentence2_length": 113.67578125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71335 + }, + "eng_Latn-zav_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.37890625, + "max_sentence1_length": 227, + "min_sentence2_length": 28, + "average_sentence2_length": 240.625, + "max_sentence2_length": 749, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89857 + }, + "zav_Latn-eng_Latn": { + "min_sentence1_length": 28, + "average_sentence1_length": 240.625, + "max_sentence1_length": 749, + "min_sentence2_length": 24, + "average_sentence2_length": 110.37890625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 89857 + }, + "eng_Latn-zaw_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.54296875, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 135.7890625, + "max_sentence2_length": 332, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63829 + }, + "zaw_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 135.7890625, + "max_sentence1_length": 332, + "min_sentence2_length": 24, + "average_sentence2_length": 113.54296875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63829 + }, + "eng_Latn-zca_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.73828125, + "max_sentence1_length": 827, + "min_sentence2_length": 30, + "average_sentence2_length": 167.70703125, + "max_sentence2_length": 440, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72050 + }, + "zca_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 167.70703125, + "max_sentence1_length": 440, + "min_sentence2_length": 24, + "average_sentence2_length": 113.73828125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72050 + }, + "eng_Latn-zga_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 116.0625, + "max_sentence1_length": 827, + "min_sentence2_length": 22, + "average_sentence2_length": 124.90234375, + "max_sentence2_length": 319, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61687 + }, + "zga_Latn-eng_Latn": { + "min_sentence1_length": 22, + "average_sentence1_length": 124.90234375, + "max_sentence1_length": 319, + "min_sentence2_length": 24, + "average_sentence2_length": 116.0625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 61687 + }, + "eng_Latn-zia_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.75390625, + "max_sentence1_length": 827, + "min_sentence2_length": 29, + "average_sentence2_length": 184.64453125, + "max_sentence2_length": 826, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76390 + }, + "zia_Latn-eng_Latn": { + "min_sentence1_length": 29, + "average_sentence1_length": 184.64453125, + "max_sentence1_length": 826, + "min_sentence2_length": 24, + "average_sentence2_length": 113.75390625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76390 + }, + "eng_Latn-ziw_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "min_sentence2_length": 19, + "average_sentence2_length": 124.8671875, + "max_sentence2_length": 349, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60739 + }, + "ziw_Latn-eng_Latn": { + "min_sentence1_length": 19, + "average_sentence1_length": 124.8671875, + "max_sentence1_length": 349, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 60739 + }, + "eng_Latn-zlm_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 25, + "average_sentence2_length": 131.09765625, + "max_sentence2_length": 302, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63096 + }, + "zlm_Latn-eng_Latn": { + "min_sentence1_length": 25, + "average_sentence1_length": 131.09765625, + "max_sentence1_length": 302, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 63096 + }, + "eng_Latn-zos_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.48828125, + "max_sentence1_length": 827, + "min_sentence2_length": 30, + "average_sentence2_length": 179.2265625, + "max_sentence2_length": 429, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75447 + }, + "zos_Latn-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 179.2265625, + "max_sentence1_length": 429, + "min_sentence2_length": 24, + "average_sentence2_length": 115.48828125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75447 + }, + "eng_Latn-zpc_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.0390625, + "max_sentence1_length": 827, + "min_sentence2_length": 32, + "average_sentence2_length": 182.3828125, + "max_sentence2_length": 515, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76140 + }, + "zpc_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 182.3828125, + "max_sentence1_length": 515, + "min_sentence2_length": 24, + "average_sentence2_length": 115.0390625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 76140 + }, + "eng_Latn-zpl_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.3046875, + "max_sentence1_length": 827, + "min_sentence2_length": 34, + "average_sentence2_length": 170.328125, + "max_sentence2_length": 409, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72610 + }, + "zpl_Latn-eng_Latn": { + "min_sentence1_length": 34, + "average_sentence1_length": 170.328125, + "max_sentence1_length": 409, + "min_sentence2_length": 24, + "average_sentence2_length": 113.3046875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 72610 + }, + "eng_Latn-zpm_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 108.8125, + "max_sentence1_length": 227, + "min_sentence2_length": 36, + "average_sentence2_length": 192.28125, + "max_sentence2_length": 615, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77080 + }, + "zpm_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 192.28125, + "max_sentence1_length": 615, + "min_sentence2_length": 24, + "average_sentence2_length": 108.8125, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 77080 + }, + "eng_Latn-zpo_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.3984375, + "max_sentence1_length": 827, + "min_sentence2_length": 27, + "average_sentence2_length": 149.56640625, + "max_sentence2_length": 398, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67831 + }, + "zpo_Latn-eng_Latn": { + "min_sentence1_length": 27, + "average_sentence1_length": 149.56640625, + "max_sentence1_length": 398, + "min_sentence2_length": 24, + "average_sentence2_length": 115.3984375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67831 + }, + "eng_Latn-zpq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 110.1640625, + "max_sentence1_length": 227, + "min_sentence2_length": 40, + "average_sentence2_length": 200.3203125, + "max_sentence2_length": 664, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79484 + }, + "zpq_Latn-eng_Latn": { + "min_sentence1_length": 40, + "average_sentence1_length": 200.3203125, + "max_sentence1_length": 664, + "min_sentence2_length": 24, + "average_sentence2_length": 110.1640625, + "max_sentence2_length": 227, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 79484 + }, + "eng_Latn-zpu_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "min_sentence2_length": 35, + "average_sentence2_length": 165.80859375, + "max_sentence2_length": 409, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71982 + }, + "zpu_Latn-eng_Latn": { + "min_sentence1_length": 35, + "average_sentence1_length": 165.80859375, + "max_sentence1_length": 409, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 71982 + }, + "eng_Latn-zpv_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.69140625, + "max_sentence1_length": 827, + "min_sentence2_length": 42, + "average_sentence2_length": 201.7734375, + "max_sentence2_length": 522, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80759 + }, + "zpv_Latn-eng_Latn": { + "min_sentence1_length": 42, + "average_sentence1_length": 201.7734375, + "max_sentence1_length": 522, + "min_sentence2_length": 24, + "average_sentence2_length": 113.69140625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 80759 + }, + "eng_Latn-zpz_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.1015625, + "max_sentence1_length": 827, + "min_sentence2_length": 36, + "average_sentence2_length": 180.28125, + "max_sentence2_length": 506, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75618 + }, + "zpz_Latn-eng_Latn": { + "min_sentence1_length": 36, + "average_sentence1_length": 180.28125, + "max_sentence1_length": 506, + "min_sentence2_length": 24, + "average_sentence2_length": 115.1015625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 75618 + }, + "eng_Latn-zsr_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 114.078125, + "max_sentence1_length": 827, + "min_sentence2_length": 31, + "average_sentence2_length": 161.50390625, + "max_sentence2_length": 488, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70549 + }, + "zsr_Latn-eng_Latn": { + "min_sentence1_length": 31, + "average_sentence1_length": 161.50390625, + "max_sentence1_length": 488, + "min_sentence2_length": 24, + "average_sentence2_length": 114.078125, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 70549 + }, + "eng_Latn-ztq_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 115.171875, + "max_sentence1_length": 827, + "min_sentence2_length": 19, + "average_sentence2_length": 117.6953125, + "max_sentence2_length": 505, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59614 + }, + "ztq_Latn-eng_Latn": { + "min_sentence1_length": 19, + "average_sentence1_length": 117.6953125, + "max_sentence1_length": 505, + "min_sentence2_length": 24, + "average_sentence2_length": 115.171875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 59614 + }, + "eng_Latn-zty_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.1171875, + "max_sentence1_length": 827, + "min_sentence2_length": 26, + "average_sentence2_length": 176.6484375, + "max_sentence2_length": 528, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74180 + }, + "zty_Latn-eng_Latn": { + "min_sentence1_length": 26, + "average_sentence1_length": 176.6484375, + "max_sentence1_length": 528, + "min_sentence2_length": 24, + "average_sentence2_length": 113.1171875, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 74180 + }, + "eng_Latn-zyp_Latn": { + "min_sentence1_length": 24, + "average_sentence1_length": 113.59765625, + "max_sentence1_length": 827, + "min_sentence2_length": 32, + "average_sentence2_length": 150.9375, + "max_sentence2_length": 342, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67721 + }, + "zyp_Latn-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 150.9375, + "max_sentence1_length": 342, + "min_sentence2_length": 24, + "average_sentence2_length": 113.59765625, + "max_sentence2_length": 827, + "num_samples": 256, + "num_samples_sentence2": 256, + "number_of_characters": 67721 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/BornholmBitextMining.json b/mteb/descriptive_stats/BitextMining/BornholmBitextMining.json index a6fcf5600..0675e5e0e 100644 --- a/mteb/descriptive_stats/BitextMining/BornholmBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/BornholmBitextMining.json @@ -1,12 +1,15 @@ { "test": { + "num_samples": 500, + "number_of_characters": 44361, + "unique_pairs": 500, "min_sentence1_length": 1, "average_sentence1_length": 49.834, "max_sentence1_length": 555, + "unique_sentence1": 497, "min_sentence2_length": 5, "average_sentence2_length": 38.888, "max_sentence2_length": 453, - "num_samples": 500, - "number_of_characters": 44361 + "unique_sentence2": 491 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/IN22ConvBitextMining.json b/mteb/descriptive_stats/BitextMining/IN22ConvBitextMining.json index 1435fa3f1..effafd237 100644 --- a/mteb/descriptive_stats/BitextMining/IN22ConvBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/IN22ConvBitextMining.json @@ -1,5073 +1,6594 @@ { "test": { + "num_samples": 760518, + "number_of_characters": 82637104, + "unique_pairs": 759283, "min_sentence1_length": 3, "average_sentence1_length": 54.32948595562498, "max_sentence1_length": 239, + "unique_sentence1": 34430, "min_sentence2_length": 3, "average_sentence2_length": 54.32948595562498, "max_sentence2_length": 239, - "num_samples": 760518, - "number_of_characters": 82637104, + "unique_sentence2": 34430, "hf_subset_descriptive_stats": { "asm_Beng-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 155988, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 155988 + "unique_sentence2": 1497 }, "asm_Beng-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 162044, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 162044 + "unique_sentence2": 1498 }, "asm_Beng-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 167032, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 167032 + "unique_sentence2": 1499 }, "asm_Beng-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 160716, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 160716 + "unique_sentence2": 1497 }, "asm_Beng-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 156282, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 156282 + "unique_sentence2": 1500 }, "asm_Beng-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 158269, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 158269 + "unique_sentence2": 1500 }, "asm_Beng-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 159964, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 159964 + "unique_sentence2": 1497 }, "asm_Beng-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 165177, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 165177 + "unique_sentence2": 1499 }, "asm_Beng-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 164681, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 164681 + "unique_sentence2": 1502 }, "asm_Beng-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 162408, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 162408 + "unique_sentence2": 1499 }, "asm_Beng-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 172838, + "unique_pairs": 1498, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 172838 + "unique_sentence2": 1495 }, "asm_Beng-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 162747, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 162747 + "unique_sentence2": 1501 }, "asm_Beng-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 157316, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 157316 + "unique_sentence2": 1498 }, "asm_Beng-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 160906, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 160906 + "unique_sentence2": 1497 }, "asm_Beng-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 164223, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 164223 + "unique_sentence2": 1500 }, "asm_Beng-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 160201, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 160201 + "unique_sentence2": 1495 }, "asm_Beng-san_Deva": { + "num_samples": 1503, + "number_of_characters": 158093, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 158093 + "unique_sentence2": 1500 }, "asm_Beng-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 169379, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 169379 + "unique_sentence2": 1500 }, "asm_Beng-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 162623, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 162623 + "unique_sentence2": 1490 }, "asm_Beng-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 174866, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 174866 + "unique_sentence2": 1492 }, "asm_Beng-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 157690, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 157690 + "unique_sentence2": 1495 }, "asm_Beng-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 161305, + "unique_pairs": 1498, "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, "max_sentence1_length": 208, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 161305 + "unique_sentence2": 1498 }, "ben_Beng-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 155988, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 155988 + "unique_sentence2": 1497 }, "ben_Beng-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 156448, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 156448 + "unique_sentence2": 1498 }, "ben_Beng-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 161436, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 161436 + "unique_sentence2": 1499 }, "ben_Beng-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 155120, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 155120 + "unique_sentence2": 1497 }, "ben_Beng-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 150686, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 150686 + "unique_sentence2": 1500 }, "ben_Beng-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 152673, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 152673 + "unique_sentence2": 1500 }, "ben_Beng-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 154368, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 154368 + "unique_sentence2": 1497 }, "ben_Beng-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 159581, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 159581 + "unique_sentence2": 1499 }, "ben_Beng-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 159085, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 159085 + "unique_sentence2": 1502 }, "ben_Beng-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 156812, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 156812 + "unique_sentence2": 1499 }, "ben_Beng-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 167242, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 167242 + "unique_sentence2": 1495 }, "ben_Beng-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 157151, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 157151 + "unique_sentence2": 1501 }, "ben_Beng-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 151720, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 151720 + "unique_sentence2": 1498 }, "ben_Beng-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 155310, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 155310 + "unique_sentence2": 1497 }, "ben_Beng-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 158627, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 158627 + "unique_sentence2": 1500 }, "ben_Beng-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 154605, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 154605 + "unique_sentence2": 1495 }, "ben_Beng-san_Deva": { + "num_samples": 1503, + "number_of_characters": 152497, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 152497 + "unique_sentence2": 1500 }, "ben_Beng-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 163783, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 163783 + "unique_sentence2": 1500 }, "ben_Beng-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 157027, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 157027 + "unique_sentence2": 1490 }, "ben_Beng-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 169270, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 169270 + "unique_sentence2": 1492 }, "ben_Beng-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 152094, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 152094 + "unique_sentence2": 1495 }, "ben_Beng-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 155709, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, "max_sentence1_length": 178, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 155709 + "unique_sentence2": 1498 }, "brx_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 162044, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 162044 + "unique_sentence2": 1497 }, "brx_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 156448, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 156448 + "unique_sentence2": 1497 }, "brx_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 167492, + "unique_pairs": 1501, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 167492 + "unique_sentence2": 1499 }, "brx_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 161176, + "unique_pairs": 1501, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 161176 + "unique_sentence2": 1497 }, "brx_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 156742, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 156742 + "unique_sentence2": 1500 }, "brx_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 158729, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 158729 + "unique_sentence2": 1500 }, "brx_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 160424, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 160424 + "unique_sentence2": 1497 }, "brx_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 165637, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 165637 + "unique_sentence2": 1499 }, "brx_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 165141, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 165141 + "unique_sentence2": 1502 }, "brx_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 162868, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 162868 + "unique_sentence2": 1499 }, "brx_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 173298, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 173298 + "unique_sentence2": 1495 }, "brx_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 163207, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 163207 + "unique_sentence2": 1501 }, "brx_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 157776, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 157776 + "unique_sentence2": 1498 }, "brx_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 161366, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 161366 + "unique_sentence2": 1497 }, "brx_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 164683, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 164683 + "unique_sentence2": 1500 }, "brx_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 160661, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 160661 + "unique_sentence2": 1495 }, "brx_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 158553, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 158553 + "unique_sentence2": 1500 }, "brx_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 169839, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 169839 + "unique_sentence2": 1500 }, "brx_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 163083, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 163083 + "unique_sentence2": 1490 }, "brx_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 175326, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 175326 + "unique_sentence2": 1492 }, "brx_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 158150, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 158150 + "unique_sentence2": 1495 }, "brx_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 161765, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, "max_sentence1_length": 210, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 161765 + "unique_sentence2": 1498 }, "doi_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 167032, + "unique_pairs": 1500, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 167032 + "unique_sentence2": 1497 }, "doi_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 161436, + "unique_pairs": 1501, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 161436 + "unique_sentence2": 1497 }, "doi_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 167492, + "unique_pairs": 1501, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 167492 + "unique_sentence2": 1498 }, "doi_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 166164, + "unique_pairs": 1500, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 166164 + "unique_sentence2": 1497 }, "doi_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 161730, + "unique_pairs": 1502, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 161730 + "unique_sentence2": 1500 }, "doi_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 163717, + "unique_pairs": 1503, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 163717 + "unique_sentence2": 1500 }, "doi_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 165412, + "unique_pairs": 1503, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 165412 + "unique_sentence2": 1497 }, "doi_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 170625, + "unique_pairs": 1503, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 170625 + "unique_sentence2": 1499 }, "doi_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 170129, + "unique_pairs": 1502, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 170129 + "unique_sentence2": 1502 }, "doi_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 167856, + "unique_pairs": 1502, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 167856 + "unique_sentence2": 1499 }, "doi_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 178286, + "unique_pairs": 1500, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 178286 + "unique_sentence2": 1495 }, "doi_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 168195, + "unique_pairs": 1502, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 168195 + "unique_sentence2": 1501 }, "doi_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 162764, + "unique_pairs": 1501, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 162764 + "unique_sentence2": 1498 }, "doi_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 166354, + "unique_pairs": 1501, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 166354 + "unique_sentence2": 1497 }, "doi_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 169671, + "unique_pairs": 1503, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 169671 + "unique_sentence2": 1500 }, "doi_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 165649, + "unique_pairs": 1500, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 165649 + "unique_sentence2": 1495 }, "doi_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 163541, + "unique_pairs": 1502, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 163541 + "unique_sentence2": 1500 }, "doi_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 174827, + "unique_pairs": 1503, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 174827 + "unique_sentence2": 1500 }, "doi_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 168071, + "unique_pairs": 1500, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 168071 + "unique_sentence2": 1490 }, "doi_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 180314, + "unique_pairs": 1502, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 180314 + "unique_sentence2": 1492 }, "doi_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 163138, + "unique_pairs": 1501, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 163138 + "unique_sentence2": 1495 }, "doi_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 166753, + "unique_pairs": 1500, "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, "max_sentence1_length": 209, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 166753 + "unique_sentence2": 1498 }, "eng_Latn-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 160716, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 160716 + "unique_sentence2": 1497 }, "eng_Latn-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 155120, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 155120 + "unique_sentence2": 1497 }, "eng_Latn-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 161176, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 161176 + "unique_sentence2": 1498 }, "eng_Latn-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 166164, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 166164 + "unique_sentence2": 1499 }, "eng_Latn-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 155414, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 155414 + "unique_sentence2": 1500 }, "eng_Latn-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 157401, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 157401 + "unique_sentence2": 1500 }, "eng_Latn-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 159096, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 159096 + "unique_sentence2": 1497 }, "eng_Latn-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 164309, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 164309 + "unique_sentence2": 1499 }, "eng_Latn-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 163813, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 163813 + "unique_sentence2": 1502 }, "eng_Latn-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 161540, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 161540 + "unique_sentence2": 1499 }, "eng_Latn-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 171970, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 171970 + "unique_sentence2": 1495 }, "eng_Latn-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 161879, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 161879 + "unique_sentence2": 1501 }, "eng_Latn-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 156448, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 156448 + "unique_sentence2": 1498 }, "eng_Latn-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 160038, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 160038 + "unique_sentence2": 1497 }, "eng_Latn-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 163355, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 163355 + "unique_sentence2": 1500 }, "eng_Latn-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 159333, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 159333 + "unique_sentence2": 1495 }, "eng_Latn-san_Deva": { + "num_samples": 1503, + "number_of_characters": 157225, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 157225 + "unique_sentence2": 1500 }, "eng_Latn-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 168511, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 168511 + "unique_sentence2": 1500 }, "eng_Latn-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 161755, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 161755 + "unique_sentence2": 1490 }, "eng_Latn-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 173998, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 173998 + "unique_sentence2": 1492 }, "eng_Latn-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 156822, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 156822 + "unique_sentence2": 1495 }, "eng_Latn-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 160437, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, "max_sentence1_length": 201, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 160437 + "unique_sentence2": 1498 }, "gom_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 156282, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 156282 + "unique_sentence2": 1497 }, "gom_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 150686, + "unique_pairs": 1501, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 150686 + "unique_sentence2": 1497 }, "gom_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 156742, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 156742 + "unique_sentence2": 1498 }, "gom_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 161730, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 161730 + "unique_sentence2": 1499 }, "gom_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 155414, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 155414 + "unique_sentence2": 1497 }, "gom_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 152967, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 152967 + "unique_sentence2": 1500 }, "gom_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 154662, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 154662 + "unique_sentence2": 1497 }, "gom_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 159875, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 159875 + "unique_sentence2": 1499 }, "gom_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 159379, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 159379 + "unique_sentence2": 1502 }, "gom_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 157106, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 157106 + "unique_sentence2": 1499 }, "gom_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 167536, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 167536 + "unique_sentence2": 1495 }, "gom_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 157445, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 157445 + "unique_sentence2": 1501 }, "gom_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 152014, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 152014 + "unique_sentence2": 1498 }, "gom_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 155604, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 155604 + "unique_sentence2": 1497 }, "gom_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 158921, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 158921 + "unique_sentence2": 1500 }, "gom_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 154899, + "unique_pairs": 1501, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 154899 + "unique_sentence2": 1495 }, "gom_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 152791, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 152791 + "unique_sentence2": 1500 }, "gom_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 164077, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 164077 + "unique_sentence2": 1500 }, "gom_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 157321, + "unique_pairs": 1500, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 157321 + "unique_sentence2": 1490 }, "gom_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 169564, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 169564 + "unique_sentence2": 1492 }, "gom_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 152388, + "unique_pairs": 1501, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 152388 + "unique_sentence2": 1495 }, "gom_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 156003, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, "max_sentence1_length": 203, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 156003 + "unique_sentence2": 1498 }, "guj_Gujr-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 158269, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 158269 + "unique_sentence2": 1497 }, "guj_Gujr-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 152673, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 152673 + "unique_sentence2": 1497 }, "guj_Gujr-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 158729, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 158729 + "unique_sentence2": 1498 }, "guj_Gujr-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 163717, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 163717 + "unique_sentence2": 1499 }, "guj_Gujr-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 157401, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 157401 + "unique_sentence2": 1497 }, "guj_Gujr-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 152967, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 152967 + "unique_sentence2": 1500 }, "guj_Gujr-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 156649, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 156649 + "unique_sentence2": 1497 }, "guj_Gujr-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 161862, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 161862 + "unique_sentence2": 1499 }, "guj_Gujr-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 161366, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 161366 + "unique_sentence2": 1502 }, "guj_Gujr-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 159093, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 159093 + "unique_sentence2": 1499 }, "guj_Gujr-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 169523, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 169523 + "unique_sentence2": 1495 }, "guj_Gujr-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 159432, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 159432 + "unique_sentence2": 1501 }, "guj_Gujr-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 154001, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 154001 + "unique_sentence2": 1498 }, "guj_Gujr-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 157591, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 157591 + "unique_sentence2": 1497 }, "guj_Gujr-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 160908, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 160908 + "unique_sentence2": 1500 }, "guj_Gujr-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 156886, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 156886 + "unique_sentence2": 1495 }, "guj_Gujr-san_Deva": { + "num_samples": 1503, + "number_of_characters": 154778, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 154778 + "unique_sentence2": 1500 }, "guj_Gujr-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 166064, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 166064 + "unique_sentence2": 1500 }, "guj_Gujr-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 159308, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 159308 + "unique_sentence2": 1490 }, "guj_Gujr-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 171551, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 171551 + "unique_sentence2": 1492 }, "guj_Gujr-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 154375, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 154375 + "unique_sentence2": 1495 }, "guj_Gujr-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 157990, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, "max_sentence1_length": 205, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 157990 + "unique_sentence2": 1498 }, "hin_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 159964, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 159964 + "unique_sentence2": 1497 }, "hin_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 154368, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 154368 + "unique_sentence2": 1497 }, "hin_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 160424, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 160424 + "unique_sentence2": 1498 }, "hin_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 165412, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 165412 + "unique_sentence2": 1499 }, "hin_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 159096, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 159096 + "unique_sentence2": 1497 }, "hin_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 154662, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 154662 + "unique_sentence2": 1500 }, "hin_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 156649, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 156649 + "unique_sentence2": 1500 }, "hin_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 163557, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 163557 + "unique_sentence2": 1499 }, "hin_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 163061, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 163061 + "unique_sentence2": 1502 }, "hin_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 160788, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 160788 + "unique_sentence2": 1499 }, "hin_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 171218, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 171218 + "unique_sentence2": 1495 }, "hin_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 161127, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 161127 + "unique_sentence2": 1501 }, "hin_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 155696, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 155696 + "unique_sentence2": 1498 }, "hin_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 159286, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 159286 + "unique_sentence2": 1497 }, "hin_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 162603, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 162603 + "unique_sentence2": 1500 }, "hin_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 158581, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 158581 + "unique_sentence2": 1495 }, "hin_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 156473, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 156473 + "unique_sentence2": 1500 }, "hin_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 167759, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 167759 + "unique_sentence2": 1500 }, "hin_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 161003, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 161003 + "unique_sentence2": 1490 }, "hin_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 173246, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 173246 + "unique_sentence2": 1492 }, "hin_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 156070, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 156070 + "unique_sentence2": 1495 }, "hin_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 159685, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, "max_sentence1_length": 192, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 159685 + "unique_sentence2": 1498 }, "kan_Knda-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 165177, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 165177 + "unique_sentence2": 1497 }, "kan_Knda-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 159581, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 159581 + "unique_sentence2": 1497 }, "kan_Knda-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 165637, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 165637 + "unique_sentence2": 1498 }, "kan_Knda-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 170625, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 170625 + "unique_sentence2": 1499 }, "kan_Knda-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 164309, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 164309 + "unique_sentence2": 1497 }, "kan_Knda-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 159875, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 159875 + "unique_sentence2": 1500 }, "kan_Knda-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 161862, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 161862 + "unique_sentence2": 1500 }, "kan_Knda-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 163557, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 163557 + "unique_sentence2": 1497 }, "kan_Knda-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 168274, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 168274 + "unique_sentence2": 1502 }, "kan_Knda-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 166001, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 166001 + "unique_sentence2": 1499 }, "kan_Knda-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 176431, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 176431 + "unique_sentence2": 1495 }, "kan_Knda-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 166340, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 166340 + "unique_sentence2": 1501 }, "kan_Knda-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 160909, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 160909 + "unique_sentence2": 1498 }, "kan_Knda-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 164499, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 164499 + "unique_sentence2": 1497 }, "kan_Knda-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 167816, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 167816 + "unique_sentence2": 1500 }, "kan_Knda-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 163794, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 163794 + "unique_sentence2": 1495 }, "kan_Knda-san_Deva": { + "num_samples": 1503, + "number_of_characters": 161686, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 161686 + "unique_sentence2": 1500 }, "kan_Knda-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 172972, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 172972 + "unique_sentence2": 1500 }, "kan_Knda-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 166216, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 166216 + "unique_sentence2": 1490 }, "kan_Knda-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 178459, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 178459 + "unique_sentence2": 1492 }, "kan_Knda-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 161283, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 161283 + "unique_sentence2": 1495 }, "kan_Knda-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 164898, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, "max_sentence1_length": 201, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 164898 + "unique_sentence2": 1498 }, "kas_Arab-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 164681, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 164681 + "unique_sentence2": 1497 }, "kas_Arab-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 159085, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 159085 + "unique_sentence2": 1497 }, "kas_Arab-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 165141, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 165141 + "unique_sentence2": 1498 }, "kas_Arab-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 170129, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 170129 + "unique_sentence2": 1499 }, "kas_Arab-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 163813, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 163813 + "unique_sentence2": 1497 }, "kas_Arab-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 159379, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 159379 + "unique_sentence2": 1500 }, "kas_Arab-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 161366, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 161366 + "unique_sentence2": 1500 }, "kas_Arab-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 163061, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 163061 + "unique_sentence2": 1497 }, "kas_Arab-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 168274, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 168274 + "unique_sentence2": 1499 }, "kas_Arab-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 165505, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 165505 + "unique_sentence2": 1499 }, "kas_Arab-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 175935, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 175935 + "unique_sentence2": 1495 }, "kas_Arab-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 165844, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 165844 + "unique_sentence2": 1501 }, "kas_Arab-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 160413, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 160413 + "unique_sentence2": 1498 }, "kas_Arab-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 164003, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 164003 + "unique_sentence2": 1497 }, "kas_Arab-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 167320, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 167320 + "unique_sentence2": 1500 }, "kas_Arab-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 163298, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 163298 + "unique_sentence2": 1495 }, "kas_Arab-san_Deva": { + "num_samples": 1503, + "number_of_characters": 161190, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 161190 + "unique_sentence2": 1500 }, "kas_Arab-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 172476, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 172476 + "unique_sentence2": 1500 }, "kas_Arab-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 165720, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 165720 + "unique_sentence2": 1490 }, "kas_Arab-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 177963, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 177963 + "unique_sentence2": 1492 }, "kas_Arab-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 160787, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 160787 + "unique_sentence2": 1495 }, "kas_Arab-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 164402, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, "max_sentence1_length": 203, + "unique_sentence1": 1502, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 164402 + "unique_sentence2": 1498 }, "mai_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 162408, + "unique_pairs": 1501, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 162408 + "unique_sentence2": 1497 }, "mai_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 156812, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 156812 + "unique_sentence2": 1497 }, "mai_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 162868, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 162868 + "unique_sentence2": 1498 }, "mai_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 167856, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 167856 + "unique_sentence2": 1499 }, "mai_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 161540, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 161540 + "unique_sentence2": 1497 }, "mai_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 157106, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 157106 + "unique_sentence2": 1500 }, "mai_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 159093, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 159093 + "unique_sentence2": 1500 }, "mai_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 160788, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 160788 + "unique_sentence2": 1497 }, "mai_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 166001, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 166001 + "unique_sentence2": 1499 }, "mai_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 165505, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 165505 + "unique_sentence2": 1502 }, "mai_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 173662, + "unique_pairs": 1501, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 173662 + "unique_sentence2": 1495 }, "mai_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 163571, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 163571 + "unique_sentence2": 1501 }, "mai_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 158140, + "unique_pairs": 1501, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 158140 + "unique_sentence2": 1498 }, "mai_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 161730, + "unique_pairs": 1500, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 161730 + "unique_sentence2": 1497 }, "mai_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 165047, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 165047 + "unique_sentence2": 1500 }, "mai_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 161025, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 161025 + "unique_sentence2": 1495 }, "mai_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 158917, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 158917 + "unique_sentence2": 1500 }, "mai_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 170203, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 170203 + "unique_sentence2": 1500 }, "mai_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 163447, + "unique_pairs": 1501, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 163447 + "unique_sentence2": 1490 }, "mai_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 175690, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 175690 + "unique_sentence2": 1492 }, "mai_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 158514, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 158514 + "unique_sentence2": 1495 }, "mai_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 162129, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, "max_sentence1_length": 230, + "unique_sentence1": 1499, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 162129 + "unique_sentence2": 1498 }, "mal_Mlym-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 172838, + "unique_pairs": 1498, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 172838 + "unique_sentence2": 1497 }, "mal_Mlym-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 167242, + "unique_pairs": 1501, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 167242 + "unique_sentence2": 1497 }, "mal_Mlym-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 173298, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 173298 + "unique_sentence2": 1498 }, "mal_Mlym-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 178286, + "unique_pairs": 1500, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 178286 + "unique_sentence2": 1499 }, "mal_Mlym-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 171970, + "unique_pairs": 1499, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 171970 + "unique_sentence2": 1497 }, "mal_Mlym-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 167536, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 167536 + "unique_sentence2": 1500 }, "mal_Mlym-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 169523, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 169523 + "unique_sentence2": 1500 }, "mal_Mlym-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 171218, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 171218 + "unique_sentence2": 1497 }, "mal_Mlym-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 176431, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 176431 + "unique_sentence2": 1499 }, "mal_Mlym-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 175935, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 175935 + "unique_sentence2": 1502 }, "mal_Mlym-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 173662, + "unique_pairs": 1501, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 173662 + "unique_sentence2": 1499 }, "mal_Mlym-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 174001, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 174001 + "unique_sentence2": 1501 }, "mal_Mlym-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 168570, + "unique_pairs": 1500, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 168570 + "unique_sentence2": 1498 }, "mal_Mlym-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 172160, + "unique_pairs": 1500, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 172160 + "unique_sentence2": 1497 }, "mal_Mlym-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 175477, + "unique_pairs": 1503, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 175477 + "unique_sentence2": 1500 }, "mal_Mlym-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 171455, + "unique_pairs": 1498, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 171455 + "unique_sentence2": 1495 }, "mal_Mlym-san_Deva": { + "num_samples": 1503, + "number_of_characters": 169347, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 169347 + "unique_sentence2": 1500 }, "mal_Mlym-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 180633, + "unique_pairs": 1501, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 180633 + "unique_sentence2": 1500 }, "mal_Mlym-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 173877, + "unique_pairs": 1499, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 173877 + "unique_sentence2": 1490 }, "mal_Mlym-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 186120, + "unique_pairs": 1502, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 186120 + "unique_sentence2": 1492 }, "mal_Mlym-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 168944, + "unique_pairs": 1500, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 168944 + "unique_sentence2": 1495 }, "mal_Mlym-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 172559, + "unique_pairs": 1499, "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, "max_sentence1_length": 219, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 172559 + "unique_sentence2": 1498 }, "mar_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 162747, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 162747 + "unique_sentence2": 1497 }, "mar_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 157151, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 157151 + "unique_sentence2": 1497 }, "mar_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 163207, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 163207 + "unique_sentence2": 1498 }, "mar_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 168195, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 168195 + "unique_sentence2": 1499 }, "mar_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 161879, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 161879 + "unique_sentence2": 1497 }, "mar_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 157445, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 157445 + "unique_sentence2": 1500 }, "mar_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 159432, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 159432 + "unique_sentence2": 1500 }, "mar_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 161127, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 161127 + "unique_sentence2": 1497 }, "mar_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 166340, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 166340 + "unique_sentence2": 1499 }, "mar_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 165844, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 165844 + "unique_sentence2": 1502 }, "mar_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 163571, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 163571 + "unique_sentence2": 1499 }, "mar_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 174001, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 174001 + "unique_sentence2": 1495 }, "mar_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 158479, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 158479 + "unique_sentence2": 1498 }, "mar_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 162069, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 162069 + "unique_sentence2": 1497 }, "mar_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 165386, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 165386 + "unique_sentence2": 1500 }, "mar_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 161364, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 161364 + "unique_sentence2": 1495 }, "mar_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 159256, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 159256 + "unique_sentence2": 1500 }, "mar_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 170542, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 170542 + "unique_sentence2": 1500 }, "mar_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 163786, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 163786 + "unique_sentence2": 1490 }, "mar_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 176029, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 176029 + "unique_sentence2": 1492 }, "mar_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 158853, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 158853 + "unique_sentence2": 1495 }, "mar_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 162468, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, "max_sentence1_length": 221, + "unique_sentence1": 1501, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 162468 + "unique_sentence2": 1498 }, "mni_Mtei-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 157316, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 157316 + "unique_sentence2": 1497 }, "mni_Mtei-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 151720, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 151720 + "unique_sentence2": 1497 }, "mni_Mtei-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 157776, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 157776 + "unique_sentence2": 1498 }, "mni_Mtei-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 162764, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 162764 + "unique_sentence2": 1499 }, "mni_Mtei-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 156448, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 156448 + "unique_sentence2": 1497 }, "mni_Mtei-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 152014, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 152014 + "unique_sentence2": 1500 }, "mni_Mtei-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 154001, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 154001 + "unique_sentence2": 1500 }, "mni_Mtei-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 155696, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 155696 + "unique_sentence2": 1497 }, "mni_Mtei-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 160909, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 160909 + "unique_sentence2": 1499 }, "mni_Mtei-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 160413, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 160413 + "unique_sentence2": 1502 }, "mni_Mtei-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 158140, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 158140 + "unique_sentence2": 1499 }, "mni_Mtei-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 168570, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 168570 + "unique_sentence2": 1495 }, "mni_Mtei-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 158479, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 158479 + "unique_sentence2": 1501 }, "mni_Mtei-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 156638, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 156638 + "unique_sentence2": 1497 }, "mni_Mtei-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 159955, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 159955 + "unique_sentence2": 1500 }, "mni_Mtei-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 155933, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 155933 + "unique_sentence2": 1495 }, "mni_Mtei-san_Deva": { + "num_samples": 1503, + "number_of_characters": 153825, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 153825 + "unique_sentence2": 1500 }, "mni_Mtei-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 165111, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 165111 + "unique_sentence2": 1500 }, "mni_Mtei-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 158355, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 158355 + "unique_sentence2": 1490 }, "mni_Mtei-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 170598, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 170598 + "unique_sentence2": 1492 }, "mni_Mtei-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 153422, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 153422 + "unique_sentence2": 1495 }, "mni_Mtei-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 157037, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, "max_sentence1_length": 239, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 157037 + "unique_sentence2": 1498 }, "npi_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 160906, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 160906 + "unique_sentence2": 1497 }, "npi_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 155310, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 155310 + "unique_sentence2": 1497 }, "npi_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 161366, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 161366 + "unique_sentence2": 1498 }, "npi_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 166354, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 166354 + "unique_sentence2": 1499 }, "npi_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 160038, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 160038 + "unique_sentence2": 1497 }, "npi_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 155604, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 155604 + "unique_sentence2": 1500 }, "npi_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 157591, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 157591 + "unique_sentence2": 1500 }, "npi_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 159286, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 159286 + "unique_sentence2": 1497 }, "npi_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 164499, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 164499 + "unique_sentence2": 1499 }, "npi_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 164003, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 164003 + "unique_sentence2": 1502 }, "npi_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 161730, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 161730 + "unique_sentence2": 1499 }, "npi_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 172160, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 172160 + "unique_sentence2": 1495 }, "npi_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 162069, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 162069 + "unique_sentence2": 1501 }, "npi_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 156638, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 156638 + "unique_sentence2": 1498 }, "npi_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 163545, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 163545 + "unique_sentence2": 1500 }, "npi_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 159523, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 159523 + "unique_sentence2": 1495 }, "npi_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 157415, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 157415 + "unique_sentence2": 1500 }, "npi_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 168701, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 168701 + "unique_sentence2": 1500 }, "npi_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 161945, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 161945 + "unique_sentence2": 1490 }, "npi_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 174188, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 174188 + "unique_sentence2": 1492 }, "npi_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 157012, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 157012 + "unique_sentence2": 1495 }, "npi_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 160627, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, "max_sentence1_length": 223, + "unique_sentence1": 1497, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 160627 + "unique_sentence2": 1498 }, "ory_Orya-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 164223, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 164223 + "unique_sentence2": 1497 }, "ory_Orya-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 158627, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 158627 + "unique_sentence2": 1497 }, "ory_Orya-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 164683, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 164683 + "unique_sentence2": 1498 }, "ory_Orya-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 169671, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 169671 + "unique_sentence2": 1499 }, "ory_Orya-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 163355, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 163355 + "unique_sentence2": 1497 }, "ory_Orya-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 158921, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 158921 + "unique_sentence2": 1500 }, "ory_Orya-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 160908, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 160908 + "unique_sentence2": 1500 }, "ory_Orya-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 162603, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 162603 + "unique_sentence2": 1497 }, "ory_Orya-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 167816, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 167816 + "unique_sentence2": 1499 }, "ory_Orya-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 167320, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 167320 + "unique_sentence2": 1502 }, "ory_Orya-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 165047, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 165047 + "unique_sentence2": 1499 }, "ory_Orya-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 175477, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 175477 + "unique_sentence2": 1495 }, "ory_Orya-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 165386, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 165386 + "unique_sentence2": 1501 }, "ory_Orya-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 159955, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 159955 + "unique_sentence2": 1498 }, "ory_Orya-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 163545, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 163545 + "unique_sentence2": 1497 }, "ory_Orya-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 162840, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 162840 + "unique_sentence2": 1495 }, "ory_Orya-san_Deva": { + "num_samples": 1503, + "number_of_characters": 160732, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 160732 + "unique_sentence2": 1500 }, "ory_Orya-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 172018, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 172018 + "unique_sentence2": 1500 }, "ory_Orya-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 165262, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 165262 + "unique_sentence2": 1490 }, "ory_Orya-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 177505, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 177505 + "unique_sentence2": 1492 }, "ory_Orya-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 160329, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 160329 + "unique_sentence2": 1495 }, "ory_Orya-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 163944, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, "max_sentence1_length": 195, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 163944 + "unique_sentence2": 1498 }, "pan_Guru-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 160201, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 160201 + "unique_sentence2": 1497 }, "pan_Guru-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 154605, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 154605 + "unique_sentence2": 1497 }, "pan_Guru-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 160661, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 160661 + "unique_sentence2": 1498 }, "pan_Guru-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 165649, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 165649 + "unique_sentence2": 1499 }, "pan_Guru-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 159333, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 159333 + "unique_sentence2": 1497 }, "pan_Guru-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 154899, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 154899 + "unique_sentence2": 1500 }, "pan_Guru-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 156886, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 156886 + "unique_sentence2": 1500 }, "pan_Guru-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 158581, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 158581 + "unique_sentence2": 1497 }, "pan_Guru-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 163794, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 163794 + "unique_sentence2": 1499 }, "pan_Guru-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 163298, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 163298 + "unique_sentence2": 1502 }, "pan_Guru-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 161025, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 161025 + "unique_sentence2": 1499 }, "pan_Guru-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 171455, + "unique_pairs": 1498, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 171455 + "unique_sentence2": 1495 }, "pan_Guru-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 161364, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 161364 + "unique_sentence2": 1501 }, "pan_Guru-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 155933, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 155933 + "unique_sentence2": 1498 }, "pan_Guru-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 159523, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 159523 + "unique_sentence2": 1497 }, "pan_Guru-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 162840, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 162840 + "unique_sentence2": 1500 }, "pan_Guru-san_Deva": { + "num_samples": 1503, + "number_of_characters": 156710, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 156710 + "unique_sentence2": 1500 }, "pan_Guru-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 167996, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 167996 + "unique_sentence2": 1500 }, "pan_Guru-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 161240, + "unique_pairs": 1498, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 161240 + "unique_sentence2": 1490 }, "pan_Guru-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 173483, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 173483 + "unique_sentence2": 1492 }, "pan_Guru-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 156307, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 156307 + "unique_sentence2": 1495 }, "pan_Guru-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 159922, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, "max_sentence1_length": 221, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 159922 + "unique_sentence2": 1498 }, "san_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 158093, + "unique_pairs": 1501, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 158093 + "unique_sentence2": 1497 }, "san_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 152497, + "unique_pairs": 1503, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 152497 + "unique_sentence2": 1497 }, "san_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 158553, + "unique_pairs": 1503, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 158553 + "unique_sentence2": 1498 }, "san_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 163541, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 163541 + "unique_sentence2": 1499 }, "san_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 157225, + "unique_pairs": 1501, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 157225 + "unique_sentence2": 1497 }, "san_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 152791, + "unique_pairs": 1503, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 152791 + "unique_sentence2": 1500 }, "san_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 154778, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 154778 + "unique_sentence2": 1500 }, "san_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 156473, + "unique_pairs": 1503, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 156473 + "unique_sentence2": 1497 }, "san_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 161686, + "unique_pairs": 1503, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 161686 + "unique_sentence2": 1499 }, "san_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 161190, + "unique_pairs": 1503, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 161190 + "unique_sentence2": 1502 }, "san_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 158917, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 158917 + "unique_sentence2": 1499 }, "san_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 169347, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 169347 + "unique_sentence2": 1495 }, "san_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 159256, + "unique_pairs": 1503, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 159256 + "unique_sentence2": 1501 }, "san_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 153825, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 153825 + "unique_sentence2": 1498 }, "san_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 157415, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 157415 + "unique_sentence2": 1497 }, "san_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 160732, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 160732 + "unique_sentence2": 1500 }, "san_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 156710, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 156710 + "unique_sentence2": 1495 }, "san_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 165888, + "unique_pairs": 1503, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 165888 + "unique_sentence2": 1500 }, "san_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 159132, + "unique_pairs": 1501, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 159132 + "unique_sentence2": 1490 }, "san_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 171375, + "unique_pairs": 1503, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 171375 + "unique_sentence2": 1492 }, "san_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 154199, + "unique_pairs": 1501, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 154199 + "unique_sentence2": 1495 }, "san_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 157814, + "unique_pairs": 1501, "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, "max_sentence1_length": 181, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 157814 + "unique_sentence2": 1498 }, "sat_Olck-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 169379, + "unique_pairs": 1502, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 169379 + "unique_sentence2": 1497 }, "sat_Olck-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 163783, + "unique_pairs": 1503, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 163783 + "unique_sentence2": 1497 }, "sat_Olck-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 169839, + "unique_pairs": 1503, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 169839 + "unique_sentence2": 1498 }, "sat_Olck-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 174827, + "unique_pairs": 1503, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 174827 + "unique_sentence2": 1499 }, "sat_Olck-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 168511, + "unique_pairs": 1502, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 168511 + "unique_sentence2": 1497 }, "sat_Olck-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 164077, + "unique_pairs": 1502, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 164077 + "unique_sentence2": 1500 }, "sat_Olck-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 166064, + "unique_pairs": 1503, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 166064 + "unique_sentence2": 1500 }, "sat_Olck-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 167759, + "unique_pairs": 1502, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 167759 + "unique_sentence2": 1497 }, "sat_Olck-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 172972, + "unique_pairs": 1503, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 172972 + "unique_sentence2": 1499 }, "sat_Olck-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 172476, + "unique_pairs": 1503, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 172476 + "unique_sentence2": 1502 }, "sat_Olck-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 170203, + "unique_pairs": 1503, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 170203 + "unique_sentence2": 1499 }, "sat_Olck-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 180633, + "unique_pairs": 1501, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 180633 + "unique_sentence2": 1495 }, "sat_Olck-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 170542, + "unique_pairs": 1503, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 170542 + "unique_sentence2": 1501 }, "sat_Olck-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 165111, + "unique_pairs": 1502, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 165111 + "unique_sentence2": 1498 }, "sat_Olck-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 168701, + "unique_pairs": 1503, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 168701 + "unique_sentence2": 1497 }, "sat_Olck-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 172018, + "unique_pairs": 1503, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 172018 + "unique_sentence2": 1500 }, "sat_Olck-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 167996, + "unique_pairs": 1501, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 167996 + "unique_sentence2": 1495 }, "sat_Olck-san_Deva": { + "num_samples": 1503, + "number_of_characters": 165888, + "unique_pairs": 1503, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 165888 + "unique_sentence2": 1500 }, "sat_Olck-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 170418, + "unique_pairs": 1501, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 170418 + "unique_sentence2": 1490 }, "sat_Olck-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 182661, + "unique_pairs": 1503, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 182661 + "unique_sentence2": 1492 }, "sat_Olck-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 165485, + "unique_pairs": 1502, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 165485 + "unique_sentence2": 1495 }, "sat_Olck-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 169100, + "unique_pairs": 1502, "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, "max_sentence1_length": 225, + "unique_sentence1": 1500, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 169100 + "unique_sentence2": 1498 }, "snd_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 162623, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 162623 + "unique_sentence2": 1497 }, "snd_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 157027, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 157027 + "unique_sentence2": 1497 }, "snd_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 163083, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 163083 + "unique_sentence2": 1498 }, "snd_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 168071, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 168071 + "unique_sentence2": 1499 }, "snd_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 161755, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 161755 + "unique_sentence2": 1497 }, "snd_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 157321, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 157321 + "unique_sentence2": 1500 }, "snd_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 159308, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 159308 + "unique_sentence2": 1500 }, "snd_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 161003, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 161003 + "unique_sentence2": 1497 }, "snd_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 166216, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 166216 + "unique_sentence2": 1499 }, "snd_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 165720, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 165720 + "unique_sentence2": 1502 }, "snd_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 163447, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 163447 + "unique_sentence2": 1499 }, "snd_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 173877, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 173877 + "unique_sentence2": 1495 }, "snd_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 163786, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 163786 + "unique_sentence2": 1501 }, "snd_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 158355, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 158355 + "unique_sentence2": 1498 }, "snd_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 161945, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 161945 + "unique_sentence2": 1497 }, "snd_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 165262, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 165262 + "unique_sentence2": 1500 }, "snd_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 161240, + "unique_pairs": 1498, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 161240 + "unique_sentence2": 1495 }, "snd_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 159132, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 159132 + "unique_sentence2": 1500 }, "snd_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 170418, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 170418 + "unique_sentence2": 1500 }, "snd_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 175905, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 175905 + "unique_sentence2": 1492 }, "snd_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 158729, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 158729 + "unique_sentence2": 1495 }, "snd_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 162344, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, "max_sentence1_length": 195, + "unique_sentence1": 1490, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 162344 + "unique_sentence2": 1498 }, "tam_Taml-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 174866, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 174866 + "unique_sentence2": 1497 }, "tam_Taml-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 169270, + "unique_pairs": 1501, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 169270 + "unique_sentence2": 1497 }, "tam_Taml-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 175326, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 175326 + "unique_sentence2": 1498 }, "tam_Taml-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 180314, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 180314 + "unique_sentence2": 1499 }, "tam_Taml-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 173998, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 173998 + "unique_sentence2": 1497 }, "tam_Taml-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 169564, + "unique_pairs": 1503, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 169564 + "unique_sentence2": 1500 }, "tam_Taml-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 171551, + "unique_pairs": 1503, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 171551 + "unique_sentence2": 1500 }, "tam_Taml-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 173246, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 173246 + "unique_sentence2": 1497 }, "tam_Taml-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 178459, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 178459 + "unique_sentence2": 1499 }, "tam_Taml-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 177963, + "unique_pairs": 1503, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 177963 + "unique_sentence2": 1502 }, "tam_Taml-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 175690, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 175690 + "unique_sentence2": 1499 }, "tam_Taml-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 186120, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 186120 + "unique_sentence2": 1495 }, "tam_Taml-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 176029, + "unique_pairs": 1503, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 176029 + "unique_sentence2": 1501 }, "tam_Taml-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 170598, + "unique_pairs": 1503, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 170598 + "unique_sentence2": 1498 }, "tam_Taml-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 174188, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 174188 + "unique_sentence2": 1497 }, "tam_Taml-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 177505, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 177505 + "unique_sentence2": 1500 }, "tam_Taml-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 173483, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 173483 + "unique_sentence2": 1495 }, "tam_Taml-san_Deva": { + "num_samples": 1503, + "number_of_characters": 171375, + "unique_pairs": 1503, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 171375 + "unique_sentence2": 1500 }, "tam_Taml-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 182661, + "unique_pairs": 1503, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 182661 + "unique_sentence2": 1500 }, "tam_Taml-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 175905, + "unique_pairs": 1503, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 175905 + "unique_sentence2": 1490 }, "tam_Taml-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 170972, + "unique_pairs": 1502, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 170972 + "unique_sentence2": 1495 }, "tam_Taml-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 174587, + "unique_pairs": 1503, "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, "max_sentence1_length": 224, + "unique_sentence1": 1492, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 174587 + "unique_sentence2": 1498 }, "tel_Telu-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 157690, + "unique_pairs": 1499, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 157690 + "unique_sentence2": 1497 }, "tel_Telu-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 152094, + "unique_pairs": 1501, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 152094 + "unique_sentence2": 1497 }, "tel_Telu-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 158150, + "unique_pairs": 1502, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 158150 + "unique_sentence2": 1498 }, "tel_Telu-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 163138, + "unique_pairs": 1501, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 163138 + "unique_sentence2": 1499 }, "tel_Telu-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 156822, + "unique_pairs": 1500, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 156822 + "unique_sentence2": 1497 }, "tel_Telu-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 152388, + "unique_pairs": 1501, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 152388 + "unique_sentence2": 1500 }, "tel_Telu-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 154375, + "unique_pairs": 1502, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 154375 + "unique_sentence2": 1500 }, "tel_Telu-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 156070, + "unique_pairs": 1502, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 156070 + "unique_sentence2": 1497 }, "tel_Telu-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 161283, + "unique_pairs": 1502, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 161283 + "unique_sentence2": 1499 }, "tel_Telu-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 160787, + "unique_pairs": 1503, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 160787 + "unique_sentence2": 1502 }, "tel_Telu-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 158514, + "unique_pairs": 1502, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 158514 + "unique_sentence2": 1499 }, "tel_Telu-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 168944, + "unique_pairs": 1500, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 168944 + "unique_sentence2": 1495 }, "tel_Telu-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 158853, + "unique_pairs": 1503, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 158853 + "unique_sentence2": 1501 }, "tel_Telu-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 153422, + "unique_pairs": 1501, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 153422 + "unique_sentence2": 1498 }, "tel_Telu-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 157012, + "unique_pairs": 1502, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 157012 + "unique_sentence2": 1497 }, "tel_Telu-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 160329, + "unique_pairs": 1502, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 160329 + "unique_sentence2": 1500 }, "tel_Telu-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 156307, + "unique_pairs": 1499, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 156307 + "unique_sentence2": 1495 }, "tel_Telu-san_Deva": { + "num_samples": 1503, + "number_of_characters": 154199, + "unique_pairs": 1501, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 154199 + "unique_sentence2": 1500 }, "tel_Telu-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 165485, + "unique_pairs": 1502, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 165485 + "unique_sentence2": 1500 }, "tel_Telu-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 158729, + "unique_pairs": 1499, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 158729 + "unique_sentence2": 1490 }, "tel_Telu-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 170972, + "unique_pairs": 1502, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 170972 + "unique_sentence2": 1492 }, "tel_Telu-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 157411, + "unique_pairs": 1499, "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, "max_sentence1_length": 182, + "unique_sentence1": 1495, "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, "max_sentence2_length": 206, - "num_samples": 1503, - "number_of_characters": 157411 + "unique_sentence2": 1498 }, "urd_Arab-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 161305, + "unique_pairs": 1498, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, "max_sentence2_length": 208, - "num_samples": 1503, - "number_of_characters": 161305 + "unique_sentence2": 1497 }, "urd_Arab-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 155709, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, "max_sentence2_length": 178, - "num_samples": 1503, - "number_of_characters": 155709 + "unique_sentence2": 1497 }, "urd_Arab-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 161765, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, "max_sentence2_length": 210, - "num_samples": 1503, - "number_of_characters": 161765 + "unique_sentence2": 1498 }, "urd_Arab-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 166753, + "unique_pairs": 1500, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, "max_sentence2_length": 209, - "num_samples": 1503, - "number_of_characters": 166753 + "unique_sentence2": 1499 }, "urd_Arab-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 160437, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 160437 + "unique_sentence2": 1497 }, "urd_Arab-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 156003, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 156003 + "unique_sentence2": 1500 }, "urd_Arab-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 157990, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, "max_sentence2_length": 205, - "num_samples": 1503, - "number_of_characters": 157990 + "unique_sentence2": 1500 }, "urd_Arab-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 159685, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, "max_sentence2_length": 192, - "num_samples": 1503, - "number_of_characters": 159685 + "unique_sentence2": 1497 }, "urd_Arab-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 164898, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, "max_sentence2_length": 201, - "num_samples": 1503, - "number_of_characters": 164898 + "unique_sentence2": 1499 }, "urd_Arab-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 164402, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, "max_sentence2_length": 203, - "num_samples": 1503, - "number_of_characters": 164402 + "unique_sentence2": 1502 }, "urd_Arab-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 162129, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, "max_sentence2_length": 230, - "num_samples": 1503, - "number_of_characters": 162129 + "unique_sentence2": 1499 }, "urd_Arab-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 172559, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, "max_sentence2_length": 219, - "num_samples": 1503, - "number_of_characters": 172559 + "unique_sentence2": 1495 }, "urd_Arab-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 162468, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 162468 + "unique_sentence2": 1501 }, "urd_Arab-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 157037, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, "max_sentence2_length": 239, - "num_samples": 1503, - "number_of_characters": 157037 + "unique_sentence2": 1498 }, "urd_Arab-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 160627, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, "max_sentence2_length": 223, - "num_samples": 1503, - "number_of_characters": 160627 + "unique_sentence2": 1497 }, "urd_Arab-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 163944, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 163944 + "unique_sentence2": 1500 }, "urd_Arab-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 159922, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, "max_sentence2_length": 221, - "num_samples": 1503, - "number_of_characters": 159922 + "unique_sentence2": 1495 }, "urd_Arab-san_Deva": { + "num_samples": 1503, + "number_of_characters": 157814, + "unique_pairs": 1501, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, "max_sentence2_length": 181, - "num_samples": 1503, - "number_of_characters": 157814 + "unique_sentence2": 1500 }, "urd_Arab-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 169100, + "unique_pairs": 1502, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, "max_sentence2_length": 225, - "num_samples": 1503, - "number_of_characters": 169100 + "unique_sentence2": 1500 }, "urd_Arab-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 162344, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, "max_sentence2_length": 195, - "num_samples": 1503, - "number_of_characters": 162344 + "unique_sentence2": 1490 }, "urd_Arab-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 174587, + "unique_pairs": 1503, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, "max_sentence2_length": 224, - "num_samples": 1503, - "number_of_characters": 174587 + "unique_sentence2": 1492 }, "urd_Arab-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 157411, + "unique_pairs": 1499, "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, "max_sentence1_length": 206, + "unique_sentence1": 1498, "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, "max_sentence2_length": 182, - "num_samples": 1503, - "number_of_characters": 157411 + "unique_sentence2": 1495 } } } diff --git a/mteb/descriptive_stats/BitextMining/IN22GenBitextMining.json b/mteb/descriptive_stats/BitextMining/IN22GenBitextMining.json new file mode 100644 index 000000000..41741d3da --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/IN22GenBitextMining.json @@ -0,0 +1,5581 @@ +{ + "test": { + "min_sentence1_length": 9, + "average_sentence1_length": 156.6821925951087, + "max_sentence1_length": 692, + "min_sentence2_length": 9, + "average_sentence2_length": 156.6821925951087, + "max_sentence2_length": 692, + "num_samples": 518144, + "num_samples_sentence2": 518144, + "number_of_characters": 162367876, + "hf_subset_descriptive_stats": { + "asm_Beng-ben_Beng": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 310622 + }, + "asm_Beng-brx_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 323609 + }, + "asm_Beng-doi_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319020 + }, + "asm_Beng-eng_Latn": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320098 + }, + "asm_Beng-gom_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312594 + }, + "asm_Beng-guj_Gujr": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 309440 + }, + "asm_Beng-hin_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320106 + }, + "asm_Beng-kan_Knda": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 332064 + }, + "asm_Beng-kas_Arab": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 322764 + }, + "asm_Beng-mai_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 308682 + }, + "asm_Beng-mal_Mlym": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 343636 + }, + "asm_Beng-mar_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 321784 + }, + "asm_Beng-mni_Mtei": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 313134 + }, + "asm_Beng-npi_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 313419 + }, + "asm_Beng-ory_Orya": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 334226 + }, + "asm_Beng-pan_Guru": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 306863 + }, + "asm_Beng-san_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318079 + }, + "asm_Beng-sat_Olck": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 326732 + }, + "asm_Beng-snd_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320421 + }, + "asm_Beng-tam_Taml": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 348346 + }, + "asm_Beng-tel_Telu": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319045 + }, + "asm_Beng-urd_Arab": { + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 315134 + }, + "ben_Beng-asm_Beng": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 310622 + }, + "ben_Beng-brx_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 313313 + }, + "ben_Beng-doi_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 308724 + }, + "ben_Beng-eng_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 309802 + }, + "ben_Beng-gom_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 302298 + }, + "ben_Beng-guj_Gujr": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 299144 + }, + "ben_Beng-hin_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 309810 + }, + "ben_Beng-kan_Knda": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 321768 + }, + "ben_Beng-kas_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312468 + }, + "ben_Beng-mai_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 298386 + }, + "ben_Beng-mal_Mlym": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 333340 + }, + "ben_Beng-mar_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 311488 + }, + "ben_Beng-mni_Mtei": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 302838 + }, + "ben_Beng-npi_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 303123 + }, + "ben_Beng-ory_Orya": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 323930 + }, + "ben_Beng-pan_Guru": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 296567 + }, + "ben_Beng-san_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 307783 + }, + "ben_Beng-sat_Olck": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 316436 + }, + "ben_Beng-snd_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 310125 + }, + "ben_Beng-tam_Taml": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 338050 + }, + "ben_Beng-tel_Telu": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 308749 + }, + "ben_Beng-urd_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 304838 + }, + "brx_Deva-asm_Beng": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 323609 + }, + "brx_Deva-ben_Beng": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 313313 + }, + "brx_Deva-doi_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 321711 + }, + "brx_Deva-eng_Latn": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 322789 + }, + "brx_Deva-gom_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 315285 + }, + "brx_Deva-guj_Gujr": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312131 + }, + "brx_Deva-hin_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 322797 + }, + "brx_Deva-kan_Knda": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 334755 + }, + "brx_Deva-kas_Arab": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 325455 + }, + "brx_Deva-mai_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 311373 + }, + "brx_Deva-mal_Mlym": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 346327 + }, + "brx_Deva-mar_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 324475 + }, + "brx_Deva-mni_Mtei": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 315825 + }, + "brx_Deva-npi_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 316110 + }, + "brx_Deva-ory_Orya": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 336917 + }, + "brx_Deva-pan_Guru": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 309554 + }, + "brx_Deva-san_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320770 + }, + "brx_Deva-sat_Olck": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 329423 + }, + "brx_Deva-snd_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 323112 + }, + "brx_Deva-tam_Taml": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 351037 + }, + "brx_Deva-tel_Telu": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 321736 + }, + "brx_Deva-urd_Arab": { + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 317825 + }, + "doi_Deva-asm_Beng": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319020 + }, + "doi_Deva-ben_Beng": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 308724 + }, + "doi_Deva-brx_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 321711 + }, + "doi_Deva-eng_Latn": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318200 + }, + "doi_Deva-gom_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 310696 + }, + "doi_Deva-guj_Gujr": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 307542 + }, + "doi_Deva-hin_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318208 + }, + "doi_Deva-kan_Knda": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 330166 + }, + "doi_Deva-kas_Arab": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320866 + }, + "doi_Deva-mai_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 306784 + }, + "doi_Deva-mal_Mlym": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 341738 + }, + "doi_Deva-mar_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319886 + }, + "doi_Deva-mni_Mtei": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 311236 + }, + "doi_Deva-npi_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 311521 + }, + "doi_Deva-ory_Orya": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 332328 + }, + "doi_Deva-pan_Guru": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 304965 + }, + "doi_Deva-san_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 316181 + }, + "doi_Deva-sat_Olck": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 324834 + }, + "doi_Deva-snd_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318523 + }, + "doi_Deva-tam_Taml": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 346448 + }, + "doi_Deva-tel_Telu": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 317147 + }, + "doi_Deva-urd_Arab": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 313236 + }, + "eng_Latn-asm_Beng": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320098 + }, + "eng_Latn-ben_Beng": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 309802 + }, + "eng_Latn-brx_Deva": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 322789 + }, + "eng_Latn-doi_Deva": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318200 + }, + "eng_Latn-gom_Deva": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 311774 + }, + "eng_Latn-guj_Gujr": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 308620 + }, + "eng_Latn-hin_Deva": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319286 + }, + "eng_Latn-kan_Knda": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 331244 + }, + "eng_Latn-kas_Arab": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 321944 + }, + "eng_Latn-mai_Deva": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 307862 + }, + "eng_Latn-mal_Mlym": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 342816 + }, + "eng_Latn-mar_Deva": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320964 + }, + "eng_Latn-mni_Mtei": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312314 + }, + "eng_Latn-npi_Deva": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312599 + }, + "eng_Latn-ory_Orya": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 333406 + }, + "eng_Latn-pan_Guru": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 306043 + }, + "eng_Latn-san_Deva": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 317259 + }, + "eng_Latn-sat_Olck": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 325912 + }, + "eng_Latn-snd_Deva": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319601 + }, + "eng_Latn-tam_Taml": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 347526 + }, + "eng_Latn-tel_Telu": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318225 + }, + "eng_Latn-urd_Arab": { + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 314314 + }, + "gom_Deva-asm_Beng": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312594 + }, + "gom_Deva-ben_Beng": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 302298 + }, + "gom_Deva-brx_Deva": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 315285 + }, + "gom_Deva-doi_Deva": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 310696 + }, + "gom_Deva-eng_Latn": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 311774 + }, + "gom_Deva-guj_Gujr": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 301116 + }, + "gom_Deva-hin_Deva": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 311782 + }, + "gom_Deva-kan_Knda": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 323740 + }, + "gom_Deva-kas_Arab": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 314440 + }, + "gom_Deva-mai_Deva": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 300358 + }, + "gom_Deva-mal_Mlym": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 335312 + }, + "gom_Deva-mar_Deva": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 313460 + }, + "gom_Deva-mni_Mtei": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 304810 + }, + "gom_Deva-npi_Deva": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 305095 + }, + "gom_Deva-ory_Orya": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 325902 + }, + "gom_Deva-pan_Guru": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 298539 + }, + "gom_Deva-san_Deva": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 309755 + }, + "gom_Deva-sat_Olck": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318408 + }, + "gom_Deva-snd_Deva": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312097 + }, + "gom_Deva-tam_Taml": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 340022 + }, + "gom_Deva-tel_Telu": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 310721 + }, + "gom_Deva-urd_Arab": { + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 306810 + }, + "guj_Gujr-asm_Beng": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 309440 + }, + "guj_Gujr-ben_Beng": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 299144 + }, + "guj_Gujr-brx_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312131 + }, + "guj_Gujr-doi_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 307542 + }, + "guj_Gujr-eng_Latn": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 308620 + }, + "guj_Gujr-gom_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 301116 + }, + "guj_Gujr-hin_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 308628 + }, + "guj_Gujr-kan_Knda": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320586 + }, + "guj_Gujr-kas_Arab": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 311286 + }, + "guj_Gujr-mai_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 297204 + }, + "guj_Gujr-mal_Mlym": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 332158 + }, + "guj_Gujr-mar_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 310306 + }, + "guj_Gujr-mni_Mtei": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 301656 + }, + "guj_Gujr-npi_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 301941 + }, + "guj_Gujr-ory_Orya": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 322748 + }, + "guj_Gujr-pan_Guru": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 295385 + }, + "guj_Gujr-san_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 306601 + }, + "guj_Gujr-sat_Olck": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 315254 + }, + "guj_Gujr-snd_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 308943 + }, + "guj_Gujr-tam_Taml": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 336868 + }, + "guj_Gujr-tel_Telu": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 307567 + }, + "guj_Gujr-urd_Arab": { + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 303656 + }, + "hin_Deva-asm_Beng": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320106 + }, + "hin_Deva-ben_Beng": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 309810 + }, + "hin_Deva-brx_Deva": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 322797 + }, + "hin_Deva-doi_Deva": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318208 + }, + "hin_Deva-eng_Latn": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319286 + }, + "hin_Deva-gom_Deva": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 311782 + }, + "hin_Deva-guj_Gujr": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 308628 + }, + "hin_Deva-kan_Knda": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 331252 + }, + "hin_Deva-kas_Arab": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 321952 + }, + "hin_Deva-mai_Deva": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 307870 + }, + "hin_Deva-mal_Mlym": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 342824 + }, + "hin_Deva-mar_Deva": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320972 + }, + "hin_Deva-mni_Mtei": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312322 + }, + "hin_Deva-npi_Deva": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312607 + }, + "hin_Deva-ory_Orya": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 333414 + }, + "hin_Deva-pan_Guru": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 306051 + }, + "hin_Deva-san_Deva": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 317267 + }, + "hin_Deva-sat_Olck": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 325920 + }, + "hin_Deva-snd_Deva": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319609 + }, + "hin_Deva-tam_Taml": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 347534 + }, + "hin_Deva-tel_Telu": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318233 + }, + "hin_Deva-urd_Arab": { + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 314322 + }, + "kan_Knda-asm_Beng": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 332064 + }, + "kan_Knda-ben_Beng": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 321768 + }, + "kan_Knda-brx_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 334755 + }, + "kan_Knda-doi_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 330166 + }, + "kan_Knda-eng_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 331244 + }, + "kan_Knda-gom_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 323740 + }, + "kan_Knda-guj_Gujr": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320586 + }, + "kan_Knda-hin_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 331252 + }, + "kan_Knda-kas_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 333910 + }, + "kan_Knda-mai_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319828 + }, + "kan_Knda-mal_Mlym": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 354782 + }, + "kan_Knda-mar_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 332930 + }, + "kan_Knda-mni_Mtei": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 324280 + }, + "kan_Knda-npi_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 324565 + }, + "kan_Knda-ory_Orya": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 345372 + }, + "kan_Knda-pan_Guru": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318009 + }, + "kan_Knda-san_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 329225 + }, + "kan_Knda-sat_Olck": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 337878 + }, + "kan_Knda-snd_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 331567 + }, + "kan_Knda-tam_Taml": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 359492 + }, + "kan_Knda-tel_Telu": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 330191 + }, + "kan_Knda-urd_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 326280 + }, + "kas_Arab-asm_Beng": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 322764 + }, + "kas_Arab-ben_Beng": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312468 + }, + "kas_Arab-brx_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 325455 + }, + "kas_Arab-doi_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320866 + }, + "kas_Arab-eng_Latn": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 321944 + }, + "kas_Arab-gom_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 314440 + }, + "kas_Arab-guj_Gujr": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 311286 + }, + "kas_Arab-hin_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 321952 + }, + "kas_Arab-kan_Knda": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 333910 + }, + "kas_Arab-mai_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 310528 + }, + "kas_Arab-mal_Mlym": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 345482 + }, + "kas_Arab-mar_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 323630 + }, + "kas_Arab-mni_Mtei": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 314980 + }, + "kas_Arab-npi_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 315265 + }, + "kas_Arab-ory_Orya": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 336072 + }, + "kas_Arab-pan_Guru": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 308709 + }, + "kas_Arab-san_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319925 + }, + "kas_Arab-sat_Olck": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 328578 + }, + "kas_Arab-snd_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 322267 + }, + "kas_Arab-tam_Taml": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 350192 + }, + "kas_Arab-tel_Telu": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320891 + }, + "kas_Arab-urd_Arab": { + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 316980 + }, + "mai_Deva-asm_Beng": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 308682 + }, + "mai_Deva-ben_Beng": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 298386 + }, + "mai_Deva-brx_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 311373 + }, + "mai_Deva-doi_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 306784 + }, + "mai_Deva-eng_Latn": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 307862 + }, + "mai_Deva-gom_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 300358 + }, + "mai_Deva-guj_Gujr": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 297204 + }, + "mai_Deva-hin_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 307870 + }, + "mai_Deva-kan_Knda": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319828 + }, + "mai_Deva-kas_Arab": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 310528 + }, + "mai_Deva-mal_Mlym": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 331400 + }, + "mai_Deva-mar_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 309548 + }, + "mai_Deva-mni_Mtei": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 300898 + }, + "mai_Deva-npi_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 301183 + }, + "mai_Deva-ory_Orya": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 321990 + }, + "mai_Deva-pan_Guru": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 294627 + }, + "mai_Deva-san_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 305843 + }, + "mai_Deva-sat_Olck": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 314496 + }, + "mai_Deva-snd_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 308185 + }, + "mai_Deva-tam_Taml": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 336110 + }, + "mai_Deva-tel_Telu": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 306809 + }, + "mai_Deva-urd_Arab": { + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 302898 + }, + "mal_Mlym-asm_Beng": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 343636 + }, + "mal_Mlym-ben_Beng": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 333340 + }, + "mal_Mlym-brx_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 346327 + }, + "mal_Mlym-doi_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 341738 + }, + "mal_Mlym-eng_Latn": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 342816 + }, + "mal_Mlym-gom_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 335312 + }, + "mal_Mlym-guj_Gujr": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 332158 + }, + "mal_Mlym-hin_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 342824 + }, + "mal_Mlym-kan_Knda": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 354782 + }, + "mal_Mlym-kas_Arab": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 345482 + }, + "mal_Mlym-mai_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 331400 + }, + "mal_Mlym-mar_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 344502 + }, + "mal_Mlym-mni_Mtei": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 335852 + }, + "mal_Mlym-npi_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 336137 + }, + "mal_Mlym-ory_Orya": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 356944 + }, + "mal_Mlym-pan_Guru": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 329581 + }, + "mal_Mlym-san_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 340797 + }, + "mal_Mlym-sat_Olck": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 349450 + }, + "mal_Mlym-snd_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 343139 + }, + "mal_Mlym-tam_Taml": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 371064 + }, + "mal_Mlym-tel_Telu": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 341763 + }, + "mal_Mlym-urd_Arab": { + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 337852 + }, + "mar_Deva-asm_Beng": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 321784 + }, + "mar_Deva-ben_Beng": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 311488 + }, + "mar_Deva-brx_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 324475 + }, + "mar_Deva-doi_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319886 + }, + "mar_Deva-eng_Latn": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320964 + }, + "mar_Deva-gom_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 313460 + }, + "mar_Deva-guj_Gujr": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 310306 + }, + "mar_Deva-hin_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320972 + }, + "mar_Deva-kan_Knda": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 332930 + }, + "mar_Deva-kas_Arab": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 323630 + }, + "mar_Deva-mai_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 309548 + }, + "mar_Deva-mal_Mlym": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 344502 + }, + "mar_Deva-mni_Mtei": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 314000 + }, + "mar_Deva-npi_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 314285 + }, + "mar_Deva-ory_Orya": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 335092 + }, + "mar_Deva-pan_Guru": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 307729 + }, + "mar_Deva-san_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318945 + }, + "mar_Deva-sat_Olck": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 327598 + }, + "mar_Deva-snd_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 321287 + }, + "mar_Deva-tam_Taml": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 349212 + }, + "mar_Deva-tel_Telu": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319911 + }, + "mar_Deva-urd_Arab": { + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 316000 + }, + "mni_Mtei-asm_Beng": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 313134 + }, + "mni_Mtei-ben_Beng": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 302838 + }, + "mni_Mtei-brx_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 315825 + }, + "mni_Mtei-doi_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 311236 + }, + "mni_Mtei-eng_Latn": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312314 + }, + "mni_Mtei-gom_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 304810 + }, + "mni_Mtei-guj_Gujr": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 301656 + }, + "mni_Mtei-hin_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312322 + }, + "mni_Mtei-kan_Knda": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 324280 + }, + "mni_Mtei-kas_Arab": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 314980 + }, + "mni_Mtei-mai_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 300898 + }, + "mni_Mtei-mal_Mlym": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 335852 + }, + "mni_Mtei-mar_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 314000 + }, + "mni_Mtei-npi_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 305635 + }, + "mni_Mtei-ory_Orya": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 326442 + }, + "mni_Mtei-pan_Guru": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 299079 + }, + "mni_Mtei-san_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 310295 + }, + "mni_Mtei-sat_Olck": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318948 + }, + "mni_Mtei-snd_Deva": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312637 + }, + "mni_Mtei-tam_Taml": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 340562 + }, + "mni_Mtei-tel_Telu": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 311261 + }, + "mni_Mtei-urd_Arab": { + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 307350 + }, + "npi_Deva-asm_Beng": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 313419 + }, + "npi_Deva-ben_Beng": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 303123 + }, + "npi_Deva-brx_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 316110 + }, + "npi_Deva-doi_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 311521 + }, + "npi_Deva-eng_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312599 + }, + "npi_Deva-gom_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 305095 + }, + "npi_Deva-guj_Gujr": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 301941 + }, + "npi_Deva-hin_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312607 + }, + "npi_Deva-kan_Knda": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 324565 + }, + "npi_Deva-kas_Arab": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 315265 + }, + "npi_Deva-mai_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 301183 + }, + "npi_Deva-mal_Mlym": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 336137 + }, + "npi_Deva-mar_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 314285 + }, + "npi_Deva-mni_Mtei": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 305635 + }, + "npi_Deva-ory_Orya": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 326727 + }, + "npi_Deva-pan_Guru": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 299364 + }, + "npi_Deva-san_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 310580 + }, + "npi_Deva-sat_Olck": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319233 + }, + "npi_Deva-snd_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312922 + }, + "npi_Deva-tam_Taml": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 340847 + }, + "npi_Deva-tel_Telu": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 311546 + }, + "npi_Deva-urd_Arab": { + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 307635 + }, + "ory_Orya-asm_Beng": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 334226 + }, + "ory_Orya-ben_Beng": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 323930 + }, + "ory_Orya-brx_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 336917 + }, + "ory_Orya-doi_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 332328 + }, + "ory_Orya-eng_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 333406 + }, + "ory_Orya-gom_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 325902 + }, + "ory_Orya-guj_Gujr": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 322748 + }, + "ory_Orya-hin_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 333414 + }, + "ory_Orya-kan_Knda": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 345372 + }, + "ory_Orya-kas_Arab": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 336072 + }, + "ory_Orya-mai_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 321990 + }, + "ory_Orya-mal_Mlym": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 356944 + }, + "ory_Orya-mar_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 335092 + }, + "ory_Orya-mni_Mtei": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 326442 + }, + "ory_Orya-npi_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 326727 + }, + "ory_Orya-pan_Guru": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320171 + }, + "ory_Orya-san_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 331387 + }, + "ory_Orya-sat_Olck": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 340040 + }, + "ory_Orya-snd_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 333729 + }, + "ory_Orya-tam_Taml": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 361654 + }, + "ory_Orya-tel_Telu": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 332353 + }, + "ory_Orya-urd_Arab": { + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 328442 + }, + "pan_Guru-asm_Beng": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 306863 + }, + "pan_Guru-ben_Beng": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 296567 + }, + "pan_Guru-brx_Deva": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 309554 + }, + "pan_Guru-doi_Deva": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 304965 + }, + "pan_Guru-eng_Latn": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 306043 + }, + "pan_Guru-gom_Deva": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 298539 + }, + "pan_Guru-guj_Gujr": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 295385 + }, + "pan_Guru-hin_Deva": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 306051 + }, + "pan_Guru-kan_Knda": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318009 + }, + "pan_Guru-kas_Arab": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 308709 + }, + "pan_Guru-mai_Deva": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 294627 + }, + "pan_Guru-mal_Mlym": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 329581 + }, + "pan_Guru-mar_Deva": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 307729 + }, + "pan_Guru-mni_Mtei": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 299079 + }, + "pan_Guru-npi_Deva": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 299364 + }, + "pan_Guru-ory_Orya": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320171 + }, + "pan_Guru-san_Deva": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 304024 + }, + "pan_Guru-sat_Olck": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312677 + }, + "pan_Guru-snd_Deva": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 306366 + }, + "pan_Guru-tam_Taml": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 334291 + }, + "pan_Guru-tel_Telu": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 304990 + }, + "pan_Guru-urd_Arab": { + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 301079 + }, + "san_Deva-asm_Beng": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318079 + }, + "san_Deva-ben_Beng": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 307783 + }, + "san_Deva-brx_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320770 + }, + "san_Deva-doi_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 316181 + }, + "san_Deva-eng_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 317259 + }, + "san_Deva-gom_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 309755 + }, + "san_Deva-guj_Gujr": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 306601 + }, + "san_Deva-hin_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 317267 + }, + "san_Deva-kan_Knda": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 329225 + }, + "san_Deva-kas_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319925 + }, + "san_Deva-mai_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 305843 + }, + "san_Deva-mal_Mlym": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 340797 + }, + "san_Deva-mar_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318945 + }, + "san_Deva-mni_Mtei": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 310295 + }, + "san_Deva-npi_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 310580 + }, + "san_Deva-ory_Orya": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 331387 + }, + "san_Deva-pan_Guru": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 304024 + }, + "san_Deva-sat_Olck": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 323893 + }, + "san_Deva-snd_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 317582 + }, + "san_Deva-tam_Taml": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 345507 + }, + "san_Deva-tel_Telu": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 316206 + }, + "san_Deva-urd_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312295 + }, + "sat_Olck-asm_Beng": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 326732 + }, + "sat_Olck-ben_Beng": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 316436 + }, + "sat_Olck-brx_Deva": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 329423 + }, + "sat_Olck-doi_Deva": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 324834 + }, + "sat_Olck-eng_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 325912 + }, + "sat_Olck-gom_Deva": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318408 + }, + "sat_Olck-guj_Gujr": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 315254 + }, + "sat_Olck-hin_Deva": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 325920 + }, + "sat_Olck-kan_Knda": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 337878 + }, + "sat_Olck-kas_Arab": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 328578 + }, + "sat_Olck-mai_Deva": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 314496 + }, + "sat_Olck-mal_Mlym": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 349450 + }, + "sat_Olck-mar_Deva": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 327598 + }, + "sat_Olck-mni_Mtei": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318948 + }, + "sat_Olck-npi_Deva": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319233 + }, + "sat_Olck-ory_Orya": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 340040 + }, + "sat_Olck-pan_Guru": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312677 + }, + "sat_Olck-san_Deva": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 323893 + }, + "sat_Olck-snd_Deva": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 326235 + }, + "sat_Olck-tam_Taml": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 354160 + }, + "sat_Olck-tel_Telu": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 324859 + }, + "sat_Olck-urd_Arab": { + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320948 + }, + "snd_Deva-asm_Beng": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320421 + }, + "snd_Deva-ben_Beng": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 310125 + }, + "snd_Deva-brx_Deva": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 323112 + }, + "snd_Deva-doi_Deva": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318523 + }, + "snd_Deva-eng_Latn": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319601 + }, + "snd_Deva-gom_Deva": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312097 + }, + "snd_Deva-guj_Gujr": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 308943 + }, + "snd_Deva-hin_Deva": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319609 + }, + "snd_Deva-kan_Knda": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 331567 + }, + "snd_Deva-kas_Arab": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 322267 + }, + "snd_Deva-mai_Deva": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 308185 + }, + "snd_Deva-mal_Mlym": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 343139 + }, + "snd_Deva-mar_Deva": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 321287 + }, + "snd_Deva-mni_Mtei": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312637 + }, + "snd_Deva-npi_Deva": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312922 + }, + "snd_Deva-ory_Orya": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 333729 + }, + "snd_Deva-pan_Guru": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 306366 + }, + "snd_Deva-san_Deva": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 317582 + }, + "snd_Deva-sat_Olck": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 326235 + }, + "snd_Deva-tam_Taml": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 347849 + }, + "snd_Deva-tel_Telu": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318548 + }, + "snd_Deva-urd_Arab": { + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 314637 + }, + "tam_Taml-asm_Beng": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 348346 + }, + "tam_Taml-ben_Beng": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 338050 + }, + "tam_Taml-brx_Deva": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 351037 + }, + "tam_Taml-doi_Deva": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 346448 + }, + "tam_Taml-eng_Latn": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 347526 + }, + "tam_Taml-gom_Deva": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 340022 + }, + "tam_Taml-guj_Gujr": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 336868 + }, + "tam_Taml-hin_Deva": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 347534 + }, + "tam_Taml-kan_Knda": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 359492 + }, + "tam_Taml-kas_Arab": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 350192 + }, + "tam_Taml-mai_Deva": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 336110 + }, + "tam_Taml-mal_Mlym": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 371064 + }, + "tam_Taml-mar_Deva": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 349212 + }, + "tam_Taml-mni_Mtei": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 340562 + }, + "tam_Taml-npi_Deva": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 340847 + }, + "tam_Taml-ory_Orya": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 361654 + }, + "tam_Taml-pan_Guru": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 334291 + }, + "tam_Taml-san_Deva": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 345507 + }, + "tam_Taml-sat_Olck": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 354160 + }, + "tam_Taml-snd_Deva": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 347849 + }, + "tam_Taml-tel_Telu": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 346473 + }, + "tam_Taml-urd_Arab": { + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 342562 + }, + "tel_Telu-asm_Beng": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319045 + }, + "tel_Telu-ben_Beng": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 308749 + }, + "tel_Telu-brx_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 321736 + }, + "tel_Telu-doi_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 317147 + }, + "tel_Telu-eng_Latn": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318225 + }, + "tel_Telu-gom_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 310721 + }, + "tel_Telu-guj_Gujr": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 307567 + }, + "tel_Telu-hin_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318233 + }, + "tel_Telu-kan_Knda": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 330191 + }, + "tel_Telu-kas_Arab": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320891 + }, + "tel_Telu-mai_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 306809 + }, + "tel_Telu-mal_Mlym": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 341763 + }, + "tel_Telu-mar_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 319911 + }, + "tel_Telu-mni_Mtei": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 311261 + }, + "tel_Telu-npi_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 311546 + }, + "tel_Telu-ory_Orya": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 332353 + }, + "tel_Telu-pan_Guru": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 304990 + }, + "tel_Telu-san_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 316206 + }, + "tel_Telu-sat_Olck": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 324859 + }, + "tel_Telu-snd_Deva": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 318548 + }, + "tel_Telu-tam_Taml": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 346473 + }, + "tel_Telu-urd_Arab": { + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 313261 + }, + "urd_Arab-asm_Beng": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 315134 + }, + "urd_Arab-ben_Beng": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 304838 + }, + "urd_Arab-brx_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 317825 + }, + "urd_Arab-doi_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 313236 + }, + "urd_Arab-eng_Latn": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 314314 + }, + "urd_Arab-gom_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 306810 + }, + "urd_Arab-guj_Gujr": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 303656 + }, + "urd_Arab-hin_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 314322 + }, + "urd_Arab-kan_Knda": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 326280 + }, + "urd_Arab-kas_Arab": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 316980 + }, + "urd_Arab-mai_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 302898 + }, + "urd_Arab-mal_Mlym": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 337852 + }, + "urd_Arab-mar_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 316000 + }, + "urd_Arab-mni_Mtei": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 307350 + }, + "urd_Arab-npi_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 307635 + }, + "urd_Arab-ory_Orya": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 328442 + }, + "urd_Arab-pan_Guru": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 301079 + }, + "urd_Arab-san_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 312295 + }, + "urd_Arab-sat_Olck": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 320948 + }, + "urd_Arab-snd_Deva": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 314637 + }, + "urd_Arab-tam_Taml": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 342562 + }, + "urd_Arab-tel_Telu": { + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "num_samples": 1024, + "num_samples_sentence2": 1024, + "number_of_characters": 313261 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/IWSLT2017BitextMining.json b/mteb/descriptive_stats/BitextMining/IWSLT2017BitextMining.json new file mode 100644 index 000000000..40451d3b5 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/IWSLT2017BitextMining.json @@ -0,0 +1,279 @@ +{ + "validation": { + "min_sentence1_length": 2, + "average_sentence1_length": 97.0061992889051, + "max_sentence1_length": 521, + "min_sentence2_length": 2, + "average_sentence2_length": 97.0061992889051, + "max_sentence2_length": 521, + "num_samples": 21938, + "num_samples_sentence2": 21938, + "number_of_characters": 4256244, + "hf_subset_descriptive_stats": { + "ar-en": { + "min_sentence1_length": 4, + "average_sentence1_length": 85.48873873873873, + "max_sentence1_length": 369, + "min_sentence2_length": 10, + "average_sentence2_length": 108.76689189189189, + "max_sentence2_length": 462, + "num_samples": 888, + "num_samples_sentence2": 888, + "number_of_characters": 172499 + }, + "de-en": { + "min_sentence1_length": 6, + "average_sentence1_length": 119.02702702702703, + "max_sentence1_length": 521, + "min_sentence2_length": 10, + "average_sentence2_length": 108.82882882882883, + "max_sentence2_length": 462, + "num_samples": 888, + "num_samples_sentence2": 888, + "number_of_characters": 202336 + }, + "en-ar": { + "min_sentence1_length": 10, + "average_sentence1_length": 108.76689189189189, + "max_sentence1_length": 462, + "min_sentence2_length": 4, + "average_sentence2_length": 85.48873873873873, + "max_sentence2_length": 369, + "num_samples": 888, + "num_samples_sentence2": 888, + "number_of_characters": 172499 + }, + "en-de": { + "min_sentence1_length": 10, + "average_sentence1_length": 108.82882882882883, + "max_sentence1_length": 462, + "min_sentence2_length": 6, + "average_sentence2_length": 119.02702702702703, + "max_sentence2_length": 521, + "num_samples": 888, + "num_samples_sentence2": 888, + "number_of_characters": 202336 + }, + "en-fr": { + "min_sentence1_length": 10, + "average_sentence1_length": 108.4123595505618, + "max_sentence1_length": 462, + "min_sentence2_length": 6, + "average_sentence2_length": 113.63146067415731, + "max_sentence2_length": 493, + "num_samples": 890, + "num_samples_sentence2": 890, + "number_of_characters": 197619 + }, + "en-it": { + "min_sentence1_length": 10, + "average_sentence1_length": 103.0010764262648, + "max_sentence1_length": 433, + "min_sentence2_length": 7, + "average_sentence2_length": 103.46071044133477, + "max_sentence2_length": 444, + "num_samples": 929, + "num_samples_sentence2": 929, + "number_of_characters": 191803 + }, + "en-ja": { + "min_sentence1_length": 10, + "average_sentence1_length": 109.80826636050517, + "max_sentence1_length": 462, + "min_sentence2_length": 5, + "average_sentence2_length": 42.59357060849598, + "max_sentence2_length": 225, + "num_samples": 871, + "num_samples_sentence2": 871, + "number_of_characters": 132742 + }, + "en-ko": { + "min_sentence1_length": 10, + "average_sentence1_length": 107.74175199089875, + "max_sentence1_length": 462, + "min_sentence2_length": 3, + "average_sentence2_length": 54.5551763367463, + "max_sentence2_length": 250, + "num_samples": 879, + "num_samples_sentence2": 879, + "number_of_characters": 142659 + }, + "en-nl": { + "min_sentence1_length": 10, + "average_sentence1_length": 95.26819541375872, + "max_sentence1_length": 433, + "min_sentence2_length": 4, + "average_sentence2_length": 93.80159521435692, + "max_sentence2_length": 477, + "num_samples": 1003, + "num_samples_sentence2": 1003, + "number_of_characters": 189637 + }, + "en-ro": { + "min_sentence1_length": 10, + "average_sentence1_length": 104.72100656455142, + "max_sentence1_length": 433, + "min_sentence2_length": 9, + "average_sentence2_length": 107.67286652078775, + "max_sentence2_length": 448, + "num_samples": 914, + "num_samples_sentence2": 914, + "number_of_characters": 194128 + }, + "en-zh": { + "min_sentence1_length": 10, + "average_sentence1_length": 109.36518771331058, + "max_sentence1_length": 462, + "min_sentence2_length": 2, + "average_sentence2_length": 39.811149032992034, + "max_sentence2_length": 230, + "num_samples": 879, + "num_samples_sentence2": 879, + "number_of_characters": 131126 + }, + "fr-en": { + "min_sentence1_length": 6, + "average_sentence1_length": 113.63146067415731, + "max_sentence1_length": 493, + "min_sentence2_length": 10, + "average_sentence2_length": 108.4123595505618, + "max_sentence2_length": 462, + "num_samples": 890, + "num_samples_sentence2": 890, + "number_of_characters": 197619 + }, + "it-en": { + "min_sentence1_length": 7, + "average_sentence1_length": 103.46071044133477, + "max_sentence1_length": 444, + "min_sentence2_length": 10, + "average_sentence2_length": 103.0010764262648, + "max_sentence2_length": 433, + "num_samples": 929, + "num_samples_sentence2": 929, + "number_of_characters": 191803 + }, + "it-nl": { + "min_sentence1_length": 7, + "average_sentence1_length": 94.64235764235764, + "max_sentence1_length": 459, + "min_sentence2_length": 7, + "average_sentence2_length": 94.02697302697302, + "max_sentence2_length": 505, + "num_samples": 1001, + "num_samples_sentence2": 1001, + "number_of_characters": 188858 + }, + "it-ro": { + "min_sentence1_length": 7, + "average_sentence1_length": 103.90809628008753, + "max_sentence1_length": 435, + "min_sentence2_length": 9, + "average_sentence2_length": 107.62253829321664, + "max_sentence2_length": 448, + "num_samples": 914, + "num_samples_sentence2": 914, + "number_of_characters": 193339 + }, + "ja-en": { + "min_sentence1_length": 5, + "average_sentence1_length": 42.59357060849598, + "max_sentence1_length": 225, + "min_sentence2_length": 10, + "average_sentence2_length": 109.80826636050517, + "max_sentence2_length": 462, + "num_samples": 871, + "num_samples_sentence2": 871, + "number_of_characters": 132742 + }, + "ko-en": { + "min_sentence1_length": 3, + "average_sentence1_length": 54.5551763367463, + "max_sentence1_length": 250, + "min_sentence2_length": 10, + "average_sentence2_length": 107.74175199089875, + "max_sentence2_length": 462, + "num_samples": 879, + "num_samples_sentence2": 879, + "number_of_characters": 142659 + }, + "nl-en": { + "min_sentence1_length": 4, + "average_sentence1_length": 93.80159521435692, + "max_sentence1_length": 477, + "min_sentence2_length": 10, + "average_sentence2_length": 95.26819541375872, + "max_sentence2_length": 433, + "num_samples": 1003, + "num_samples_sentence2": 1003, + "number_of_characters": 189637 + }, + "nl-it": { + "min_sentence1_length": 7, + "average_sentence1_length": 94.02697302697302, + "max_sentence1_length": 505, + "min_sentence2_length": 7, + "average_sentence2_length": 94.64235764235764, + "max_sentence2_length": 459, + "num_samples": 1001, + "num_samples_sentence2": 1001, + "number_of_characters": 188858 + }, + "nl-ro": { + "min_sentence1_length": 7, + "average_sentence1_length": 102.01971522453451, + "max_sentence1_length": 478, + "min_sentence2_length": 9, + "average_sentence2_length": 107.59255202628697, + "max_sentence2_length": 515, + "num_samples": 913, + "num_samples_sentence2": 913, + "number_of_characters": 191376 + }, + "ro-en": { + "min_sentence1_length": 9, + "average_sentence1_length": 107.67286652078775, + "max_sentence1_length": 448, + "min_sentence2_length": 10, + "average_sentence2_length": 104.72100656455142, + "max_sentence2_length": 433, + "num_samples": 914, + "num_samples_sentence2": 914, + "number_of_characters": 194128 + }, + "ro-it": { + "min_sentence1_length": 9, + "average_sentence1_length": 107.62253829321664, + "max_sentence1_length": 448, + "min_sentence2_length": 7, + "average_sentence2_length": 103.90809628008753, + "max_sentence2_length": 435, + "num_samples": 914, + "num_samples_sentence2": 914, + "number_of_characters": 193339 + }, + "ro-nl": { + "min_sentence1_length": 9, + "average_sentence1_length": 107.59255202628697, + "max_sentence1_length": 515, + "min_sentence2_length": 7, + "average_sentence2_length": 102.01971522453451, + "max_sentence2_length": 478, + "num_samples": 913, + "num_samples_sentence2": 913, + "number_of_characters": 191376 + }, + "zh-en": { + "min_sentence1_length": 2, + "average_sentence1_length": 39.811149032992034, + "max_sentence1_length": 230, + "min_sentence2_length": 10, + "average_sentence2_length": 109.36518771331058, + "max_sentence2_length": 462, + "num_samples": 879, + "num_samples_sentence2": 879, + "number_of_characters": 131126 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/IndicGenBenchFloresBitextMining.json b/mteb/descriptive_stats/BitextMining/IndicGenBenchFloresBitextMining.json new file mode 100644 index 000000000..ac4678376 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/IndicGenBenchFloresBitextMining.json @@ -0,0 +1,1304 @@ +{ + "validation": { + "min_sentence1_length": 24, + "average_sentence1_length": 126.2541071490333, + "max_sentence1_length": 368, + "min_sentence2_length": 24, + "average_sentence2_length": 126.24390412617161, + "max_sentence2_length": 368, + "num_samples": 57826, + "num_samples_sentence2": 57826, + "number_of_characters": 14600950, + "hf_subset_descriptive_stats": { + "ben-eng": { + "min_sentence1_length": 30, + "average_sentence1_length": 123.64593781344033, + "max_sentence1_length": 320, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 248469 + }, + "eng-ben": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 30, + "average_sentence2_length": 123.64593781344033, + "max_sentence2_length": 320, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 248469 + }, + "guj-eng": { + "min_sentence1_length": 30, + "average_sentence1_length": 120.64493480441324, + "max_sentence1_length": 368, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 245477 + }, + "eng-guj": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 30, + "average_sentence2_length": 120.64493480441324, + "max_sentence2_length": 368, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 245477 + }, + "hin-eng": { + "min_sentence1_length": 31, + "average_sentence1_length": 125.75626880641926, + "max_sentence1_length": 355, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 250573 + }, + "eng-hin": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 31, + "average_sentence2_length": 125.74724172517553, + "max_sentence2_length": 355, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 250564 + }, + "kan-eng": { + "min_sentence1_length": 34, + "average_sentence1_length": 132.33400200601807, + "max_sentence1_length": 331, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 257131 + }, + "eng-kan": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 34, + "average_sentence2_length": 132.18856569709126, + "max_sentence2_length": 331, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 256986 + }, + "mal-eng": { + "min_sentence1_length": 31, + "average_sentence1_length": 142.52858575727183, + "max_sentence1_length": 360, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 267295 + }, + "eng-mal": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 31, + "average_sentence2_length": 142.5295887662989, + "max_sentence2_length": 360, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 267296 + }, + "mar-eng": { + "min_sentence1_length": 29, + "average_sentence1_length": 126.29187562688064, + "max_sentence1_length": 321, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 251107 + }, + "eng-mar": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 29, + "average_sentence2_length": 126.08124373119358, + "max_sentence2_length": 321, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 250897 + }, + "tam-eng": { + "min_sentence1_length": 30, + "average_sentence1_length": 146.567703109328, + "max_sentence1_length": 358, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 271322 + }, + "eng-tam": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 30, + "average_sentence2_length": 146.567703109328, + "max_sentence2_length": 358, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 271322 + }, + "tel-eng": { + "min_sentence1_length": 29, + "average_sentence1_length": 127.57372116349048, + "max_sentence1_length": 317, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 252385 + }, + "eng-tel": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 29, + "average_sentence2_length": 127.56870611835507, + "max_sentence2_length": 317, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 252380 + }, + "urd-eng": { + "min_sentence1_length": 37, + "average_sentence1_length": 125.00501504513541, + "max_sentence1_length": 295, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 249824 + }, + "eng-urd": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 37, + "average_sentence2_length": 125.00501504513541, + "max_sentence2_length": 295, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 249824 + }, + "asm-eng": { + "min_sentence1_length": 30, + "average_sentence1_length": 121.3901705115346, + "max_sentence1_length": 314, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 246220 + }, + "eng-asm": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 30, + "average_sentence2_length": 121.39418254764293, + "max_sentence2_length": 314, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 246224 + }, + "bho-eng": { + "min_sentence1_length": 25, + "average_sentence1_length": 122.06720160481444, + "max_sentence1_length": 326, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 246895 + }, + "eng-bho": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 25, + "average_sentence2_length": 122.0912738214644, + "max_sentence2_length": 326, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 246919 + }, + "nep-eng": { + "min_sentence1_length": 24, + "average_sentence1_length": 121.15346038114343, + "max_sentence1_length": 307, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 245984 + }, + "eng-nep": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 24, + "average_sentence2_length": 121.15346038114343, + "max_sentence2_length": 307, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 245984 + }, + "ory-eng": { + "min_sentence1_length": 34, + "average_sentence1_length": 129.4002006018054, + "max_sentence1_length": 308, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 254206 + }, + "eng-ory": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 34, + "average_sentence2_length": 129.4002006018054, + "max_sentence2_length": 308, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 254206 + }, + "pan-eng": { + "min_sentence1_length": 29, + "average_sentence1_length": 126.78435305917753, + "max_sentence1_length": 309, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 251598 + }, + "eng-pan": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 29, + "average_sentence2_length": 126.78335005015045, + "max_sentence2_length": 309, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 251597 + }, + "pus-eng": { + "min_sentence1_length": 32, + "average_sentence1_length": 122.62387161484453, + "max_sentence1_length": 300, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 247450 + }, + "eng-pus": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 32, + "average_sentence2_length": 122.62387161484453, + "max_sentence2_length": 300, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 247450 + }, + "san-eng": { + "min_sentence1_length": 31, + "average_sentence1_length": 124.22066198595788, + "max_sentence1_length": 311, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 249042 + }, + "eng-san": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 31, + "average_sentence2_length": 124.05516549648947, + "max_sentence2_length": 311, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 248877 + }, + "awa-eng": { + "min_sentence1_length": 34, + "average_sentence1_length": 123.11935807422267, + "max_sentence1_length": 329, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 247944 + }, + "eng-awa": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 34, + "average_sentence2_length": 123.05917753259779, + "max_sentence2_length": 329, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 247884 + }, + "bgc-eng": { + "min_sentence1_length": 27, + "average_sentence1_length": 121.10431293881645, + "max_sentence1_length": 303, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 245935 + }, + "eng-bgc": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 27, + "average_sentence2_length": 121.10431293881645, + "max_sentence2_length": 303, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 245935 + }, + "bod-eng": { + "min_sentence1_length": 26, + "average_sentence1_length": 141.74623871614844, + "max_sentence1_length": 355, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 266515 + }, + "eng-bod": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 26, + "average_sentence2_length": 141.72617853560683, + "max_sentence2_length": 355, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 266495 + }, + "boy-eng": { + "min_sentence1_length": 31, + "average_sentence1_length": 135.38615847542627, + "max_sentence1_length": 312, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 260174 + }, + "eng-boy": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 31, + "average_sentence2_length": 135.38615847542627, + "max_sentence2_length": 312, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 260174 + }, + "gbm-eng": { + "min_sentence1_length": 30, + "average_sentence1_length": 122.18154463390171, + "max_sentence1_length": 344, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 247009 + }, + "eng-gbm": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 30, + "average_sentence2_length": 122.18154463390171, + "max_sentence2_length": 344, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 247009 + }, + "gom-eng": { + "min_sentence1_length": 31, + "average_sentence1_length": 119.71815446339016, + "max_sentence1_length": 306, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 244553 + }, + "eng-gom": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 31, + "average_sentence2_length": 119.71815446339016, + "max_sentence2_length": 306, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 244553 + }, + "hne-eng": { + "min_sentence1_length": 28, + "average_sentence1_length": 121.58676028084253, + "max_sentence1_length": 321, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 246416 + }, + "eng-hne": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 28, + "average_sentence2_length": 121.57572718154464, + "max_sentence2_length": 321, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 246405 + }, + "raj-eng": { + "min_sentence1_length": 32, + "average_sentence1_length": 124.72116349047141, + "max_sentence1_length": 313, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 249541 + }, + "eng-raj": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 32, + "average_sentence2_length": 124.72116349047141, + "max_sentence2_length": 313, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 249541 + }, + "mai-eng": { + "min_sentence1_length": 29, + "average_sentence1_length": 123.16649949849548, + "max_sentence1_length": 312, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 247991 + }, + "eng-mai": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 29, + "average_sentence2_length": 123.16950852557673, + "max_sentence2_length": 312, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 247994 + }, + "mni-eng": { + "min_sentence1_length": 39, + "average_sentence1_length": 129.5025075225677, + "max_sentence1_length": 310, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 254308 + }, + "eng-mni": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 39, + "average_sentence2_length": 129.50651955867602, + "max_sentence2_length": 310, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 254312 + }, + "mup-eng": { + "min_sentence1_length": 28, + "average_sentence1_length": 123.6629889669007, + "max_sentence1_length": 312, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 248486 + }, + "eng-mup": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 28, + "average_sentence2_length": 123.6629889669007, + "max_sentence2_length": 312, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 248486 + }, + "mwr-eng": { + "min_sentence1_length": 31, + "average_sentence1_length": 123.81845536609829, + "max_sentence1_length": 324, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 248641 + }, + "eng-mwr": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 31, + "average_sentence2_length": 123.81845536609829, + "max_sentence2_length": 324, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 248641 + }, + "sat-eng": { + "min_sentence1_length": 37, + "average_sentence1_length": 133.4854563691073, + "max_sentence1_length": 333, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 258279 + }, + "eng-sat": { + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "min_sentence2_length": 37, + "average_sentence2_length": 133.4854563691073, + "max_sentence2_length": 333, + "num_samples": 997, + "num_samples_sentence2": 997, + "number_of_characters": 258279 + } + } + }, + "test": { + "min_sentence1_length": 33, + "average_sentence1_length": 130.84266389532507, + "max_sentence1_length": 431, + "min_sentence2_length": 33, + "average_sentence2_length": 130.834724683113, + "max_sentence2_length": 431, + "num_samples": 58696, + "num_samples_sentence2": 58696, + "number_of_characters": 15359416, + "hf_subset_descriptive_stats": { + "ben-eng": { + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 261008 + }, + "eng-ben": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 261008 + }, + "guj-eng": { + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 258394 + }, + "eng-guj": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 258394 + }, + "hin-eng": { + "min_sentence1_length": 41, + "average_sentence1_length": 129.5197628458498, + "max_sentence1_length": 381, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 263040 + }, + "eng-hin": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 41, + "average_sentence2_length": 129.5088932806324, + "max_sentence2_length": 381, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 263029 + }, + "kan-eng": { + "min_sentence1_length": 43, + "average_sentence1_length": 136.48715415019763, + "max_sentence1_length": 388, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 270091 + }, + "eng-kan": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 43, + "average_sentence2_length": 136.4179841897233, + "max_sentence2_length": 388, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 270021 + }, + "mal-eng": { + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 281302 + }, + "eng-mal": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 281302 + }, + "mar-eng": { + "min_sentence1_length": 34, + "average_sentence1_length": 131.66600790513834, + "max_sentence1_length": 356, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 265212 + }, + "eng-mar": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47924901185772, + "max_sentence2_length": 355, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 265023 + }, + "tam-eng": { + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 286099 + }, + "eng-tam": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 286099 + }, + "tel-eng": { + "min_sentence1_length": 39, + "average_sentence1_length": 130.92292490118578, + "max_sentence1_length": 359, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 264460 + }, + "eng-tel": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 39, + "average_sentence2_length": 130.9100790513834, + "max_sentence2_length": 359, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 264447 + }, + "urd-eng": { + "min_sentence1_length": 34, + "average_sentence1_length": 128.37944664031622, + "max_sentence1_length": 348, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 261886 + }, + "eng-urd": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 261885 + }, + "asm-eng": { + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 257902 + }, + "eng-asm": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 35, + "average_sentence2_length": 124.449604743083, + "max_sentence2_length": 329, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 257909 + }, + "bho-eng": { + "min_sentence1_length": 36, + "average_sentence1_length": 127.08695652173913, + "max_sentence1_length": 367, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 260578 + }, + "eng-bho": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 36, + "average_sentence2_length": 127.1096837944664, + "max_sentence2_length": 367, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 260601 + }, + "nep-eng": { + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 258869 + }, + "eng-nep": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 258869 + }, + "ory-eng": { + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 266805 + }, + "eng-ory": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 266805 + }, + "pan-eng": { + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 265391 + }, + "eng-pan": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 265391 + }, + "pus-eng": { + "min_sentence1_length": 35, + "average_sentence1_length": 121.00395256916995, + "max_sentence1_length": 325, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 254422 + }, + "eng-pus": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 254421 + }, + "san-eng": { + "min_sentence1_length": 33, + "average_sentence1_length": 126.85079051383399, + "max_sentence1_length": 358, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 260339 + }, + "eng-san": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73715415019763, + "max_sentence2_length": 358, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 260224 + }, + "awa-eng": { + "min_sentence1_length": 34, + "average_sentence1_length": 126.69268774703558, + "max_sentence1_length": 378, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 260179 + }, + "eng-awa": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 260137 + }, + "bgc-eng": { + "min_sentence1_length": 38, + "average_sentence1_length": 123.99604743083005, + "max_sentence1_length": 332, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 257450 + }, + "eng-bgc": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 38, + "average_sentence2_length": 123.99604743083005, + "max_sentence2_length": 332, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 257450 + }, + "bod-eng": { + "min_sentence1_length": 42, + "average_sentence1_length": 146.46442687747034, + "max_sentence1_length": 431, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 280188 + }, + "eng-bod": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 42, + "average_sentence2_length": 146.40316205533597, + "max_sentence2_length": 431, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 280126 + }, + "boy-eng": { + "min_sentence1_length": 36, + "average_sentence1_length": 143.84584980237153, + "max_sentence1_length": 396, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 277538 + }, + "eng-boy": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 36, + "average_sentence2_length": 143.84584980237153, + "max_sentence2_length": 396, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 277538 + }, + "gbm-eng": { + "min_sentence1_length": 38, + "average_sentence1_length": 127.53063241106719, + "max_sentence1_length": 333, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 261027 + }, + "eng-gbm": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 38, + "average_sentence2_length": 127.53063241106719, + "max_sentence2_length": 333, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 261027 + }, + "gom-eng": { + "min_sentence1_length": 37, + "average_sentence1_length": 125.70750988142292, + "max_sentence1_length": 335, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 259182 + }, + "eng-gom": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 37, + "average_sentence2_length": 125.70750988142292, + "max_sentence2_length": 335, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 259182 + }, + "hne-eng": { + "min_sentence1_length": 42, + "average_sentence1_length": 125.43972332015811, + "max_sentence1_length": 327, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 258911 + }, + "eng-hne": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 42, + "average_sentence2_length": 125.44367588932806, + "max_sentence2_length": 326, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 258915 + }, + "raj-eng": { + "min_sentence1_length": 38, + "average_sentence1_length": 128.47924901185772, + "max_sentence1_length": 338, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 261987 + }, + "eng-raj": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 38, + "average_sentence2_length": 128.47924901185772, + "max_sentence2_length": 338, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 261987 + }, + "mai-eng": { + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 261374 + }, + "eng-mai": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87648221343873, + "max_sentence2_length": 350, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 261377 + }, + "mni-eng": { + "min_sentence1_length": 38, + "average_sentence1_length": 135.17885375494072, + "max_sentence1_length": 353, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 268767 + }, + "eng-mni": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1798418972332, + "max_sentence2_length": 354, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 268768 + }, + "mup-eng": { + "min_sentence1_length": 40, + "average_sentence1_length": 128.52569169960475, + "max_sentence1_length": 340, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 262034 + }, + "eng-mup": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 40, + "average_sentence2_length": 128.52569169960475, + "max_sentence2_length": 340, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 262034 + }, + "mwr-eng": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.22035573122528, + "max_sentence1_length": 345, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 263749 + }, + "eng-mwr": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 35, + "average_sentence2_length": 130.22035573122528, + "max_sentence2_length": 345, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 263749 + }, + "sat-eng": { + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 271757 + }, + "eng-sat": { + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "num_samples": 1012, + "num_samples_sentence2": 1012, + "number_of_characters": 271757 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/NTREXBitextMining.json b/mteb/descriptive_stats/BitextMining/NTREXBitextMining.json new file mode 100644 index 000000000..9c1432ff7 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/NTREXBitextMining.json @@ -0,0 +1,21091 @@ +{ + "test": { + "min_sentence1_length": 1, + "average_sentence1_length": 129.15449296073547, + "max_sentence1_length": 773, + "min_sentence2_length": 1, + "average_sentence2_length": 129.15449296073547, + "max_sentence2_length": 773, + "num_samples": 3826252, + "num_samples_sentence2": 3826252, + "number_of_characters": 988355274, + "hf_subset_descriptive_stats": { + "afr_Latn-dan_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520490 + }, + "afr_Latn-deu_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564002 + }, + "afr_Latn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 516072 + }, + "afr_Latn-fao_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526155 + }, + "afr_Latn-isl_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530560 + }, + "afr_Latn-ltz_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549109 + }, + "afr_Latn-nld_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 560267 + }, + "afr_Latn-nno_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 516709 + }, + "afr_Latn-nob_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 519796 + }, + "afr_Latn-swe_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520179 + }, + "amh_Ethi-eng_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 415227 + }, + "amh_Ethi-hau_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 437473 + }, + "amh_Ethi-ibo_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 413608 + }, + "amh_Ethi-nso_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 459006 + }, + "amh_Ethi-orm_Ethi": { + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 404938 + }, + "amh_Ethi-som_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 458799 + }, + "amh_Ethi-ssw_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 455649 + }, + "amh_Ethi-swa_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 440016 + }, + "amh_Ethi-tir_Ethi": { + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 332745 + }, + "amh_Ethi-tsn_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 501790 + }, + "amh_Ethi-wol_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 407310 + }, + "amh_Ethi-xho_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 435597 + }, + "amh_Ethi-yor_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 483595 + }, + "amh_Ethi-zul_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 425239 + }, + "arb_Arab-ben_Beng": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 474983 + }, + "arb_Arab-ckb_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 483548 + }, + "arb_Arab-deu_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526831 + }, + "arb_Arab-ell_Grek": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530308 + }, + "arb_Arab-eng_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 478901 + }, + "arb_Arab-fas_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 474520 + }, + "arb_Arab-fin_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 500981 + }, + "arb_Arab-fra_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 524289 + }, + "arb_Arab-heb_Hebr": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 431477 + }, + "arb_Arab-hin_Deva": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 492756 + }, + "arb_Arab-hun_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509557 + }, + "arb_Arab-ind_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 518153 + }, + "arb_Arab-jpn_Jpan": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 342807 + }, + "arb_Arab-kmr_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 477127 + }, + "arb_Arab-kor_Hang": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 364586 + }, + "arb_Arab-lit_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 490578 + }, + "arb_Arab-mey_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 445016 + }, + "arb_Arab-nld_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 523096 + }, + "arb_Arab-pol_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509047 + }, + "arb_Arab-por_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508396 + }, + "arb_Arab-prs_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 473717 + }, + "arb_Arab-pus_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 473814 + }, + "arb_Arab-rus_Cyrl": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506074 + }, + "arb_Arab-shi_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 446094 + }, + "arb_Arab-spa_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 519381 + }, + "arb_Arab-swa_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503690 + }, + "arb_Arab-swe_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 483008 + }, + "arb_Arab-tam_Taml": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 541142 + }, + "arb_Arab-tgk_Cyrl": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 505328 + }, + "arb_Arab-tur_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 496794 + }, + "arb_Arab-vie_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 502302 + }, + "arb_Arab-zho_Hant": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 322659 + }, + "arb_Arab-zul_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 488913 + }, + "aze_Latn-bak_Cyrl": { + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515960 + }, + "aze_Latn-eng_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 517354 + }, + "aze_Latn-kaz_Cyrl": { + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 529910 + }, + "aze_Latn-kir_Cyrl": { + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520498 + }, + "aze_Latn-tat_Cyrl": { + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515560 + }, + "aze_Latn-tuk_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 554908 + }, + "aze_Latn-tur_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 535247 + }, + "aze_Latn-uig_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 580656 + }, + "aze_Latn-uzb_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 563329 + }, + "bak_Cyrl-aze_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515960 + }, + "bak_Cyrl-eng_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 494046 + }, + "bak_Cyrl-kaz_Cyrl": { + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506602 + }, + "bak_Cyrl-kir_Cyrl": { + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 497190 + }, + "bak_Cyrl-tat_Cyrl": { + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 492252 + }, + "bak_Cyrl-tuk_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531600 + }, + "bak_Cyrl-tur_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 511939 + }, + "bak_Cyrl-uig_Arab": { + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 557348 + }, + "bak_Cyrl-uzb_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540021 + }, + "bel_Cyrl-bos_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 511000 + }, + "bel_Cyrl-bul_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525979 + }, + "bel_Cyrl-ces_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 497408 + }, + "bel_Cyrl-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503810 + }, + "bel_Cyrl-hrv_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 512015 + }, + "bel_Cyrl-mkd_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 523981 + }, + "bel_Cyrl-pol_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 533956 + }, + "bel_Cyrl-rus_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530983 + }, + "bel_Cyrl-slk_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509059 + }, + "bel_Cyrl-slv_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508986 + }, + "bel_Cyrl-srp_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508393 + }, + "bel_Cyrl-srp_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 512231 + }, + "bel_Cyrl-ukr_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 518873 + }, + "bem_Latn-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 546212 + }, + "bem_Latn-ewe_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 537470 + }, + "bem_Latn-fuc_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526972 + }, + "bem_Latn-kin_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 602279 + }, + "bem_Latn-nde_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 596231 + }, + "bem_Latn-nya_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582774 + }, + "bem_Latn-sna_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 596822 + }, + "bem_Latn-ven_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 598248 + }, + "ben_Beng-arb_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 474983 + }, + "ben_Beng-deu_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 539452 + }, + "ben_Beng-div_Thaa": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547650 + }, + "ben_Beng-ell_Grek": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 542929 + }, + "ben_Beng-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 491522 + }, + "ben_Beng-eus_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 519005 + }, + "ben_Beng-fas_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 487141 + }, + "ben_Beng-fin_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 513602 + }, + "ben_Beng-fra_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536910 + }, + "ben_Beng-guj_Gujr": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 488733 + }, + "ben_Beng-heb_Hebr": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 444098 + }, + "ben_Beng-hin_Deva": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 505377 + }, + "ben_Beng-hun_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522178 + }, + "ben_Beng-ind_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530774 + }, + "ben_Beng-jpn_Jpan": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 355428 + }, + "ben_Beng-kan_Knda": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509338 + }, + "ben_Beng-kor_Hang": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 377207 + }, + "ben_Beng-lit_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503199 + }, + "ben_Beng-mar_Deva": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 504689 + }, + "ben_Beng-nep_Deva": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 492025 + }, + "ben_Beng-nld_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 535717 + }, + "ben_Beng-pan_Guru": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 494224 + }, + "ben_Beng-pol_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 521668 + }, + "ben_Beng-por_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 521017 + }, + "ben_Beng-rus_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 518695 + }, + "ben_Beng-sin_Sinh": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 502543 + }, + "ben_Beng-snd_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 464129 + }, + "ben_Beng-spa_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532002 + }, + "ben_Beng-swa_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 516311 + }, + "ben_Beng-swe_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495629 + }, + "ben_Beng-tam_Taml": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 553763 + }, + "ben_Beng-tel_Telu": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 491329 + }, + "ben_Beng-tur_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509415 + }, + "ben_Beng-urd_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 491800 + }, + "ben_Beng-vie_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 514923 + }, + "ben_Beng-zho_Hant": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 335280 + }, + "ben_Beng-zul_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 501534 + }, + "bod_Tibt-dzo_Tibt": { + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + "max_sentence2_length": 411, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 543850 + }, + "bod_Tibt-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 548349 + }, + "bod_Tibt-khm_Khmr": { + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "min_sentence2_length": 6, + "average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 589120 + }, + "bod_Tibt-lao_Laoo": { + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + "max_sentence2_length": 507, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 567609 + }, + "bod_Tibt-mon_Mong": { + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 559677 + }, + "bod_Tibt-mya_Mymr": { + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 612483 + }, + "bod_Tibt-tha_Thai": { + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 538097 + }, + "bos_Latn-bel_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 511000 + }, + "bos_Latn-bul_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 524799 + }, + "bos_Latn-ces_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 496228 + }, + "bos_Latn-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 502630 + }, + "bos_Latn-hrv_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 510835 + }, + "bos_Latn-mkd_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522801 + }, + "bos_Latn-pol_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532776 + }, + "bos_Latn-rus_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 529803 + }, + "bos_Latn-slk_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 507879 + }, + "bos_Latn-slv_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 507806 + }, + "bos_Latn-srp_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 507213 + }, + "bos_Latn-srp_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 511051 + }, + "bos_Latn-ukr_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 517693 + }, + "bul_Cyrl-bel_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525979 + }, + "bul_Cyrl-bos_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 524799 + }, + "bul_Cyrl-ces_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 511207 + }, + "bul_Cyrl-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 517609 + }, + "bul_Cyrl-hrv_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525814 + }, + "bul_Cyrl-mkd_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 537780 + }, + "bul_Cyrl-pol_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547755 + }, + "bul_Cyrl-rus_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 544782 + }, + "bul_Cyrl-slk_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522858 + }, + "bul_Cyrl-slv_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522785 + }, + "bul_Cyrl-srp_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522192 + }, + "bul_Cyrl-srp_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526030 + }, + "bul_Cyrl-ukr_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532672 + }, + "cat_Latn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530680 + }, + "cat_Latn-fra_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 576068 + }, + "cat_Latn-glg_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 554946 + }, + "cat_Latn-ita_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 572177 + }, + "cat_Latn-mlt_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 560435 + }, + "cat_Latn-por_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 560175 + }, + "cat_Latn-ron_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 575445 + }, + "cat_Latn-spa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 571160 + }, + "ces_Latn-bel_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 497408 + }, + "ces_Latn-bos_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 496228 + }, + "ces_Latn-bul_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 511207 + }, + "ces_Latn-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 489038 + }, + "ces_Latn-hrv_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 497243 + }, + "ces_Latn-mkd_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509209 + }, + "ces_Latn-pol_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 519184 + }, + "ces_Latn-rus_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 516211 + }, + "ces_Latn-slk_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 494287 + }, + "ces_Latn-slv_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 494214 + }, + "ces_Latn-srp_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 493621 + }, + "ces_Latn-srp_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 497459 + }, + "ces_Latn-ukr_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 504101 + }, + "ckb_Arab-arb_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 483548 + }, + "ckb_Arab-eng_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 500087 + }, + "ckb_Arab-fas_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495706 + }, + "ckb_Arab-heb_Hebr": { + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 452663 + }, + "ckb_Arab-kmr_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 498313 + }, + "ckb_Arab-mey_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 466202 + }, + "ckb_Arab-prs_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 494903 + }, + "ckb_Arab-pus_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495000 + }, + "ckb_Arab-shi_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 467280 + }, + "ckb_Arab-tgk_Cyrl": { + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526514 + }, + "cym_Latn-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.4526790185278, + "max_sentence1_length": 444, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 514225 + }, + "cym_Latn-gle_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.4526790185278, + "max_sentence1_length": 444, + "min_sentence2_length": 11, + "average_sentence2_length": 147.62593890836254, + "max_sentence2_length": 461, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561314 + }, + "dan_Latn-afr_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520490 + }, + "dan_Latn-deu_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547788 + }, + "dan_Latn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 499858 + }, + "dan_Latn-fao_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509941 + }, + "dan_Latn-isl_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 514346 + }, + "dan_Latn-ltz_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532895 + }, + "dan_Latn-nld_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 544053 + }, + "dan_Latn-nno_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 500495 + }, + "dan_Latn-nob_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503582 + }, + "dan_Latn-swe_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503965 + }, + "deu_Latn-afr_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564002 + }, + "deu_Latn-arb_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526831 + }, + "deu_Latn-ben_Beng": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 539452 + }, + "deu_Latn-dan_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547788 + }, + "deu_Latn-ell_Grek": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 594777 + }, + "deu_Latn-eng_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 543370 + }, + "deu_Latn-fao_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 553453 + }, + "deu_Latn-fas_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 538989 + }, + "deu_Latn-fin_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 565450 + }, + "deu_Latn-fra_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 588758 + }, + "deu_Latn-heb_Hebr": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495946 + }, + "deu_Latn-hin_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 557225 + }, + "deu_Latn-hun_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 574026 + }, + "deu_Latn-ind_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582622 + }, + "deu_Latn-isl_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 557858 + }, + "deu_Latn-jpn_Jpan": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 407276 + }, + "deu_Latn-kor_Hang": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 429055 + }, + "deu_Latn-lit_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 555047 + }, + "deu_Latn-ltz_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 576407 + }, + "deu_Latn-nld_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 587565 + }, + "deu_Latn-nno_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 544007 + }, + "deu_Latn-nob_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547094 + }, + "deu_Latn-pol_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 573516 + }, + "deu_Latn-por_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 572865 + }, + "deu_Latn-rus_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 570543 + }, + "deu_Latn-spa_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 583850 + }, + "deu_Latn-swa_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 568159 + }, + "deu_Latn-swe_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547477 + }, + "deu_Latn-tam_Taml": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 605611 + }, + "deu_Latn-tur_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561263 + }, + "deu_Latn-vie_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 566771 + }, + "deu_Latn-zho_Hant": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 387128 + }, + "deu_Latn-zul_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 553382 + }, + "div_Thaa-ben_Beng": { + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547650 + }, + "div_Thaa-eng_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 551568 + }, + "div_Thaa-eus_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 579051 + }, + "div_Thaa-guj_Gujr": { + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 548779 + }, + "div_Thaa-hin_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 565423 + }, + "div_Thaa-kan_Knda": { + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 569384 + }, + "div_Thaa-mar_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564735 + }, + "div_Thaa-nep_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 552071 + }, + "div_Thaa-pan_Guru": { + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 554270 + }, + "div_Thaa-sin_Sinh": { + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 562589 + }, + "div_Thaa-snd_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 524175 + }, + "div_Thaa-tam_Taml": { + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 613809 + }, + "div_Thaa-tel_Telu": { + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 551375 + }, + "div_Thaa-urd_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 551846 + }, + "dzo_Tibt-bod_Tibt": { + "min_sentence1_length": 5, + "average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 543850 + }, + "dzo_Tibt-eng_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 490941 + }, + "dzo_Tibt-khm_Khmr": { + "min_sentence1_length": 5, + "average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "min_sentence2_length": 6, + "average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531712 + }, + "dzo_Tibt-lao_Laoo": { + "min_sentence1_length": 5, + "average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + "max_sentence2_length": 507, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 510201 + }, + "dzo_Tibt-mon_Mong": { + "min_sentence1_length": 5, + "average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 502269 + }, + "dzo_Tibt-mya_Mymr": { + "min_sentence1_length": 5, + "average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 555075 + }, + "dzo_Tibt-tha_Thai": { + "min_sentence1_length": 5, + "average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 480689 + }, + "ell_Grek-arb_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530308 + }, + "ell_Grek-ben_Beng": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 542929 + }, + "ell_Grek-deu_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 594777 + }, + "ell_Grek-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 546847 + }, + "ell_Grek-fas_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 542466 + }, + "ell_Grek-fin_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 568927 + }, + "ell_Grek-fra_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 592235 + }, + "ell_Grek-heb_Hebr": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 499423 + }, + "ell_Grek-hin_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 560702 + }, + "ell_Grek-hun_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 577503 + }, + "ell_Grek-hye_Armn": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 7, + "average_sentence2_length": 132.55633450175262, + "max_sentence2_length": 441, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 563842 + }, + "ell_Grek-ind_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 586099 + }, + "ell_Grek-jpn_Jpan": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 410753 + }, + "ell_Grek-kat_Geor": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 8, + "average_sentence2_length": 133.49624436654983, + "max_sentence2_length": 503, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 565719 + }, + "ell_Grek-kor_Hang": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 432532 + }, + "ell_Grek-lit_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 558524 + }, + "ell_Grek-nld_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 591042 + }, + "ell_Grek-pol_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 576993 + }, + "ell_Grek-por_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 576342 + }, + "ell_Grek-rus_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 574020 + }, + "ell_Grek-spa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 587327 + }, + "ell_Grek-sqi_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 7, + "average_sentence2_length": 142.01652478718077, + "max_sentence2_length": 461, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582734 + }, + "ell_Grek-swa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 571636 + }, + "ell_Grek-swe_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 550954 + }, + "ell_Grek-tam_Taml": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 609088 + }, + "ell_Grek-tur_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564740 + }, + "ell_Grek-vie_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 570248 + }, + "ell_Grek-zho_Hant": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 390605 + }, + "ell_Grek-zul_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 556859 + }, + "eng_Latn-afr_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 516072 + }, + "eng_Latn-amh_Ethi": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 415227 + }, + "eng_Latn-arb_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 478901 + }, + "eng_Latn-aze_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 517354 + }, + "eng_Latn-bak_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 494046 + }, + "eng_Latn-bel_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503810 + }, + "eng_Latn-bem_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 546212 + }, + "eng_Latn-ben_Beng": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 491522 + }, + "eng_Latn-bod_Tibt": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 548349 + }, + "eng_Latn-bos_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 502630 + }, + "eng_Latn-bul_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 517609 + }, + "eng_Latn-cat_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530680 + }, + "eng_Latn-ces_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 489038 + }, + "eng_Latn-ckb_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 500087 + }, + "eng_Latn-cym_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 133.4526790185278, + "max_sentence2_length": 444, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 514225 + }, + "eng_Latn-dan_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 499858 + }, + "eng_Latn-deu_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 543370 + }, + "eng_Latn-div_Thaa": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 551568 + }, + "eng_Latn-dzo_Tibt": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + "max_sentence2_length": 411, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 490941 + }, + "eng_Latn-ell_Grek": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 546847 + }, + "eng_Latn-eus_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522923 + }, + "eng_Latn-ewe_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 486698 + }, + "eng_Latn-fao_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 505523 + }, + "eng_Latn-fas_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 491059 + }, + "eng_Latn-fij_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 548225 + }, + "eng_Latn-fil_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 541140 + }, + "eng_Latn-fin_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 517520 + }, + "eng_Latn-fra_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540828 + }, + "eng_Latn-fuc_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 476200 + }, + "eng_Latn-gle_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 11, + "average_sentence2_length": 147.62593890836254, + "max_sentence2_length": 461, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 542529 + }, + "eng_Latn-glg_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 519706 + }, + "eng_Latn-guj_Gujr": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 492651 + }, + "eng_Latn-hau_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 517686 + }, + "eng_Latn-heb_Hebr": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 448016 + }, + "eng_Latn-hin_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509295 + }, + "eng_Latn-hmn_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 9, + "average_sentence2_length": 165.6434651977967, + "max_sentence2_length": 643, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 578510 + }, + "eng_Latn-hrv_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503645 + }, + "eng_Latn-hun_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526096 + }, + "eng_Latn-hye_Armn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 132.55633450175262, + "max_sentence2_length": 441, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 512435 + }, + "eng_Latn-ibo_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 493821 + }, + "eng_Latn-ind_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 534692 + }, + "eng_Latn-isl_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509928 + }, + "eng_Latn-ita_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536937 + }, + "eng_Latn-jpn_Jpan": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 359346 + }, + "eng_Latn-kan_Knda": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 513256 + }, + "eng_Latn-kat_Geor": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 133.49624436654983, + "max_sentence2_length": 503, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 514312 + }, + "eng_Latn-kaz_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 507996 + }, + "eng_Latn-khm_Khmr": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536211 + }, + "eng_Latn-kin_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 551507 + }, + "eng_Latn-kir_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 498584 + }, + "eng_Latn-kmr_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 493666 + }, + "eng_Latn-kor_Hang": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 381125 + }, + "eng_Latn-lao_Laoo": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + "max_sentence2_length": 507, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 514700 + }, + "eng_Latn-lav_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 134.29544316474713, + "max_sentence2_length": 503, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515908 + }, + "eng_Latn-lit_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 507117 + }, + "eng_Latn-ltz_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 528477 + }, + "eng_Latn-mal_Mlym": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 551872 + }, + "eng_Latn-mar_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508607 + }, + "eng_Latn-mey_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 461555 + }, + "eng_Latn-mkd_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515611 + }, + "eng_Latn-mlg_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 568028 + }, + "eng_Latn-mlt_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525195 + }, + "eng_Latn-mon_Mong": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506768 + }, + "eng_Latn-mri_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 521844 + }, + "eng_Latn-msa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 524903 + }, + "eng_Latn-mya_Mymr": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 559574 + }, + "eng_Latn-nde_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 545459 + }, + "eng_Latn-nep_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495943 + }, + "eng_Latn-nld_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 539635 + }, + "eng_Latn-nno_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 496077 + }, + "eng_Latn-nob_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 499164 + }, + "eng_Latn-nso_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 539219 + }, + "eng_Latn-nya_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532002 + }, + "eng_Latn-orm_Ethi": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 485151 + }, + "eng_Latn-pan_Guru": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 498142 + }, + "eng_Latn-pol_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525586 + }, + "eng_Latn-por_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 524935 + }, + "eng_Latn-prs_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 490256 + }, + "eng_Latn-pus_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 490353 + }, + "eng_Latn-ron_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540205 + }, + "eng_Latn-rus_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522613 + }, + "eng_Latn-shi_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 462633 + }, + "eng_Latn-sin_Sinh": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506461 + }, + "eng_Latn-slk_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 500689 + }, + "eng_Latn-slv_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 500616 + }, + "eng_Latn-smo_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525575 + }, + "eng_Latn-sna_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 546050 + }, + "eng_Latn-snd_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 468047 + }, + "eng_Latn-som_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 539012 + }, + "eng_Latn-spa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 535920 + }, + "eng_Latn-sqi_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 142.01652478718077, + "max_sentence2_length": 461, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531327 + }, + "eng_Latn-srp_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 500023 + }, + "eng_Latn-srp_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503861 + }, + "eng_Latn-ssw_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 535862 + }, + "eng_Latn-swa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520229 + }, + "eng_Latn-swe_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 499547 + }, + "eng_Latn-tah_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 557343 + }, + "eng_Latn-tam_Taml": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 557681 + }, + "eng_Latn-tat_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 493646 + }, + "eng_Latn-tel_Telu": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495247 + }, + "eng_Latn-tgk_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 521867 + }, + "eng_Latn-tha_Thai": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 485188 + }, + "eng_Latn-tir_Ethi": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 412958 + }, + "eng_Latn-ton_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561360 + }, + "eng_Latn-tsn_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582003 + }, + "eng_Latn-tuk_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532994 + }, + "eng_Latn-tur_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 513333 + }, + "eng_Latn-uig_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 558742 + }, + "eng_Latn-ukr_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 510503 + }, + "eng_Latn-urd_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495718 + }, + "eng_Latn-uzb_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 541415 + }, + "eng_Latn-ven_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547476 + }, + "eng_Latn-vie_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 518841 + }, + "eng_Latn-wol_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 487523 + }, + "eng_Latn-xho_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515810 + }, + "eng_Latn-yor_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 563808 + }, + "eng_Latn-yue_Hant": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 4, + "average_sentence2_length": 39.502754131196795, + "max_sentence2_length": 133, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 326607 + }, + "eng_Latn-zho_Hans": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 4, + "average_sentence2_length": 42.54431647471207, + "max_sentence2_length": 263, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 332681 + }, + "eng_Latn-zho_Hant": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 339198 + }, + "eng_Latn-zul_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 505452 + }, + "eus_Latn-ben_Beng": { + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 519005 + }, + "eus_Latn-div_Thaa": { + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 579051 + }, + "eus_Latn-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522923 + }, + "eus_Latn-guj_Gujr": { + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520134 + }, + "eus_Latn-hin_Deva": { + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536778 + }, + "eus_Latn-kan_Knda": { + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540739 + }, + "eus_Latn-mar_Deva": { + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536090 + }, + "eus_Latn-nep_Deva": { + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 523426 + }, + "eus_Latn-pan_Guru": { + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525625 + }, + "eus_Latn-sin_Sinh": { + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 533944 + }, + "eus_Latn-snd_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495530 + }, + "eus_Latn-tam_Taml": { + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 585164 + }, + "eus_Latn-tel_Telu": { + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522730 + }, + "eus_Latn-urd_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 523201 + }, + "ewe_Latn-bem_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 537470 + }, + "ewe_Latn-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 486698 + }, + "ewe_Latn-fuc_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 467458 + }, + "ewe_Latn-kin_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 542765 + }, + "ewe_Latn-nde_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536717 + }, + "ewe_Latn-nya_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 523260 + }, + "ewe_Latn-sna_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 537308 + }, + "ewe_Latn-ven_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 538734 + }, + "fao_Latn-afr_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526155 + }, + "fao_Latn-dan_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509941 + }, + "fao_Latn-deu_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 553453 + }, + "fao_Latn-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 505523 + }, + "fao_Latn-isl_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520011 + }, + "fao_Latn-ltz_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 538560 + }, + "fao_Latn-nld_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549718 + }, + "fao_Latn-nno_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506160 + }, + "fao_Latn-nob_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509247 + }, + "fao_Latn-swe_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509630 + }, + "fas_Arab-arb_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 474520 + }, + "fas_Arab-ben_Beng": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 487141 + }, + "fas_Arab-ckb_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495706 + }, + "fas_Arab-deu_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 538989 + }, + "fas_Arab-ell_Grek": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 542466 + }, + "fas_Arab-eng_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 491059 + }, + "fas_Arab-fin_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 513139 + }, + "fas_Arab-fra_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536447 + }, + "fas_Arab-heb_Hebr": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 443635 + }, + "fas_Arab-hin_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 504914 + }, + "fas_Arab-hun_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 521715 + }, + "fas_Arab-ind_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530311 + }, + "fas_Arab-jpn_Jpan": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 354965 + }, + "fas_Arab-kmr_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 489285 + }, + "fas_Arab-kor_Hang": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 376744 + }, + "fas_Arab-lit_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 502736 + }, + "fas_Arab-mey_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 457174 + }, + "fas_Arab-nld_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 535254 + }, + "fas_Arab-pol_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 521205 + }, + "fas_Arab-por_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520554 + }, + "fas_Arab-prs_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 485875 + }, + "fas_Arab-pus_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 485972 + }, + "fas_Arab-rus_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 518232 + }, + "fas_Arab-shi_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 458252 + }, + "fas_Arab-spa_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531539 + }, + "fas_Arab-swa_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515848 + }, + "fas_Arab-swe_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495166 + }, + "fas_Arab-tam_Taml": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 553300 + }, + "fas_Arab-tgk_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 517486 + }, + "fas_Arab-tur_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508952 + }, + "fas_Arab-vie_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 514460 + }, + "fas_Arab-zho_Hant": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 334817 + }, + "fas_Arab-zul_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 501071 + }, + "fij_Latn-eng_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 548225 + }, + "fij_Latn-fil_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 593925 + }, + "fij_Latn-ind_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 587477 + }, + "fij_Latn-mal_Mlym": { + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 604657 + }, + "fij_Latn-mlg_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 620813 + }, + "fij_Latn-mri_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 574629 + }, + "fij_Latn-msa_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 577688 + }, + "fij_Latn-smo_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 578360 + }, + "fij_Latn-tah_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 610128 + }, + "fij_Latn-ton_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 614145 + }, + "fil_Latn-eng_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 541140 + }, + "fil_Latn-fij_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 593925 + }, + "fil_Latn-ind_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 580392 + }, + "fil_Latn-mal_Mlym": { + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 597572 + }, + "fil_Latn-mlg_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 613728 + }, + "fil_Latn-mri_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 567544 + }, + "fil_Latn-msa_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 570603 + }, + "fil_Latn-smo_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 571275 + }, + "fil_Latn-tah_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 603043 + }, + "fil_Latn-ton_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 607060 + }, + "fin_Latn-arb_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 500981 + }, + "fin_Latn-ben_Beng": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 513602 + }, + "fin_Latn-deu_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 565450 + }, + "fin_Latn-ell_Grek": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 568927 + }, + "fin_Latn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 517520 + }, + "fin_Latn-fas_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 513139 + }, + "fin_Latn-fra_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 562908 + }, + "fin_Latn-heb_Hebr": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 470096 + }, + "fin_Latn-hin_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531375 + }, + "fin_Latn-hun_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 548176 + }, + "fin_Latn-ind_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 556772 + }, + "fin_Latn-jpn_Jpan": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 381426 + }, + "fin_Latn-kor_Hang": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 403205 + }, + "fin_Latn-lav_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 7, + "average_sentence2_length": 134.29544316474713, + "max_sentence2_length": 503, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 537988 + }, + "fin_Latn-lit_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 529197 + }, + "fin_Latn-nld_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561715 + }, + "fin_Latn-pol_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547666 + }, + "fin_Latn-por_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547015 + }, + "fin_Latn-rus_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 544693 + }, + "fin_Latn-spa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 558000 + }, + "fin_Latn-swa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 542309 + }, + "fin_Latn-swe_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 521627 + }, + "fin_Latn-tam_Taml": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 579761 + }, + "fin_Latn-tur_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 535413 + }, + "fin_Latn-vie_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540921 + }, + "fin_Latn-zho_Hant": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 361278 + }, + "fin_Latn-zul_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 527532 + }, + "fra_Latn-arb_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 524289 + }, + "fra_Latn-ben_Beng": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536910 + }, + "fra_Latn-cat_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 576068 + }, + "fra_Latn-deu_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 588758 + }, + "fra_Latn-ell_Grek": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 592235 + }, + "fra_Latn-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540828 + }, + "fra_Latn-fas_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536447 + }, + "fra_Latn-fin_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 562908 + }, + "fra_Latn-glg_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 565094 + }, + "fra_Latn-heb_Hebr": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 493404 + }, + "fra_Latn-hin_Deva": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 554683 + }, + "fra_Latn-hun_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 571484 + }, + "fra_Latn-ind_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 580080 + }, + "fra_Latn-ita_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582325 + }, + "fra_Latn-jpn_Jpan": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 404734 + }, + "fra_Latn-kor_Hang": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 426513 + }, + "fra_Latn-lit_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 552505 + }, + "fra_Latn-mlt_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 570583 + }, + "fra_Latn-nld_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 585023 + }, + "fra_Latn-pol_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 570974 + }, + "fra_Latn-por_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 570323 + }, + "fra_Latn-ron_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 585593 + }, + "fra_Latn-rus_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 568001 + }, + "fra_Latn-spa_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 581308 + }, + "fra_Latn-swa_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 565617 + }, + "fra_Latn-swe_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 544935 + }, + "fra_Latn-tam_Taml": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 603069 + }, + "fra_Latn-tur_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 558721 + }, + "fra_Latn-vie_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564229 + }, + "fra_Latn-zho_Hant": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 384586 + }, + "fra_Latn-zul_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 550840 + }, + "fuc_Latn-bem_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526972 + }, + "fuc_Latn-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 476200 + }, + "fuc_Latn-ewe_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 467458 + }, + "fuc_Latn-kin_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532267 + }, + "fuc_Latn-nde_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526219 + }, + "fuc_Latn-nya_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 512762 + }, + "fuc_Latn-sna_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526810 + }, + "fuc_Latn-ven_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 528236 + }, + "gle_Latn-cym_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 147.62593890836254, + "max_sentence1_length": 461, + "min_sentence2_length": 6, + "average_sentence2_length": 133.4526790185278, + "max_sentence2_length": 444, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561314 + }, + "gle_Latn-eng_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 147.62593890836254, + "max_sentence1_length": 461, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 542529 + }, + "glg_Latn-cat_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 554946 + }, + "glg_Latn-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 519706 + }, + "glg_Latn-fra_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 565094 + }, + "glg_Latn-ita_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561203 + }, + "glg_Latn-mlt_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549461 + }, + "glg_Latn-por_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549201 + }, + "glg_Latn-ron_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564471 + }, + "glg_Latn-spa_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 560186 + }, + "guj_Gujr-ben_Beng": { + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 488733 + }, + "guj_Gujr-div_Thaa": { + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 548779 + }, + "guj_Gujr-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 492651 + }, + "guj_Gujr-eus_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520134 + }, + "guj_Gujr-hin_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506506 + }, + "guj_Gujr-kan_Knda": { + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 510467 + }, + "guj_Gujr-mar_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 505818 + }, + "guj_Gujr-nep_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 493154 + }, + "guj_Gujr-pan_Guru": { + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495353 + }, + "guj_Gujr-sin_Sinh": { + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503672 + }, + "guj_Gujr-snd_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 465258 + }, + "guj_Gujr-tam_Taml": { + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 554892 + }, + "guj_Gujr-tel_Telu": { + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 492458 + }, + "guj_Gujr-urd_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 492929 + }, + "hau_Latn-amh_Ethi": { + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 437473 + }, + "hau_Latn-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 517686 + }, + "hau_Latn-ibo_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 516067 + }, + "hau_Latn-nso_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561465 + }, + "hau_Latn-orm_Ethi": { + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 507397 + }, + "hau_Latn-som_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561258 + }, + "hau_Latn-ssw_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 558108 + }, + "hau_Latn-swa_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 542475 + }, + "hau_Latn-tir_Ethi": { + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 435204 + }, + "hau_Latn-tsn_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 604249 + }, + "hau_Latn-wol_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509769 + }, + "hau_Latn-xho_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 538056 + }, + "hau_Latn-yor_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 586054 + }, + "hau_Latn-zul_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 527698 + }, + "heb_Hebr-arb_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 431477 + }, + "heb_Hebr-ben_Beng": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 444098 + }, + "heb_Hebr-ckb_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 452663 + }, + "heb_Hebr-deu_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495946 + }, + "heb_Hebr-ell_Grek": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 499423 + }, + "heb_Hebr-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 448016 + }, + "heb_Hebr-fas_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 443635 + }, + "heb_Hebr-fin_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 470096 + }, + "heb_Hebr-fra_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 493404 + }, + "heb_Hebr-hin_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 461871 + }, + "heb_Hebr-hun_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 478672 + }, + "heb_Hebr-ind_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 487268 + }, + "heb_Hebr-jpn_Jpan": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 311922 + }, + "heb_Hebr-kmr_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 446242 + }, + "heb_Hebr-kor_Hang": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 333701 + }, + "heb_Hebr-lit_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 459693 + }, + "heb_Hebr-mey_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 414131 + }, + "heb_Hebr-nld_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 492211 + }, + "heb_Hebr-pol_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 478162 + }, + "heb_Hebr-por_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 477511 + }, + "heb_Hebr-prs_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 442832 + }, + "heb_Hebr-pus_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 442929 + }, + "heb_Hebr-rus_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 475189 + }, + "heb_Hebr-shi_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 415209 + }, + "heb_Hebr-spa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 488496 + }, + "heb_Hebr-swa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 472805 + }, + "heb_Hebr-swe_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 452123 + }, + "heb_Hebr-tam_Taml": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 510257 + }, + "heb_Hebr-tgk_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 474443 + }, + "heb_Hebr-tur_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 465909 + }, + "heb_Hebr-vie_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 471417 + }, + "heb_Hebr-zho_Hant": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 291774 + }, + "heb_Hebr-zul_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 458028 + }, + "hin_Deva-arb_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 492756 + }, + "hin_Deva-ben_Beng": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 505377 + }, + "hin_Deva-deu_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 557225 + }, + "hin_Deva-div_Thaa": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 565423 + }, + "hin_Deva-ell_Grek": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 560702 + }, + "hin_Deva-eng_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509295 + }, + "hin_Deva-eus_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536778 + }, + "hin_Deva-fas_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 504914 + }, + "hin_Deva-fin_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531375 + }, + "hin_Deva-fra_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 554683 + }, + "hin_Deva-guj_Gujr": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506506 + }, + "hin_Deva-heb_Hebr": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 461871 + }, + "hin_Deva-hun_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 539951 + }, + "hin_Deva-ind_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 548547 + }, + "hin_Deva-jpn_Jpan": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 373201 + }, + "hin_Deva-kan_Knda": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 527111 + }, + "hin_Deva-kor_Hang": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 394980 + }, + "hin_Deva-lit_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520972 + }, + "hin_Deva-mar_Deva": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522462 + }, + "hin_Deva-nep_Deva": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509798 + }, + "hin_Deva-nld_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 553490 + }, + "hin_Deva-pan_Guru": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 511997 + }, + "hin_Deva-pol_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 539441 + }, + "hin_Deva-por_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 538790 + }, + "hin_Deva-rus_Cyrl": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536468 + }, + "hin_Deva-sin_Sinh": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520316 + }, + "hin_Deva-snd_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 481902 + }, + "hin_Deva-spa_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549775 + }, + "hin_Deva-swa_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 534084 + }, + "hin_Deva-swe_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 513402 + }, + "hin_Deva-tam_Taml": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 571536 + }, + "hin_Deva-tel_Telu": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509102 + }, + "hin_Deva-tur_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 527188 + }, + "hin_Deva-urd_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509573 + }, + "hin_Deva-vie_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532696 + }, + "hin_Deva-zho_Hant": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 353053 + }, + "hin_Deva-zul_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 519307 + }, + "hmn_Latn-eng_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 165.6434651977967, + "max_sentence1_length": 643, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 578510 + }, + "hrv_Latn-bel_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 512015 + }, + "hrv_Latn-bos_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 510835 + }, + "hrv_Latn-bul_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525814 + }, + "hrv_Latn-ces_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 497243 + }, + "hrv_Latn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503645 + }, + "hrv_Latn-mkd_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 523816 + }, + "hrv_Latn-pol_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 533791 + }, + "hrv_Latn-rus_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530818 + }, + "hrv_Latn-slk_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508894 + }, + "hrv_Latn-slv_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508821 + }, + "hrv_Latn-srp_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508228 + }, + "hrv_Latn-srp_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 512066 + }, + "hrv_Latn-ukr_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 518708 + }, + "hun_Latn-arb_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509557 + }, + "hun_Latn-ben_Beng": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522178 + }, + "hun_Latn-deu_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 574026 + }, + "hun_Latn-ell_Grek": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 577503 + }, + "hun_Latn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526096 + }, + "hun_Latn-fas_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 521715 + }, + "hun_Latn-fin_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 548176 + }, + "hun_Latn-fra_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 571484 + }, + "hun_Latn-heb_Hebr": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 478672 + }, + "hun_Latn-hin_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 539951 + }, + "hun_Latn-ind_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 565348 + }, + "hun_Latn-jpn_Jpan": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 390002 + }, + "hun_Latn-kor_Hang": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 411781 + }, + "hun_Latn-lav_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 134.29544316474713, + "max_sentence2_length": 503, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 546564 + }, + "hun_Latn-lit_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 537773 + }, + "hun_Latn-nld_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 570291 + }, + "hun_Latn-pol_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 556242 + }, + "hun_Latn-por_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 555591 + }, + "hun_Latn-rus_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 553269 + }, + "hun_Latn-spa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 566576 + }, + "hun_Latn-swa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 550885 + }, + "hun_Latn-swe_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530203 + }, + "hun_Latn-tam_Taml": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 588337 + }, + "hun_Latn-tur_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 543989 + }, + "hun_Latn-vie_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549497 + }, + "hun_Latn-zho_Hant": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 369854 + }, + "hun_Latn-zul_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536108 + }, + "hye_Armn-ell_Grek": { + "min_sentence1_length": 7, + "average_sentence1_length": 132.55633450175262, + "max_sentence1_length": 441, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 563842 + }, + "hye_Armn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 132.55633450175262, + "max_sentence1_length": 441, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 512435 + }, + "hye_Armn-kat_Geor": { + "min_sentence1_length": 7, + "average_sentence1_length": 132.55633450175262, + "max_sentence1_length": 441, + "min_sentence2_length": 8, + "average_sentence2_length": 133.49624436654983, + "max_sentence2_length": 503, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531307 + }, + "hye_Armn-sqi_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 132.55633450175262, + "max_sentence1_length": 441, + "min_sentence2_length": 7, + "average_sentence2_length": 142.01652478718077, + "max_sentence2_length": 461, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 548322 + }, + "ibo_Latn-amh_Ethi": { + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 413608 + }, + "ibo_Latn-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 493821 + }, + "ibo_Latn-hau_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 516067 + }, + "ibo_Latn-nso_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 537600 + }, + "ibo_Latn-orm_Ethi": { + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 483532 + }, + "ibo_Latn-som_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 537393 + }, + "ibo_Latn-ssw_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 534243 + }, + "ibo_Latn-swa_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 518610 + }, + "ibo_Latn-tir_Ethi": { + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 411339 + }, + "ibo_Latn-tsn_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 580384 + }, + "ibo_Latn-wol_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 485904 + }, + "ibo_Latn-xho_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 514191 + }, + "ibo_Latn-yor_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 562189 + }, + "ibo_Latn-zul_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503833 + }, + "ind_Latn-arb_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 518153 + }, + "ind_Latn-ben_Beng": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530774 + }, + "ind_Latn-deu_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582622 + }, + "ind_Latn-ell_Grek": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 586099 + }, + "ind_Latn-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 534692 + }, + "ind_Latn-fas_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530311 + }, + "ind_Latn-fij_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 587477 + }, + "ind_Latn-fil_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 580392 + }, + "ind_Latn-fin_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 556772 + }, + "ind_Latn-fra_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 580080 + }, + "ind_Latn-heb_Hebr": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 487268 + }, + "ind_Latn-hin_Deva": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 548547 + }, + "ind_Latn-hun_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 565348 + }, + "ind_Latn-jpn_Jpan": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 398598 + }, + "ind_Latn-kor_Hang": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 420377 + }, + "ind_Latn-lit_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 546369 + }, + "ind_Latn-mal_Mlym": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 591124 + }, + "ind_Latn-mlg_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 607280 + }, + "ind_Latn-mri_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561096 + }, + "ind_Latn-msa_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564155 + }, + "ind_Latn-nld_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 578887 + }, + "ind_Latn-pol_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564838 + }, + "ind_Latn-por_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564187 + }, + "ind_Latn-rus_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561865 + }, + "ind_Latn-smo_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564827 + }, + "ind_Latn-spa_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 575172 + }, + "ind_Latn-swa_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 559481 + }, + "ind_Latn-swe_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 538799 + }, + "ind_Latn-tah_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 596595 + }, + "ind_Latn-tam_Taml": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 596933 + }, + "ind_Latn-ton_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 600612 + }, + "ind_Latn-tur_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 552585 + }, + "ind_Latn-vie_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 558093 + }, + "ind_Latn-zho_Hant": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 378450 + }, + "ind_Latn-zul_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 544704 + }, + "isl_Latn-afr_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530560 + }, + "isl_Latn-dan_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 514346 + }, + "isl_Latn-deu_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 557858 + }, + "isl_Latn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509928 + }, + "isl_Latn-fao_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520011 + }, + "isl_Latn-ltz_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 542965 + }, + "isl_Latn-nld_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 554123 + }, + "isl_Latn-nno_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 510565 + }, + "isl_Latn-nob_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 513652 + }, + "isl_Latn-swe_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 514035 + }, + "ita_Latn-cat_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 572177 + }, + "ita_Latn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536937 + }, + "ita_Latn-fra_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582325 + }, + "ita_Latn-glg_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561203 + }, + "ita_Latn-mlt_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 566692 + }, + "ita_Latn-por_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 566432 + }, + "ita_Latn-ron_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 581702 + }, + "ita_Latn-spa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 577417 + }, + "jpn_Jpan-arb_Arab": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 342807 + }, + "jpn_Jpan-ben_Beng": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 355428 + }, + "jpn_Jpan-deu_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 407276 + }, + "jpn_Jpan-ell_Grek": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 410753 + }, + "jpn_Jpan-eng_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 359346 + }, + "jpn_Jpan-fas_Arab": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 354965 + }, + "jpn_Jpan-fin_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 381426 + }, + "jpn_Jpan-fra_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 404734 + }, + "jpn_Jpan-heb_Hebr": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 311922 + }, + "jpn_Jpan-hin_Deva": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 373201 + }, + "jpn_Jpan-hun_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 390002 + }, + "jpn_Jpan-ind_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 398598 + }, + "jpn_Jpan-kor_Hang": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 245031 + }, + "jpn_Jpan-lit_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 371023 + }, + "jpn_Jpan-nld_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 403541 + }, + "jpn_Jpan-pol_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 389492 + }, + "jpn_Jpan-por_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 388841 + }, + "jpn_Jpan-rus_Cyrl": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 386519 + }, + "jpn_Jpan-spa_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 399826 + }, + "jpn_Jpan-swa_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 384135 + }, + "jpn_Jpan-swe_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 363453 + }, + "jpn_Jpan-tam_Taml": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 421587 + }, + "jpn_Jpan-tur_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 377239 + }, + "jpn_Jpan-vie_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 382747 + }, + "jpn_Jpan-yue_Hant": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 4, + "average_sentence2_length": 39.502754131196795, + "max_sentence2_length": 133, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 190513 + }, + "jpn_Jpan-zho_Hans": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 4, + "average_sentence2_length": 42.54431647471207, + "max_sentence2_length": 263, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 196587 + }, + "jpn_Jpan-zho_Hant": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 203104 + }, + "jpn_Jpan-zul_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 369358 + }, + "kan_Knda-ben_Beng": { + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509338 + }, + "kan_Knda-div_Thaa": { + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 569384 + }, + "kan_Knda-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 513256 + }, + "kan_Knda-eus_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540739 + }, + "kan_Knda-guj_Gujr": { + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 510467 + }, + "kan_Knda-hin_Deva": { + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 527111 + }, + "kan_Knda-mar_Deva": { + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526423 + }, + "kan_Knda-nep_Deva": { + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 513759 + }, + "kan_Knda-pan_Guru": { + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515958 + }, + "kan_Knda-sin_Sinh": { + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 524277 + }, + "kan_Knda-snd_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 485863 + }, + "kan_Knda-tam_Taml": { + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 575497 + }, + "kan_Knda-tel_Telu": { + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 513063 + }, + "kan_Knda-urd_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 513534 + }, + "kat_Geor-ell_Grek": { + "min_sentence1_length": 8, + "average_sentence1_length": 133.49624436654983, + "max_sentence1_length": 503, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 565719 + }, + "kat_Geor-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 133.49624436654983, + "max_sentence1_length": 503, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 514312 + }, + "kat_Geor-hye_Armn": { + "min_sentence1_length": 8, + "average_sentence1_length": 133.49624436654983, + "max_sentence1_length": 503, + "min_sentence2_length": 7, + "average_sentence2_length": 132.55633450175262, + "max_sentence2_length": 441, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531307 + }, + "kat_Geor-sqi_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 133.49624436654983, + "max_sentence1_length": 503, + "min_sentence2_length": 7, + "average_sentence2_length": 142.01652478718077, + "max_sentence2_length": 461, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 550199 + }, + "kaz_Cyrl-aze_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 529910 + }, + "kaz_Cyrl-bak_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506602 + }, + "kaz_Cyrl-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 507996 + }, + "kaz_Cyrl-kir_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 511140 + }, + "kaz_Cyrl-tat_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506202 + }, + "kaz_Cyrl-tuk_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 545550 + }, + "kaz_Cyrl-tur_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525889 + }, + "kaz_Cyrl-uig_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 571298 + }, + "kaz_Cyrl-uzb_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 553971 + }, + "khm_Khmr-bod_Tibt": { + "min_sentence1_length": 6, + "average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 589120 + }, + "khm_Khmr-dzo_Tibt": { + "min_sentence1_length": 6, + "average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + "max_sentence2_length": 411, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531712 + }, + "khm_Khmr-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536211 + }, + "khm_Khmr-lao_Laoo": { + "min_sentence1_length": 6, + "average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + "max_sentence2_length": 507, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 555471 + }, + "khm_Khmr-mon_Mong": { + "min_sentence1_length": 6, + "average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547539 + }, + "khm_Khmr-mya_Mymr": { + "min_sentence1_length": 6, + "average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 600345 + }, + "khm_Khmr-tha_Thai": { + "min_sentence1_length": 6, + "average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525959 + }, + "kin_Latn-bem_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 602279 + }, + "kin_Latn-eng_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 551507 + }, + "kin_Latn-ewe_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 542765 + }, + "kin_Latn-fuc_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532267 + }, + "kin_Latn-nde_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 601526 + }, + "kin_Latn-nya_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 588069 + }, + "kin_Latn-sna_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 602117 + }, + "kin_Latn-ven_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 603543 + }, + "kir_Cyrl-aze_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520498 + }, + "kir_Cyrl-bak_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 497190 + }, + "kir_Cyrl-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 498584 + }, + "kir_Cyrl-kaz_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 511140 + }, + "kir_Cyrl-tat_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 496790 + }, + "kir_Cyrl-tuk_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536138 + }, + "kir_Cyrl-tur_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 516477 + }, + "kir_Cyrl-uig_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561886 + }, + "kir_Cyrl-uzb_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 544559 + }, + "kmr_Latn-arb_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 477127 + }, + "kmr_Latn-ckb_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 498313 + }, + "kmr_Latn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 493666 + }, + "kmr_Latn-fas_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 489285 + }, + "kmr_Latn-heb_Hebr": { + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 446242 + }, + "kmr_Latn-mey_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 459781 + }, + "kmr_Latn-prs_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 488482 + }, + "kmr_Latn-pus_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 488579 + }, + "kmr_Latn-shi_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 460859 + }, + "kmr_Latn-tgk_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520093 + }, + "kor_Hang-arb_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 364586 + }, + "kor_Hang-ben_Beng": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 377207 + }, + "kor_Hang-deu_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 429055 + }, + "kor_Hang-ell_Grek": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 432532 + }, + "kor_Hang-eng_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 381125 + }, + "kor_Hang-fas_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 376744 + }, + "kor_Hang-fin_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 403205 + }, + "kor_Hang-fra_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 426513 + }, + "kor_Hang-heb_Hebr": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 333701 + }, + "kor_Hang-hin_Deva": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 394980 + }, + "kor_Hang-hun_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 411781 + }, + "kor_Hang-ind_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 420377 + }, + "kor_Hang-jpn_Jpan": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 245031 + }, + "kor_Hang-lit_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 392802 + }, + "kor_Hang-nld_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 425320 + }, + "kor_Hang-pol_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 411271 + }, + "kor_Hang-por_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 410620 + }, + "kor_Hang-rus_Cyrl": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 408298 + }, + "kor_Hang-spa_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 421605 + }, + "kor_Hang-swa_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 405914 + }, + "kor_Hang-swe_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 385232 + }, + "kor_Hang-tam_Taml": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 443366 + }, + "kor_Hang-tur_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 399018 + }, + "kor_Hang-vie_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 404526 + }, + "kor_Hang-yue_Hant": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 4, + "average_sentence2_length": 39.502754131196795, + "max_sentence2_length": 133, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 212292 + }, + "kor_Hang-zho_Hans": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 4, + "average_sentence2_length": 42.54431647471207, + "max_sentence2_length": 263, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 218366 + }, + "kor_Hang-zho_Hant": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 224883 + }, + "kor_Hang-zul_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 391137 + }, + "lao_Laoo-bod_Tibt": { + "min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 567609 + }, + "lao_Laoo-dzo_Tibt": { + "min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + "max_sentence2_length": 411, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 510201 + }, + "lao_Laoo-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 514700 + }, + "lao_Laoo-khm_Khmr": { + "min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "min_sentence2_length": 6, + "average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 555471 + }, + "lao_Laoo-mon_Mong": { + "min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526028 + }, + "lao_Laoo-mya_Mymr": { + "min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 578834 + }, + "lao_Laoo-tha_Thai": { + "min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 504448 + }, + "lav_Latn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 134.29544316474713, + "max_sentence1_length": 503, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515908 + }, + "lav_Latn-fin_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 134.29544316474713, + "max_sentence1_length": 503, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 537988 + }, + "lav_Latn-hun_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 134.29544316474713, + "max_sentence1_length": 503, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 546564 + }, + "lav_Latn-lit_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 134.29544316474713, + "max_sentence1_length": 503, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 527585 + }, + "lit_Latn-arb_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 490578 + }, + "lit_Latn-ben_Beng": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503199 + }, + "lit_Latn-deu_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 555047 + }, + "lit_Latn-ell_Grek": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 558524 + }, + "lit_Latn-eng_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 507117 + }, + "lit_Latn-fas_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 502736 + }, + "lit_Latn-fin_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 529197 + }, + "lit_Latn-fra_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 552505 + }, + "lit_Latn-heb_Hebr": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 459693 + }, + "lit_Latn-hin_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520972 + }, + "lit_Latn-hun_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 537773 + }, + "lit_Latn-ind_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 546369 + }, + "lit_Latn-jpn_Jpan": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 371023 + }, + "lit_Latn-kor_Hang": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 392802 + }, + "lit_Latn-lav_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 7, + "average_sentence2_length": 134.29544316474713, + "max_sentence2_length": 503, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 527585 + }, + "lit_Latn-nld_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 551312 + }, + "lit_Latn-pol_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 537263 + }, + "lit_Latn-por_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536612 + }, + "lit_Latn-rus_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 534290 + }, + "lit_Latn-spa_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547597 + }, + "lit_Latn-swa_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531906 + }, + "lit_Latn-swe_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 511224 + }, + "lit_Latn-tam_Taml": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 569358 + }, + "lit_Latn-tur_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525010 + }, + "lit_Latn-vie_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530518 + }, + "lit_Latn-zho_Hant": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 350875 + }, + "lit_Latn-zul_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 517129 + }, + "ltz_Latn-afr_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549109 + }, + "ltz_Latn-dan_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532895 + }, + "ltz_Latn-deu_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 576407 + }, + "ltz_Latn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 528477 + }, + "ltz_Latn-fao_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 538560 + }, + "ltz_Latn-isl_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 542965 + }, + "ltz_Latn-nld_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 572672 + }, + "ltz_Latn-nno_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 529114 + }, + "ltz_Latn-nob_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532201 + }, + "ltz_Latn-swe_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532584 + }, + "mal_Mlym-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 551872 + }, + "mal_Mlym-fij_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 604657 + }, + "mal_Mlym-fil_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 597572 + }, + "mal_Mlym-ind_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 591124 + }, + "mal_Mlym-mlg_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 624460 + }, + "mal_Mlym-mri_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 578276 + }, + "mal_Mlym-msa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 581335 + }, + "mal_Mlym-smo_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582007 + }, + "mal_Mlym-tah_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 613775 + }, + "mal_Mlym-ton_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 617792 + }, + "mar_Deva-ben_Beng": { + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 504689 + }, + "mar_Deva-div_Thaa": { + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564735 + }, + "mar_Deva-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508607 + }, + "mar_Deva-eus_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536090 + }, + "mar_Deva-guj_Gujr": { + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 505818 + }, + "mar_Deva-hin_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522462 + }, + "mar_Deva-kan_Knda": { + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526423 + }, + "mar_Deva-nep_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509110 + }, + "mar_Deva-pan_Guru": { + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 511309 + }, + "mar_Deva-sin_Sinh": { + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 519628 + }, + "mar_Deva-snd_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 481214 + }, + "mar_Deva-tam_Taml": { + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 570848 + }, + "mar_Deva-tel_Telu": { + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508414 + }, + "mar_Deva-urd_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508885 + }, + "mey_Arab-arb_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 445016 + }, + "mey_Arab-ckb_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 466202 + }, + "mey_Arab-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 461555 + }, + "mey_Arab-fas_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 457174 + }, + "mey_Arab-heb_Hebr": { + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 414131 + }, + "mey_Arab-kmr_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 459781 + }, + "mey_Arab-prs_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 456371 + }, + "mey_Arab-pus_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 456468 + }, + "mey_Arab-shi_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 428748 + }, + "mey_Arab-tgk_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 487982 + }, + "mkd_Cyrl-bel_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 523981 + }, + "mkd_Cyrl-bos_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522801 + }, + "mkd_Cyrl-bul_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 537780 + }, + "mkd_Cyrl-ces_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509209 + }, + "mkd_Cyrl-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515611 + }, + "mkd_Cyrl-hrv_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 523816 + }, + "mkd_Cyrl-pol_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 545757 + }, + "mkd_Cyrl-rus_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 542784 + }, + "mkd_Cyrl-slk_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520860 + }, + "mkd_Cyrl-slv_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520787 + }, + "mkd_Cyrl-srp_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520194 + }, + "mkd_Cyrl-srp_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 524032 + }, + "mkd_Cyrl-ukr_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530674 + }, + "mlg_Latn-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 568028 + }, + "mlg_Latn-fij_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 620813 + }, + "mlg_Latn-fil_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 613728 + }, + "mlg_Latn-ind_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 607280 + }, + "mlg_Latn-mal_Mlym": { + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 624460 + }, + "mlg_Latn-mri_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 594432 + }, + "mlg_Latn-msa_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 597491 + }, + "mlg_Latn-smo_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 598163 + }, + "mlg_Latn-tah_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 629931 + }, + "mlg_Latn-ton_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 633948 + }, + "mlt_Latn-cat_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 560435 + }, + "mlt_Latn-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525195 + }, + "mlt_Latn-fra_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 570583 + }, + "mlt_Latn-glg_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549461 + }, + "mlt_Latn-ita_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 566692 + }, + "mlt_Latn-por_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 554690 + }, + "mlt_Latn-ron_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 569960 + }, + "mlt_Latn-spa_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 565675 + }, + "mon_Mong-bod_Tibt": { + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 559677 + }, + "mon_Mong-dzo_Tibt": { + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + "max_sentence2_length": 411, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 502269 + }, + "mon_Mong-eng_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506768 + }, + "mon_Mong-khm_Khmr": { + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "min_sentence2_length": 6, + "average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547539 + }, + "mon_Mong-lao_Laoo": { + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + "max_sentence2_length": 507, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526028 + }, + "mon_Mong-mya_Mymr": { + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 570902 + }, + "mon_Mong-tha_Thai": { + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 496516 + }, + "mri_Latn-eng_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 521844 + }, + "mri_Latn-fij_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 574629 + }, + "mri_Latn-fil_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 567544 + }, + "mri_Latn-ind_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561096 + }, + "mri_Latn-mal_Mlym": { + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 578276 + }, + "mri_Latn-mlg_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 594432 + }, + "mri_Latn-msa_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 551307 + }, + "mri_Latn-smo_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 551979 + }, + "mri_Latn-tah_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 583747 + }, + "mri_Latn-ton_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 587764 + }, + "msa_Latn-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 524903 + }, + "msa_Latn-fij_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 577688 + }, + "msa_Latn-fil_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 570603 + }, + "msa_Latn-ind_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564155 + }, + "msa_Latn-mal_Mlym": { + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 581335 + }, + "msa_Latn-mlg_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 597491 + }, + "msa_Latn-mri_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 551307 + }, + "msa_Latn-smo_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 555038 + }, + "msa_Latn-tah_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 586806 + }, + "msa_Latn-ton_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 590823 + }, + "mya_Mymr-bod_Tibt": { + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 612483 + }, + "mya_Mymr-dzo_Tibt": { + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + "max_sentence2_length": 411, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 555075 + }, + "mya_Mymr-eng_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 559574 + }, + "mya_Mymr-khm_Khmr": { + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + "min_sentence2_length": 6, + "average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 600345 + }, + "mya_Mymr-lao_Laoo": { + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + "max_sentence2_length": 507, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 578834 + }, + "mya_Mymr-mon_Mong": { + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 570902 + }, + "mya_Mymr-tha_Thai": { + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549322 + }, + "nde_Latn-bem_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 596231 + }, + "nde_Latn-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 545459 + }, + "nde_Latn-ewe_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536717 + }, + "nde_Latn-fuc_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526219 + }, + "nde_Latn-kin_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 601526 + }, + "nde_Latn-nya_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582021 + }, + "nde_Latn-sna_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 596069 + }, + "nde_Latn-ven_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 597495 + }, + "nep_Deva-ben_Beng": { + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 492025 + }, + "nep_Deva-div_Thaa": { + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 552071 + }, + "nep_Deva-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495943 + }, + "nep_Deva-eus_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 523426 + }, + "nep_Deva-guj_Gujr": { + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 493154 + }, + "nep_Deva-hin_Deva": { + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509798 + }, + "nep_Deva-kan_Knda": { + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 513759 + }, + "nep_Deva-mar_Deva": { + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509110 + }, + "nep_Deva-pan_Guru": { + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 498645 + }, + "nep_Deva-sin_Sinh": { + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506964 + }, + "nep_Deva-snd_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 468550 + }, + "nep_Deva-tam_Taml": { + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 558184 + }, + "nep_Deva-tel_Telu": { + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495750 + }, + "nep_Deva-urd_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 496221 + }, + "nld_Latn-afr_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 560267 + }, + "nld_Latn-arb_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 523096 + }, + "nld_Latn-ben_Beng": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 535717 + }, + "nld_Latn-dan_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 544053 + }, + "nld_Latn-deu_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 587565 + }, + "nld_Latn-ell_Grek": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 591042 + }, + "nld_Latn-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 539635 + }, + "nld_Latn-fao_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549718 + }, + "nld_Latn-fas_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 535254 + }, + "nld_Latn-fin_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561715 + }, + "nld_Latn-fra_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 585023 + }, + "nld_Latn-heb_Hebr": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 492211 + }, + "nld_Latn-hin_Deva": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 553490 + }, + "nld_Latn-hun_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 570291 + }, + "nld_Latn-ind_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 578887 + }, + "nld_Latn-isl_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 554123 + }, + "nld_Latn-jpn_Jpan": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 403541 + }, + "nld_Latn-kor_Hang": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 425320 + }, + "nld_Latn-lit_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 551312 + }, + "nld_Latn-ltz_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 572672 + }, + "nld_Latn-nno_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540272 + }, + "nld_Latn-nob_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 543359 + }, + "nld_Latn-pol_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 569781 + }, + "nld_Latn-por_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 569130 + }, + "nld_Latn-rus_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 566808 + }, + "nld_Latn-spa_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 580115 + }, + "nld_Latn-swa_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564424 + }, + "nld_Latn-swe_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 543742 + }, + "nld_Latn-tam_Taml": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 601876 + }, + "nld_Latn-tur_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 557528 + }, + "nld_Latn-vie_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 563036 + }, + "nld_Latn-zho_Hant": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 383393 + }, + "nld_Latn-zul_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549647 + }, + "nno_Latn-afr_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 516709 + }, + "nno_Latn-dan_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 500495 + }, + "nno_Latn-deu_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 544007 + }, + "nno_Latn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 496077 + }, + "nno_Latn-fao_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506160 + }, + "nno_Latn-isl_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 510565 + }, + "nno_Latn-ltz_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 529114 + }, + "nno_Latn-nld_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540272 + }, + "nno_Latn-nob_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 499801 + }, + "nno_Latn-swe_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 500184 + }, + "nob_Latn-afr_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 519796 + }, + "nob_Latn-dan_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503582 + }, + "nob_Latn-deu_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547094 + }, + "nob_Latn-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 499164 + }, + "nob_Latn-fao_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509247 + }, + "nob_Latn-isl_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 513652 + }, + "nob_Latn-ltz_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532201 + }, + "nob_Latn-nld_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 543359 + }, + "nob_Latn-nno_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 499801 + }, + "nob_Latn-swe_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503271 + }, + "nso_Latn-amh_Ethi": { + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 459006 + }, + "nso_Latn-eng_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 539219 + }, + "nso_Latn-hau_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561465 + }, + "nso_Latn-ibo_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 537600 + }, + "nso_Latn-orm_Ethi": { + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 528930 + }, + "nso_Latn-som_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582791 + }, + "nso_Latn-ssw_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 579641 + }, + "nso_Latn-swa_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564008 + }, + "nso_Latn-tir_Ethi": { + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 456737 + }, + "nso_Latn-tsn_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 625782 + }, + "nso_Latn-wol_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531302 + }, + "nso_Latn-xho_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 559589 + }, + "nso_Latn-yor_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 607587 + }, + "nso_Latn-zul_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549231 + }, + "nya_Latn-bem_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582774 + }, + "nya_Latn-eng_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532002 + }, + "nya_Latn-ewe_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 523260 + }, + "nya_Latn-fuc_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 512762 + }, + "nya_Latn-kin_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 588069 + }, + "nya_Latn-nde_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582021 + }, + "nya_Latn-sna_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582612 + }, + "nya_Latn-ven_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 584038 + }, + "orm_Ethi-amh_Ethi": { + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 404938 + }, + "orm_Ethi-eng_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 485151 + }, + "orm_Ethi-hau_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 507397 + }, + "orm_Ethi-ibo_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 483532 + }, + "orm_Ethi-nso_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 528930 + }, + "orm_Ethi-som_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 528723 + }, + "orm_Ethi-ssw_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525573 + }, + "orm_Ethi-swa_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509940 + }, + "orm_Ethi-tir_Ethi": { + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 402669 + }, + "orm_Ethi-tsn_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 571714 + }, + "orm_Ethi-wol_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 477234 + }, + "orm_Ethi-xho_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 505521 + }, + "orm_Ethi-yor_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 553519 + }, + "orm_Ethi-zul_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495163 + }, + "pan_Guru-ben_Beng": { + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 494224 + }, + "pan_Guru-div_Thaa": { + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 554270 + }, + "pan_Guru-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 498142 + }, + "pan_Guru-eus_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525625 + }, + "pan_Guru-guj_Gujr": { + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495353 + }, + "pan_Guru-hin_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 511997 + }, + "pan_Guru-kan_Knda": { + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515958 + }, + "pan_Guru-mar_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 511309 + }, + "pan_Guru-nep_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 498645 + }, + "pan_Guru-sin_Sinh": { + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509163 + }, + "pan_Guru-snd_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 470749 + }, + "pan_Guru-tam_Taml": { + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 560383 + }, + "pan_Guru-tel_Telu": { + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 497949 + }, + "pan_Guru-urd_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 498420 + }, + "pol_Latn-arb_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509047 + }, + "pol_Latn-bel_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 533956 + }, + "pol_Latn-ben_Beng": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 521668 + }, + "pol_Latn-bos_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532776 + }, + "pol_Latn-bul_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547755 + }, + "pol_Latn-ces_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 519184 + }, + "pol_Latn-deu_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 573516 + }, + "pol_Latn-ell_Grek": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 576993 + }, + "pol_Latn-eng_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525586 + }, + "pol_Latn-fas_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 521205 + }, + "pol_Latn-fin_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547666 + }, + "pol_Latn-fra_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 570974 + }, + "pol_Latn-heb_Hebr": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 478162 + }, + "pol_Latn-hin_Deva": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 539441 + }, + "pol_Latn-hrv_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 533791 + }, + "pol_Latn-hun_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 556242 + }, + "pol_Latn-ind_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564838 + }, + "pol_Latn-jpn_Jpan": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 389492 + }, + "pol_Latn-kor_Hang": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 411271 + }, + "pol_Latn-lit_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 537263 + }, + "pol_Latn-mkd_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 545757 + }, + "pol_Latn-nld_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 569781 + }, + "pol_Latn-por_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 555081 + }, + "pol_Latn-rus_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 552759 + }, + "pol_Latn-slk_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530835 + }, + "pol_Latn-slv_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530762 + }, + "pol_Latn-spa_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 566066 + }, + "pol_Latn-srp_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530169 + }, + "pol_Latn-srp_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 534007 + }, + "pol_Latn-swa_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 550375 + }, + "pol_Latn-swe_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 529693 + }, + "pol_Latn-tam_Taml": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 587827 + }, + "pol_Latn-tur_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 543479 + }, + "pol_Latn-ukr_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540649 + }, + "pol_Latn-vie_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 548987 + }, + "pol_Latn-zho_Hant": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 369344 + }, + "pol_Latn-zul_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 535598 + }, + "por_Latn-arb_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508396 + }, + "por_Latn-ben_Beng": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 521017 + }, + "por_Latn-cat_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 560175 + }, + "por_Latn-deu_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 572865 + }, + "por_Latn-ell_Grek": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 576342 + }, + "por_Latn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 524935 + }, + "por_Latn-fas_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520554 + }, + "por_Latn-fin_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547015 + }, + "por_Latn-fra_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 570323 + }, + "por_Latn-glg_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549201 + }, + "por_Latn-heb_Hebr": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 477511 + }, + "por_Latn-hin_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 538790 + }, + "por_Latn-hun_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 555591 + }, + "por_Latn-ind_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564187 + }, + "por_Latn-ita_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 566432 + }, + "por_Latn-jpn_Jpan": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 388841 + }, + "por_Latn-kor_Hang": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 410620 + }, + "por_Latn-lit_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536612 + }, + "por_Latn-mlt_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 554690 + }, + "por_Latn-nld_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 569130 + }, + "por_Latn-pol_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 555081 + }, + "por_Latn-ron_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 569700 + }, + "por_Latn-rus_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 552108 + }, + "por_Latn-spa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 565415 + }, + "por_Latn-swa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549724 + }, + "por_Latn-swe_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 529042 + }, + "por_Latn-tam_Taml": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 587176 + }, + "por_Latn-tur_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 542828 + }, + "por_Latn-vie_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 548336 + }, + "por_Latn-zho_Hant": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 368693 + }, + "por_Latn-zul_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 534947 + }, + "prs_Arab-arb_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 473717 + }, + "prs_Arab-ckb_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 494903 + }, + "prs_Arab-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 490256 + }, + "prs_Arab-fas_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 485875 + }, + "prs_Arab-heb_Hebr": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 442832 + }, + "prs_Arab-kmr_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 488482 + }, + "prs_Arab-mey_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 456371 + }, + "prs_Arab-pus_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 485169 + }, + "prs_Arab-shi_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 457449 + }, + "prs_Arab-tgk_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 516683 + }, + "pus_Arab-arb_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 473814 + }, + "pus_Arab-ckb_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495000 + }, + "pus_Arab-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 490353 + }, + "pus_Arab-fas_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 485972 + }, + "pus_Arab-heb_Hebr": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 442929 + }, + "pus_Arab-kmr_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 488579 + }, + "pus_Arab-mey_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 456468 + }, + "pus_Arab-prs_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 485169 + }, + "pus_Arab-shi_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 457546 + }, + "pus_Arab-tgk_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 516780 + }, + "ron_Latn-cat_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 575445 + }, + "ron_Latn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540205 + }, + "ron_Latn-fra_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 585593 + }, + "ron_Latn-glg_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564471 + }, + "ron_Latn-ita_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 581702 + }, + "ron_Latn-mlt_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 569960 + }, + "ron_Latn-por_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 569700 + }, + "ron_Latn-spa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 580685 + }, + "rus_Cyrl-arb_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506074 + }, + "rus_Cyrl-bel_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530983 + }, + "rus_Cyrl-ben_Beng": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 518695 + }, + "rus_Cyrl-bos_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 529803 + }, + "rus_Cyrl-bul_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 544782 + }, + "rus_Cyrl-ces_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 516211 + }, + "rus_Cyrl-deu_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 570543 + }, + "rus_Cyrl-ell_Grek": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 574020 + }, + "rus_Cyrl-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522613 + }, + "rus_Cyrl-fas_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 518232 + }, + "rus_Cyrl-fin_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 544693 + }, + "rus_Cyrl-fra_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 568001 + }, + "rus_Cyrl-heb_Hebr": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 475189 + }, + "rus_Cyrl-hin_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536468 + }, + "rus_Cyrl-hrv_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530818 + }, + "rus_Cyrl-hun_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 553269 + }, + "rus_Cyrl-ind_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561865 + }, + "rus_Cyrl-jpn_Jpan": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 386519 + }, + "rus_Cyrl-kor_Hang": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 408298 + }, + "rus_Cyrl-lit_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 534290 + }, + "rus_Cyrl-mkd_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 542784 + }, + "rus_Cyrl-nld_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 566808 + }, + "rus_Cyrl-pol_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 552759 + }, + "rus_Cyrl-por_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 552108 + }, + "rus_Cyrl-slk_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 527862 + }, + "rus_Cyrl-slv_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 527789 + }, + "rus_Cyrl-spa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 563093 + }, + "rus_Cyrl-srp_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 527196 + }, + "rus_Cyrl-srp_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531034 + }, + "rus_Cyrl-swa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547402 + }, + "rus_Cyrl-swe_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526720 + }, + "rus_Cyrl-tam_Taml": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 584854 + }, + "rus_Cyrl-tur_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540506 + }, + "rus_Cyrl-ukr_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 537676 + }, + "rus_Cyrl-vie_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 546014 + }, + "rus_Cyrl-zho_Hant": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 366371 + }, + "rus_Cyrl-zul_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532625 + }, + "shi_Arab-arb_Arab": { + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 446094 + }, + "shi_Arab-ckb_Arab": { + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 467280 + }, + "shi_Arab-eng_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 462633 + }, + "shi_Arab-fas_Arab": { + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 458252 + }, + "shi_Arab-heb_Hebr": { + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 415209 + }, + "shi_Arab-kmr_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 460859 + }, + "shi_Arab-mey_Arab": { + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 428748 + }, + "shi_Arab-prs_Arab": { + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 457449 + }, + "shi_Arab-pus_Arab": { + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 457546 + }, + "shi_Arab-tgk_Cyrl": { + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 489060 + }, + "sin_Sinh-ben_Beng": { + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 502543 + }, + "sin_Sinh-div_Thaa": { + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 562589 + }, + "sin_Sinh-eng_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506461 + }, + "sin_Sinh-eus_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 533944 + }, + "sin_Sinh-guj_Gujr": { + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503672 + }, + "sin_Sinh-hin_Deva": { + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520316 + }, + "sin_Sinh-kan_Knda": { + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 524277 + }, + "sin_Sinh-mar_Deva": { + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 519628 + }, + "sin_Sinh-nep_Deva": { + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506964 + }, + "sin_Sinh-pan_Guru": { + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509163 + }, + "sin_Sinh-snd_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 479068 + }, + "sin_Sinh-tam_Taml": { + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 568702 + }, + "sin_Sinh-tel_Telu": { + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506268 + }, + "sin_Sinh-urd_Arab": { + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506739 + }, + "slk_Latn-bel_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509059 + }, + "slk_Latn-bos_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 507879 + }, + "slk_Latn-bul_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522858 + }, + "slk_Latn-ces_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 494287 + }, + "slk_Latn-eng_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 500689 + }, + "slk_Latn-hrv_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508894 + }, + "slk_Latn-mkd_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520860 + }, + "slk_Latn-pol_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530835 + }, + "slk_Latn-rus_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 527862 + }, + "slk_Latn-slv_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 505865 + }, + "slk_Latn-srp_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 505272 + }, + "slk_Latn-srp_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509110 + }, + "slk_Latn-ukr_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515752 + }, + "slv_Latn-bel_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508986 + }, + "slv_Latn-bos_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 507806 + }, + "slv_Latn-bul_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522785 + }, + "slv_Latn-ces_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 494214 + }, + "slv_Latn-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 500616 + }, + "slv_Latn-hrv_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508821 + }, + "slv_Latn-mkd_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520787 + }, + "slv_Latn-pol_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530762 + }, + "slv_Latn-rus_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 527789 + }, + "slv_Latn-slk_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 505865 + }, + "slv_Latn-srp_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 505199 + }, + "slv_Latn-srp_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509037 + }, + "slv_Latn-ukr_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515679 + }, + "smo_Latn-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525575 + }, + "smo_Latn-fij_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 578360 + }, + "smo_Latn-fil_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 571275 + }, + "smo_Latn-ind_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564827 + }, + "smo_Latn-mal_Mlym": { + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582007 + }, + "smo_Latn-mlg_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 598163 + }, + "smo_Latn-mri_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 551979 + }, + "smo_Latn-msa_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 555038 + }, + "smo_Latn-tah_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 587478 + }, + "smo_Latn-ton_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 591495 + }, + "sna_Latn-bem_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 596822 + }, + "sna_Latn-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 546050 + }, + "sna_Latn-ewe_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 537308 + }, + "sna_Latn-fuc_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526810 + }, + "sna_Latn-kin_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 602117 + }, + "sna_Latn-nde_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 596069 + }, + "sna_Latn-nya_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582612 + }, + "sna_Latn-ven_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 598086 + }, + "snd_Arab-ben_Beng": { + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 464129 + }, + "snd_Arab-div_Thaa": { + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 524175 + }, + "snd_Arab-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 468047 + }, + "snd_Arab-eus_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495530 + }, + "snd_Arab-guj_Gujr": { + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 465258 + }, + "snd_Arab-hin_Deva": { + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 481902 + }, + "snd_Arab-kan_Knda": { + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 485863 + }, + "snd_Arab-mar_Deva": { + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 481214 + }, + "snd_Arab-nep_Deva": { + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 468550 + }, + "snd_Arab-pan_Guru": { + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 470749 + }, + "snd_Arab-sin_Sinh": { + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 479068 + }, + "snd_Arab-tam_Taml": { + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530288 + }, + "snd_Arab-tel_Telu": { + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 467854 + }, + "snd_Arab-urd_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 468325 + }, + "som_Latn-amh_Ethi": { + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 458799 + }, + "som_Latn-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 539012 + }, + "som_Latn-hau_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561258 + }, + "som_Latn-ibo_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 537393 + }, + "som_Latn-nso_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582791 + }, + "som_Latn-orm_Ethi": { + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 528723 + }, + "som_Latn-ssw_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 579434 + }, + "som_Latn-swa_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 563801 + }, + "som_Latn-tir_Ethi": { + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 456530 + }, + "som_Latn-tsn_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 625575 + }, + "som_Latn-wol_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531095 + }, + "som_Latn-xho_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 559382 + }, + "som_Latn-yor_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 607380 + }, + "som_Latn-zul_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549024 + }, + "spa_Latn-arb_Arab": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 519381 + }, + "spa_Latn-ben_Beng": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532002 + }, + "spa_Latn-cat_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 571160 + }, + "spa_Latn-deu_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 583850 + }, + "spa_Latn-ell_Grek": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 587327 + }, + "spa_Latn-eng_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 535920 + }, + "spa_Latn-fas_Arab": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531539 + }, + "spa_Latn-fin_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 558000 + }, + "spa_Latn-fra_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 581308 + }, + "spa_Latn-glg_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 560186 + }, + "spa_Latn-heb_Hebr": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 488496 + }, + "spa_Latn-hin_Deva": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549775 + }, + "spa_Latn-hun_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 566576 + }, + "spa_Latn-ind_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 575172 + }, + "spa_Latn-ita_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 577417 + }, + "spa_Latn-jpn_Jpan": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 399826 + }, + "spa_Latn-kor_Hang": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 421605 + }, + "spa_Latn-lit_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547597 + }, + "spa_Latn-mlt_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 565675 + }, + "spa_Latn-nld_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 580115 + }, + "spa_Latn-pol_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 566066 + }, + "spa_Latn-por_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 565415 + }, + "spa_Latn-ron_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 580685 + }, + "spa_Latn-rus_Cyrl": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 563093 + }, + "spa_Latn-swa_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 560709 + }, + "spa_Latn-swe_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540027 + }, + "spa_Latn-tam_Taml": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 598161 + }, + "spa_Latn-tur_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 553813 + }, + "spa_Latn-vie_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 559321 + }, + "spa_Latn-zho_Hant": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 379678 + }, + "spa_Latn-zul_Latn": { + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 545932 + }, + "sqi_Latn-ell_Grek": { + "min_sentence1_length": 7, + "average_sentence1_length": 142.01652478718077, + "max_sentence1_length": 461, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582734 + }, + "sqi_Latn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 142.01652478718077, + "max_sentence1_length": 461, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531327 + }, + "sqi_Latn-hye_Armn": { + "min_sentence1_length": 7, + "average_sentence1_length": 142.01652478718077, + "max_sentence1_length": 461, + "min_sentence2_length": 7, + "average_sentence2_length": 132.55633450175262, + "max_sentence2_length": 441, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 548322 + }, + "sqi_Latn-kat_Geor": { + "min_sentence1_length": 7, + "average_sentence1_length": 142.01652478718077, + "max_sentence1_length": 461, + "min_sentence2_length": 8, + "average_sentence2_length": 133.49624436654983, + "max_sentence2_length": 503, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 550199 + }, + "srp_Cyrl-bel_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508393 + }, + "srp_Cyrl-bos_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 507213 + }, + "srp_Cyrl-bul_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522192 + }, + "srp_Cyrl-ces_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 493621 + }, + "srp_Cyrl-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 500023 + }, + "srp_Cyrl-hrv_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508228 + }, + "srp_Cyrl-mkd_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520194 + }, + "srp_Cyrl-pol_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530169 + }, + "srp_Cyrl-rus_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 527196 + }, + "srp_Cyrl-slk_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 505272 + }, + "srp_Cyrl-slv_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 505199 + }, + "srp_Cyrl-srp_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508444 + }, + "srp_Cyrl-ukr_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515086 + }, + "srp_Latn-bel_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 512231 + }, + "srp_Latn-bos_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 511051 + }, + "srp_Latn-bul_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526030 + }, + "srp_Latn-ces_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 497459 + }, + "srp_Latn-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503861 + }, + "srp_Latn-hrv_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 512066 + }, + "srp_Latn-mkd_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 524032 + }, + "srp_Latn-pol_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 534007 + }, + "srp_Latn-rus_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531034 + }, + "srp_Latn-slk_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509110 + }, + "srp_Latn-slv_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509037 + }, + "srp_Latn-srp_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508444 + }, + "srp_Latn-ukr_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 518924 + }, + "ssw_Latn-amh_Ethi": { + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 455649 + }, + "ssw_Latn-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 535862 + }, + "ssw_Latn-hau_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 558108 + }, + "ssw_Latn-ibo_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 534243 + }, + "ssw_Latn-nso_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 579641 + }, + "ssw_Latn-orm_Ethi": { + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525573 + }, + "ssw_Latn-som_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 579434 + }, + "ssw_Latn-swa_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 560651 + }, + "ssw_Latn-tir_Ethi": { + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 453380 + }, + "ssw_Latn-tsn_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 622425 + }, + "ssw_Latn-wol_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 527945 + }, + "ssw_Latn-xho_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 556232 + }, + "ssw_Latn-yor_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 604230 + }, + "ssw_Latn-zul_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 545874 + }, + "swa_Latn-amh_Ethi": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 440016 + }, + "swa_Latn-arb_Arab": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503690 + }, + "swa_Latn-ben_Beng": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 516311 + }, + "swa_Latn-deu_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 568159 + }, + "swa_Latn-ell_Grek": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 571636 + }, + "swa_Latn-eng_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520229 + }, + "swa_Latn-fas_Arab": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515848 + }, + "swa_Latn-fin_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 542309 + }, + "swa_Latn-fra_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 565617 + }, + "swa_Latn-hau_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 542475 + }, + "swa_Latn-heb_Hebr": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 472805 + }, + "swa_Latn-hin_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 534084 + }, + "swa_Latn-hun_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 550885 + }, + "swa_Latn-ibo_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 518610 + }, + "swa_Latn-ind_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 559481 + }, + "swa_Latn-jpn_Jpan": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 384135 + }, + "swa_Latn-kor_Hang": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 405914 + }, + "swa_Latn-lit_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531906 + }, + "swa_Latn-nld_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564424 + }, + "swa_Latn-nso_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564008 + }, + "swa_Latn-orm_Ethi": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509940 + }, + "swa_Latn-pol_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 550375 + }, + "swa_Latn-por_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549724 + }, + "swa_Latn-rus_Cyrl": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547402 + }, + "swa_Latn-som_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 563801 + }, + "swa_Latn-spa_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 560709 + }, + "swa_Latn-ssw_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 560651 + }, + "swa_Latn-swe_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 524336 + }, + "swa_Latn-tam_Taml": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582470 + }, + "swa_Latn-tir_Ethi": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 437747 + }, + "swa_Latn-tsn_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 606792 + }, + "swa_Latn-tur_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 538122 + }, + "swa_Latn-vie_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 543630 + }, + "swa_Latn-wol_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 512312 + }, + "swa_Latn-xho_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540599 + }, + "swa_Latn-yor_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 588597 + }, + "swa_Latn-zho_Hant": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 363987 + }, + "swa_Latn-zul_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530241 + }, + "swe_Latn-afr_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520179 + }, + "swe_Latn-arb_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 483008 + }, + "swe_Latn-ben_Beng": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495629 + }, + "swe_Latn-dan_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503965 + }, + "swe_Latn-deu_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547477 + }, + "swe_Latn-ell_Grek": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 550954 + }, + "swe_Latn-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 499547 + }, + "swe_Latn-fao_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509630 + }, + "swe_Latn-fas_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495166 + }, + "swe_Latn-fin_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 521627 + }, + "swe_Latn-fra_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 544935 + }, + "swe_Latn-heb_Hebr": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 452123 + }, + "swe_Latn-hin_Deva": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 513402 + }, + "swe_Latn-hun_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530203 + }, + "swe_Latn-ind_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 538799 + }, + "swe_Latn-isl_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 514035 + }, + "swe_Latn-jpn_Jpan": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 363453 + }, + "swe_Latn-kor_Hang": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 385232 + }, + "swe_Latn-lit_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 511224 + }, + "swe_Latn-ltz_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532584 + }, + "swe_Latn-nld_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 543742 + }, + "swe_Latn-nno_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 500184 + }, + "swe_Latn-nob_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503271 + }, + "swe_Latn-pol_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 529693 + }, + "swe_Latn-por_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 529042 + }, + "swe_Latn-rus_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526720 + }, + "swe_Latn-spa_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540027 + }, + "swe_Latn-swa_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 524336 + }, + "swe_Latn-tam_Taml": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561788 + }, + "swe_Latn-tur_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 517440 + }, + "swe_Latn-vie_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522948 + }, + "swe_Latn-zho_Hant": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 343305 + }, + "swe_Latn-zul_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509559 + }, + "tah_Latn-eng_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 557343 + }, + "tah_Latn-fij_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 610128 + }, + "tah_Latn-fil_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 603043 + }, + "tah_Latn-ind_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 596595 + }, + "tah_Latn-mal_Mlym": { + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 613775 + }, + "tah_Latn-mlg_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 629931 + }, + "tah_Latn-mri_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 583747 + }, + "tah_Latn-msa_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 586806 + }, + "tah_Latn-smo_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 587478 + }, + "tah_Latn-ton_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 623263 + }, + "tam_Taml-arb_Arab": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 541142 + }, + "tam_Taml-ben_Beng": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 553763 + }, + "tam_Taml-deu_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 605611 + }, + "tam_Taml-div_Thaa": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 613809 + }, + "tam_Taml-ell_Grek": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 609088 + }, + "tam_Taml-eng_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 557681 + }, + "tam_Taml-eus_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 585164 + }, + "tam_Taml-fas_Arab": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 553300 + }, + "tam_Taml-fin_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 579761 + }, + "tam_Taml-fra_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 603069 + }, + "tam_Taml-guj_Gujr": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 554892 + }, + "tam_Taml-heb_Hebr": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 510257 + }, + "tam_Taml-hin_Deva": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 571536 + }, + "tam_Taml-hun_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 588337 + }, + "tam_Taml-ind_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 596933 + }, + "tam_Taml-jpn_Jpan": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 421587 + }, + "tam_Taml-kan_Knda": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 575497 + }, + "tam_Taml-kor_Hang": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 443366 + }, + "tam_Taml-lit_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 569358 + }, + "tam_Taml-mar_Deva": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 570848 + }, + "tam_Taml-nep_Deva": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 558184 + }, + "tam_Taml-nld_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 601876 + }, + "tam_Taml-pan_Guru": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 560383 + }, + "tam_Taml-pol_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 587827 + }, + "tam_Taml-por_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 587176 + }, + "tam_Taml-rus_Cyrl": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 584854 + }, + "tam_Taml-sin_Sinh": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 568702 + }, + "tam_Taml-snd_Arab": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530288 + }, + "tam_Taml-spa_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 598161 + }, + "tam_Taml-swa_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582470 + }, + "tam_Taml-swe_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561788 + }, + "tam_Taml-tel_Telu": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 557488 + }, + "tam_Taml-tur_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 575574 + }, + "tam_Taml-urd_Arab": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 557959 + }, + "tam_Taml-vie_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 581082 + }, + "tam_Taml-zho_Hant": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 401439 + }, + "tam_Taml-zul_Latn": { + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 567693 + }, + "tat_Cyrl-aze_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515560 + }, + "tat_Cyrl-bak_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 492252 + }, + "tat_Cyrl-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 493646 + }, + "tat_Cyrl-kaz_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506202 + }, + "tat_Cyrl-kir_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 496790 + }, + "tat_Cyrl-tuk_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531200 + }, + "tat_Cyrl-tur_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 511539 + }, + "tat_Cyrl-uig_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 556948 + }, + "tat_Cyrl-uzb_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 539621 + }, + "tel_Telu-ben_Beng": { + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 491329 + }, + "tel_Telu-div_Thaa": { + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 551375 + }, + "tel_Telu-eng_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495247 + }, + "tel_Telu-eus_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522730 + }, + "tel_Telu-guj_Gujr": { + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 492458 + }, + "tel_Telu-hin_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509102 + }, + "tel_Telu-kan_Knda": { + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 513063 + }, + "tel_Telu-mar_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508414 + }, + "tel_Telu-nep_Deva": { + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495750 + }, + "tel_Telu-pan_Guru": { + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 497949 + }, + "tel_Telu-sin_Sinh": { + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506268 + }, + "tel_Telu-snd_Arab": { + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 467854 + }, + "tel_Telu-tam_Taml": { + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 557488 + }, + "tel_Telu-urd_Arab": { + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495525 + }, + "tgk_Cyrl-arb_Arab": { + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 505328 + }, + "tgk_Cyrl-ckb_Arab": { + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 526514 + }, + "tgk_Cyrl-eng_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 521867 + }, + "tgk_Cyrl-fas_Arab": { + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 517486 + }, + "tgk_Cyrl-heb_Hebr": { + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 474443 + }, + "tgk_Cyrl-kmr_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 520093 + }, + "tgk_Cyrl-mey_Arab": { + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 487982 + }, + "tgk_Cyrl-prs_Arab": { + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 516683 + }, + "tgk_Cyrl-pus_Arab": { + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 516780 + }, + "tgk_Cyrl-shi_Arab": { + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 489060 + }, + "tha_Thai-bod_Tibt": { + "min_sentence1_length": 7, + "average_sentence1_length": 118.91236855282925, + "max_sentence1_length": 439, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 538097 + }, + "tha_Thai-dzo_Tibt": { + "min_sentence1_length": 7, + "average_sentence1_length": 118.91236855282925, + "max_sentence1_length": 439, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + "max_sentence2_length": 411, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 480689 + }, + "tha_Thai-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 118.91236855282925, + "max_sentence1_length": 439, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 485188 + }, + "tha_Thai-khm_Khmr": { + "min_sentence1_length": 7, + "average_sentence1_length": 118.91236855282925, + "max_sentence1_length": 439, + "min_sentence2_length": 6, + "average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525959 + }, + "tha_Thai-lao_Laoo": { + "min_sentence1_length": 7, + "average_sentence1_length": 118.91236855282925, + "max_sentence1_length": 439, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + "max_sentence2_length": 507, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 504448 + }, + "tha_Thai-mon_Mong": { + "min_sentence1_length": 7, + "average_sentence1_length": 118.91236855282925, + "max_sentence1_length": 439, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 496516 + }, + "tha_Thai-mya_Mymr": { + "min_sentence1_length": 7, + "average_sentence1_length": 118.91236855282925, + "max_sentence1_length": 439, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549322 + }, + "tir_Ethi-amh_Ethi": { + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 332745 + }, + "tir_Ethi-eng_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 412958 + }, + "tir_Ethi-hau_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 435204 + }, + "tir_Ethi-ibo_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 411339 + }, + "tir_Ethi-nso_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 456737 + }, + "tir_Ethi-orm_Ethi": { + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 402669 + }, + "tir_Ethi-som_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 456530 + }, + "tir_Ethi-ssw_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 453380 + }, + "tir_Ethi-swa_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 437747 + }, + "tir_Ethi-tsn_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 499521 + }, + "tir_Ethi-wol_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 405041 + }, + "tir_Ethi-xho_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 433328 + }, + "tir_Ethi-yor_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 481326 + }, + "tir_Ethi-zul_Latn": { + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 422970 + }, + "ton_Latn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561360 + }, + "ton_Latn-fij_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 614145 + }, + "ton_Latn-fil_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 607060 + }, + "ton_Latn-ind_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 600612 + }, + "ton_Latn-mal_Mlym": { + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 617792 + }, + "ton_Latn-mlg_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 633948 + }, + "ton_Latn-mri_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 587764 + }, + "ton_Latn-msa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 590823 + }, + "ton_Latn-smo_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 591495 + }, + "ton_Latn-tah_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 623263 + }, + "tsn_Latn-amh_Ethi": { + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 501790 + }, + "tsn_Latn-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 582003 + }, + "tsn_Latn-hau_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 604249 + }, + "tsn_Latn-ibo_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 580384 + }, + "tsn_Latn-nso_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 625782 + }, + "tsn_Latn-orm_Ethi": { + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 571714 + }, + "tsn_Latn-som_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 625575 + }, + "tsn_Latn-ssw_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 622425 + }, + "tsn_Latn-swa_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 606792 + }, + "tsn_Latn-tir_Ethi": { + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 499521 + }, + "tsn_Latn-wol_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 574086 + }, + "tsn_Latn-xho_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 602373 + }, + "tsn_Latn-yor_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 650371 + }, + "tsn_Latn-zul_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 592015 + }, + "tuk_Latn-aze_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 554908 + }, + "tuk_Latn-bak_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531600 + }, + "tuk_Latn-eng_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532994 + }, + "tuk_Latn-kaz_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 545550 + }, + "tuk_Latn-kir_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536138 + }, + "tuk_Latn-tat_Cyrl": { + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531200 + }, + "tuk_Latn-tur_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 550887 + }, + "tuk_Latn-uig_Arab": { + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 596296 + }, + "tuk_Latn-uzb_Latn": { + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 578969 + }, + "tur_Latn-arb_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 496794 + }, + "tur_Latn-aze_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 535247 + }, + "tur_Latn-bak_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 511939 + }, + "tur_Latn-ben_Beng": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509415 + }, + "tur_Latn-deu_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561263 + }, + "tur_Latn-ell_Grek": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564740 + }, + "tur_Latn-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 513333 + }, + "tur_Latn-fas_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508952 + }, + "tur_Latn-fin_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 535413 + }, + "tur_Latn-fra_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 558721 + }, + "tur_Latn-heb_Hebr": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 465909 + }, + "tur_Latn-hin_Deva": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 527188 + }, + "tur_Latn-hun_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 543989 + }, + "tur_Latn-ind_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 552585 + }, + "tur_Latn-jpn_Jpan": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 377239 + }, + "tur_Latn-kaz_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525889 + }, + "tur_Latn-kir_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 516477 + }, + "tur_Latn-kor_Hang": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 399018 + }, + "tur_Latn-lit_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525010 + }, + "tur_Latn-nld_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 557528 + }, + "tur_Latn-pol_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 543479 + }, + "tur_Latn-por_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 542828 + }, + "tur_Latn-rus_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540506 + }, + "tur_Latn-spa_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 553813 + }, + "tur_Latn-swa_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 538122 + }, + "tur_Latn-swe_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 517440 + }, + "tur_Latn-tam_Taml": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 575574 + }, + "tur_Latn-tat_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 511539 + }, + "tur_Latn-tuk_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 550887 + }, + "tur_Latn-uig_Arab": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 576635 + }, + "tur_Latn-uzb_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 559308 + }, + "tur_Latn-vie_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536734 + }, + "tur_Latn-zho_Hant": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 357091 + }, + "tur_Latn-zul_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 523345 + }, + "uig_Arab-aze_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 580656 + }, + "uig_Arab-bak_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 557348 + }, + "uig_Arab-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 558742 + }, + "uig_Arab-kaz_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 571298 + }, + "uig_Arab-kir_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 561886 + }, + "uig_Arab-tat_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 556948 + }, + "uig_Arab-tuk_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 596296 + }, + "uig_Arab-tur_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 576635 + }, + "uig_Arab-uzb_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 604717 + }, + "ukr_Cyrl-bel_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 518873 + }, + "ukr_Cyrl-bos_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 517693 + }, + "ukr_Cyrl-bul_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532672 + }, + "ukr_Cyrl-ces_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 504101 + }, + "ukr_Cyrl-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 510503 + }, + "ukr_Cyrl-hrv_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 518708 + }, + "ukr_Cyrl-mkd_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530674 + }, + "ukr_Cyrl-pol_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540649 + }, + "ukr_Cyrl-rus_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 537676 + }, + "ukr_Cyrl-slk_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515752 + }, + "ukr_Cyrl-slv_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515679 + }, + "ukr_Cyrl-srp_Cyrl": { + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515086 + }, + "ukr_Cyrl-srp_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 518924 + }, + "urd_Arab-ben_Beng": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 491800 + }, + "urd_Arab-div_Thaa": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 551846 + }, + "urd_Arab-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495718 + }, + "urd_Arab-eus_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 523201 + }, + "urd_Arab-guj_Gujr": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 492929 + }, + "urd_Arab-hin_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509573 + }, + "urd_Arab-kan_Knda": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 513534 + }, + "urd_Arab-mar_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 508885 + }, + "urd_Arab-nep_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 496221 + }, + "urd_Arab-pan_Guru": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 498420 + }, + "urd_Arab-sin_Sinh": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 506739 + }, + "urd_Arab-snd_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 468325 + }, + "urd_Arab-tam_Taml": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 557959 + }, + "urd_Arab-tel_Telu": { + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495525 + }, + "uzb_Latn-aze_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 563329 + }, + "uzb_Latn-bak_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540021 + }, + "uzb_Latn-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 541415 + }, + "uzb_Latn-kaz_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 553971 + }, + "uzb_Latn-kir_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 544559 + }, + "uzb_Latn-tat_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 539621 + }, + "uzb_Latn-tuk_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 578969 + }, + "uzb_Latn-tur_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 559308 + }, + "uzb_Latn-uig_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 604717 + }, + "ven_Latn-bem_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 598248 + }, + "ven_Latn-eng_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 547476 + }, + "ven_Latn-ewe_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 538734 + }, + "ven_Latn-fuc_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 528236 + }, + "ven_Latn-kin_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 603543 + }, + "ven_Latn-nde_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 597495 + }, + "ven_Latn-nya_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 584038 + }, + "ven_Latn-sna_Latn": { + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 598086 + }, + "vie_Latn-arb_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 502302 + }, + "vie_Latn-ben_Beng": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 514923 + }, + "vie_Latn-deu_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 566771 + }, + "vie_Latn-ell_Grek": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 570248 + }, + "vie_Latn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 518841 + }, + "vie_Latn-fas_Arab": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 514460 + }, + "vie_Latn-fin_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540921 + }, + "vie_Latn-fra_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 564229 + }, + "vie_Latn-heb_Hebr": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 471417 + }, + "vie_Latn-hin_Deva": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532696 + }, + "vie_Latn-hun_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549497 + }, + "vie_Latn-ind_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 558093 + }, + "vie_Latn-jpn_Jpan": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 382747 + }, + "vie_Latn-kor_Hang": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 404526 + }, + "vie_Latn-lit_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530518 + }, + "vie_Latn-nld_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 563036 + }, + "vie_Latn-pol_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 548987 + }, + "vie_Latn-por_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 548336 + }, + "vie_Latn-rus_Cyrl": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 546014 + }, + "vie_Latn-spa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 559321 + }, + "vie_Latn-swa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 543630 + }, + "vie_Latn-swe_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 522948 + }, + "vie_Latn-tam_Taml": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 581082 + }, + "vie_Latn-tur_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536734 + }, + "vie_Latn-yue_Hant": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 4, + "average_sentence2_length": 39.502754131196795, + "max_sentence2_length": 133, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 350008 + }, + "vie_Latn-zho_Hans": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 4, + "average_sentence2_length": 42.54431647471207, + "max_sentence2_length": 263, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 356082 + }, + "vie_Latn-zho_Hant": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 362599 + }, + "vie_Latn-zul_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 528853 + }, + "wol_Latn-amh_Ethi": { + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 407310 + }, + "wol_Latn-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 487523 + }, + "wol_Latn-hau_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509769 + }, + "wol_Latn-ibo_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 485904 + }, + "wol_Latn-nso_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531302 + }, + "wol_Latn-orm_Ethi": { + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 477234 + }, + "wol_Latn-som_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 531095 + }, + "wol_Latn-ssw_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 527945 + }, + "wol_Latn-swa_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 512312 + }, + "wol_Latn-tir_Ethi": { + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 405041 + }, + "wol_Latn-tsn_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 574086 + }, + "wol_Latn-xho_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 507893 + }, + "wol_Latn-yor_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 555891 + }, + "wol_Latn-zul_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 497535 + }, + "xho_Latn-amh_Ethi": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 435597 + }, + "xho_Latn-eng_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 515810 + }, + "xho_Latn-hau_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 538056 + }, + "xho_Latn-ibo_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 514191 + }, + "xho_Latn-nso_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 559589 + }, + "xho_Latn-orm_Ethi": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 505521 + }, + "xho_Latn-som_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 559382 + }, + "xho_Latn-ssw_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 556232 + }, + "xho_Latn-swa_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 540599 + }, + "xho_Latn-tir_Ethi": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 433328 + }, + "xho_Latn-tsn_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 602373 + }, + "xho_Latn-wol_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 507893 + }, + "xho_Latn-yor_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 584178 + }, + "xho_Latn-zul_Latn": { + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525822 + }, + "yor_Latn-amh_Ethi": { + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 483595 + }, + "yor_Latn-eng_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 563808 + }, + "yor_Latn-hau_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 586054 + }, + "yor_Latn-ibo_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 562189 + }, + "yor_Latn-nso_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 607587 + }, + "yor_Latn-orm_Ethi": { + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 553519 + }, + "yor_Latn-som_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 607380 + }, + "yor_Latn-ssw_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 604230 + }, + "yor_Latn-swa_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 588597 + }, + "yor_Latn-tir_Ethi": { + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 481326 + }, + "yor_Latn-tsn_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 650371 + }, + "yor_Latn-wol_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 555891 + }, + "yor_Latn-xho_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 584178 + }, + "yor_Latn-zul_Latn": { + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 573820 + }, + "yue_Hant-eng_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 39.502754131196795, + "max_sentence1_length": 133, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 326607 + }, + "yue_Hant-jpn_Jpan": { + "min_sentence1_length": 4, + "average_sentence1_length": 39.502754131196795, + "max_sentence1_length": 133, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 190513 + }, + "yue_Hant-kor_Hang": { + "min_sentence1_length": 4, + "average_sentence1_length": 39.502754131196795, + "max_sentence1_length": 133, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 212292 + }, + "yue_Hant-vie_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 39.502754131196795, + "max_sentence1_length": 133, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 350008 + }, + "yue_Hant-zho_Hans": { + "min_sentence1_length": 4, + "average_sentence1_length": 39.502754131196795, + "max_sentence1_length": 133, + "min_sentence2_length": 4, + "average_sentence2_length": 42.54431647471207, + "max_sentence2_length": 263, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 163848 + }, + "yue_Hant-zho_Hant": { + "min_sentence1_length": 4, + "average_sentence1_length": 39.502754131196795, + "max_sentence1_length": 133, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 170365 + }, + "zho_Hans-eng_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 42.54431647471207, + "max_sentence1_length": 263, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 332681 + }, + "zho_Hans-jpn_Jpan": { + "min_sentence1_length": 4, + "average_sentence1_length": 42.54431647471207, + "max_sentence1_length": 263, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 196587 + }, + "zho_Hans-kor_Hang": { + "min_sentence1_length": 4, + "average_sentence1_length": 42.54431647471207, + "max_sentence1_length": 263, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 218366 + }, + "zho_Hans-vie_Latn": { + "min_sentence1_length": 4, + "average_sentence1_length": 42.54431647471207, + "max_sentence1_length": 263, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 356082 + }, + "zho_Hans-yue_Hant": { + "min_sentence1_length": 4, + "average_sentence1_length": 42.54431647471207, + "max_sentence1_length": 263, + "min_sentence2_length": 4, + "average_sentence2_length": 39.502754131196795, + "max_sentence2_length": 133, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 163848 + }, + "zho_Hans-zho_Hant": { + "min_sentence1_length": 4, + "average_sentence1_length": 42.54431647471207, + "max_sentence1_length": 263, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 176439 + }, + "zho_Hant-arb_Arab": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 322659 + }, + "zho_Hant-ben_Beng": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 335280 + }, + "zho_Hant-deu_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 387128 + }, + "zho_Hant-ell_Grek": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 390605 + }, + "zho_Hant-eng_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 339198 + }, + "zho_Hant-fas_Arab": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 334817 + }, + "zho_Hant-fin_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 361278 + }, + "zho_Hant-fra_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 384586 + }, + "zho_Hant-heb_Hebr": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 291774 + }, + "zho_Hant-hin_Deva": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 353053 + }, + "zho_Hant-hun_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 369854 + }, + "zho_Hant-ind_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 378450 + }, + "zho_Hant-jpn_Jpan": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 203104 + }, + "zho_Hant-kor_Hang": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 224883 + }, + "zho_Hant-lit_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 350875 + }, + "zho_Hant-nld_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 383393 + }, + "zho_Hant-pol_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 369344 + }, + "zho_Hant-por_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 368693 + }, + "zho_Hant-rus_Cyrl": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 366371 + }, + "zho_Hant-spa_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 379678 + }, + "zho_Hant-swa_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 363987 + }, + "zho_Hant-swe_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 343305 + }, + "zho_Hant-tam_Taml": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 401439 + }, + "zho_Hant-tur_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 357091 + }, + "zho_Hant-vie_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 362599 + }, + "zho_Hant-yue_Hant": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 4, + "average_sentence2_length": 39.502754131196795, + "max_sentence2_length": 133, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 170365 + }, + "zho_Hant-zho_Hans": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 4, + "average_sentence2_length": 42.54431647471207, + "max_sentence2_length": 263, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 176439 + }, + "zho_Hant-zul_Latn": { + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 349210 + }, + "zul_Latn-amh_Ethi": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 425239 + }, + "zul_Latn-arb_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 488913 + }, + "zul_Latn-ben_Beng": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 501534 + }, + "zul_Latn-deu_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 553382 + }, + "zul_Latn-ell_Grek": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 556859 + }, + "zul_Latn-eng_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 505452 + }, + "zul_Latn-fas_Arab": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 501071 + }, + "zul_Latn-fin_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 527532 + }, + "zul_Latn-fra_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 550840 + }, + "zul_Latn-hau_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 527698 + }, + "zul_Latn-heb_Hebr": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 458028 + }, + "zul_Latn-hin_Deva": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 519307 + }, + "zul_Latn-hun_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 536108 + }, + "zul_Latn-ibo_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 503833 + }, + "zul_Latn-ind_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 544704 + }, + "zul_Latn-jpn_Jpan": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 369358 + }, + "zul_Latn-kor_Hang": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 391137 + }, + "zul_Latn-lit_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 517129 + }, + "zul_Latn-nld_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549647 + }, + "zul_Latn-nso_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549231 + }, + "zul_Latn-orm_Ethi": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 495163 + }, + "zul_Latn-pol_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 535598 + }, + "zul_Latn-por_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 534947 + }, + "zul_Latn-rus_Cyrl": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 532625 + }, + "zul_Latn-som_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 549024 + }, + "zul_Latn-spa_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 545932 + }, + "zul_Latn-ssw_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 545874 + }, + "zul_Latn-swa_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 530241 + }, + "zul_Latn-swe_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 509559 + }, + "zul_Latn-tam_Taml": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 567693 + }, + "zul_Latn-tir_Ethi": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 422970 + }, + "zul_Latn-tsn_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 592015 + }, + "zul_Latn-tur_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 523345 + }, + "zul_Latn-vie_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 528853 + }, + "zul_Latn-wol_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 497535 + }, + "zul_Latn-xho_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 525822 + }, + "zul_Latn-yor_Latn": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 573820 + }, + "zul_Latn-zho_Hant": { + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "num_samples": 1997, + "num_samples_sentence2": 1997, + "number_of_characters": 349210 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/NollySentiBitextMining.json b/mteb/descriptive_stats/BitextMining/NollySentiBitextMining.json new file mode 100644 index 000000000..144963c94 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/NollySentiBitextMining.json @@ -0,0 +1,59 @@ +{ + "train": { + "min_sentence1_length": 3, + "average_sentence1_length": 136.3170731707317, + "max_sentence1_length": 1698, + "min_sentence2_length": 3, + "average_sentence2_length": 135.515243902439, + "max_sentence2_length": 1728, + "num_samples": 1640, + "num_samples_sentence2": 1640, + "number_of_characters": 445805, + "hf_subset_descriptive_stats": { + "en-ha": { + "min_sentence1_length": 3, + "average_sentence1_length": 136.3170731707317, + "max_sentence1_length": 1698, + "min_sentence2_length": 4, + "average_sentence2_length": 145.01951219512196, + "max_sentence2_length": 1728, + "num_samples": 410, + "num_samples_sentence2": 410, + "number_of_characters": 115348 + }, + "en-ig": { + "min_sentence1_length": 3, + "average_sentence1_length": 136.3170731707317, + "max_sentence1_length": 1698, + "min_sentence2_length": 5, + "average_sentence2_length": 125.08048780487805, + "max_sentence2_length": 1137, + "num_samples": 410, + "num_samples_sentence2": 410, + "number_of_characters": 107173 + }, + "en-pcm": { + "min_sentence1_length": 3, + "average_sentence1_length": 136.3170731707317, + "max_sentence1_length": 1698, + "min_sentence2_length": 3, + "average_sentence2_length": 131.8658536585366, + "max_sentence2_length": 1552, + "num_samples": 410, + "num_samples_sentence2": 410, + "number_of_characters": 109955 + }, + "en-yo": { + "min_sentence1_length": 3, + "average_sentence1_length": 136.3170731707317, + "max_sentence1_length": 1698, + "min_sentence2_length": 6, + "average_sentence2_length": 140.0951219512195, + "max_sentence2_length": 1338, + "num_samples": 410, + "num_samples_sentence2": 410, + "number_of_characters": 113329 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/NorwegianCourtsBitextMining.json b/mteb/descriptive_stats/BitextMining/NorwegianCourtsBitextMining.json new file mode 100644 index 000000000..de150505b --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/NorwegianCourtsBitextMining.json @@ -0,0 +1,13 @@ +{ + "test": { + "min_sentence1_length": 13, + "average_sentence1_length": 82.19736842105263, + "max_sentence1_length": 272, + "min_sentence2_length": 10, + "average_sentence2_length": 82.01754385964912, + "max_sentence2_length": 269, + "num_samples": 228, + "num_samples_sentence2": 228, + "number_of_characters": 37441 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/NusaTranslationBitextMining.json b/mteb/descriptive_stats/BitextMining/NusaTranslationBitextMining.json index 833e9f17d..9efdf2f8d 100644 --- a/mteb/descriptive_stats/BitextMining/NusaTranslationBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/NusaTranslationBitextMining.json @@ -1,123 +1,159 @@ { "train": { + "num_samples": 50200, + "number_of_characters": 14759870, + "unique_pairs": 50140, "min_sentence1_length": 5, "average_sentence1_length": 145.4552390438247, "max_sentence1_length": 873, + "unique_sentence1": 8258, "min_sentence2_length": 5, "average_sentence2_length": 148.56607569721115, "max_sentence2_length": 980, - "num_samples": 50200, - "number_of_characters": 14759870, + "unique_sentence2": 50102, "hf_subset_descriptive_stats": { "ind-abs": { + "num_samples": 1000, + "number_of_characters": 295680, + "unique_pairs": 999, "min_sentence1_length": 5, "average_sentence1_length": 148.366, "max_sentence1_length": 727, + "unique_sentence1": 998, "min_sentence2_length": 6, "average_sentence2_length": 147.314, "max_sentence2_length": 629, - "num_samples": 1000, - "number_of_characters": 295680 + "unique_sentence2": 998 }, "ind-btk": { + "num_samples": 6600, + "number_of_characters": 1927907, + "unique_pairs": 6597, "min_sentence1_length": 5, "average_sentence1_length": 145.36666666666667, "max_sentence1_length": 873, + "unique_sentence1": 6521, "min_sentence2_length": 5, "average_sentence2_length": 146.74045454545455, "max_sentence2_length": 980, - "num_samples": 6600, - "number_of_characters": 1927907 + "unique_sentence2": 6596 }, "ind-bew": { + "num_samples": 6600, + "number_of_characters": 1939300, + "unique_pairs": 6595, "min_sentence1_length": 5, "average_sentence1_length": 145.4280303030303, "max_sentence1_length": 873, + "unique_sentence1": 6512, "min_sentence2_length": 6, "average_sentence2_length": 148.40530303030303, "max_sentence2_length": 840, - "num_samples": 6600, - "number_of_characters": 1939300 + "unique_sentence2": 6590 }, "ind-bhp": { + "num_samples": 1000, + "number_of_characters": 261666, + "unique_pairs": 1000, "min_sentence1_length": 11, "average_sentence1_length": 133.528, "max_sentence1_length": 468, + "unique_sentence1": 999, "min_sentence2_length": 10, "average_sentence2_length": 128.138, "max_sentence2_length": 459, - "num_samples": 1000, - "number_of_characters": 261666 + "unique_sentence2": 999 }, "ind-jav": { + "num_samples": 6600, + "number_of_characters": 1922162, + "unique_pairs": 6594, "min_sentence1_length": 5, "average_sentence1_length": 145.42772727272728, "max_sentence1_length": 873, + "unique_sentence1": 6512, "min_sentence2_length": 5, "average_sentence2_length": 145.8089393939394, "max_sentence2_length": 854, - "num_samples": 6600, - "number_of_characters": 1922162 + "unique_sentence2": 6585 }, "ind-mad": { + "num_samples": 6600, + "number_of_characters": 1973257, + "unique_pairs": 6598, "min_sentence1_length": 5, "average_sentence1_length": 145.35545454545453, "max_sentence1_length": 873, + "unique_sentence1": 6521, "min_sentence2_length": 5, "average_sentence2_length": 153.6228787878788, "max_sentence2_length": 827, - "num_samples": 6600, - "number_of_characters": 1973257 + "unique_sentence2": 6592 }, "ind-mak": { + "num_samples": 6600, + "number_of_characters": 1953868, + "unique_pairs": 6594, "min_sentence1_length": 5, "average_sentence1_length": 145.42772727272728, "max_sentence1_length": 873, + "unique_sentence1": 6512, "min_sentence2_length": 6, "average_sentence2_length": 150.6128787878788, "max_sentence2_length": 888, - "num_samples": 6600, - "number_of_characters": 1953868 + "unique_sentence2": 6586 }, "ind-min": { + "num_samples": 6600, + "number_of_characters": 1937033, + "unique_pairs": 6595, "min_sentence1_length": 5, "average_sentence1_length": 145.42772727272728, "max_sentence1_length": 873, + "unique_sentence1": 6512, "min_sentence2_length": 6, "average_sentence2_length": 148.0621212121212, "max_sentence2_length": 837, - "num_samples": 6600, - "number_of_characters": 1937033 + "unique_sentence2": 6591 }, "ind-mui": { + "num_samples": 1000, + "number_of_characters": 301448, + "unique_pairs": 1000, "min_sentence1_length": 11, "average_sentence1_length": 150.454, "max_sentence1_length": 451, + "unique_sentence1": 997, "min_sentence2_length": 11, "average_sentence2_length": 150.994, "max_sentence2_length": 450, - "num_samples": 1000, - "number_of_characters": 301448 + "unique_sentence2": 1000 }, "ind-rej": { + "num_samples": 1000, + "number_of_characters": 291205, + "unique_pairs": 1000, "min_sentence1_length": 9, "average_sentence1_length": 151.622, "max_sentence1_length": 873, + "unique_sentence1": 998, "min_sentence2_length": 8, "average_sentence2_length": 139.583, "max_sentence2_length": 784, - "num_samples": 1000, - "number_of_characters": 291205 + "unique_sentence2": 1000 }, "ind-sun": { + "num_samples": 6600, + "number_of_characters": 1956344, + "unique_pairs": 6591, "min_sentence1_length": 5, "average_sentence1_length": 145.42772727272728, "max_sentence1_length": 873, + "unique_sentence1": 6512, "min_sentence2_length": 5, "average_sentence2_length": 150.9880303030303, "max_sentence2_length": 881, - "num_samples": 6600, - "number_of_characters": 1956344 + "unique_sentence2": 6588 } } } diff --git a/mteb/descriptive_stats/BitextMining/PhincBitextMining.json b/mteb/descriptive_stats/BitextMining/PhincBitextMining.json index 73080b250..f4b237d87 100644 --- a/mteb/descriptive_stats/BitextMining/PhincBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/PhincBitextMining.json @@ -1,23 +1,29 @@ { "train": { + "num_samples": 13738, + "number_of_characters": 2069457, + "unique_pairs": 13737, "min_sentence1_length": 1, "average_sentence1_length": 74.02300189256079, "max_sentence1_length": 278, + "unique_sentence1": 13515, "min_sentence2_length": 3, "average_sentence2_length": 76.61442713640996, "max_sentence2_length": 274, - "num_samples": 13738, - "number_of_characters": 2069457, + "unique_sentence2": 13736, "hf_subset_descriptive_stats": { "eng-eng_hin": { + "num_samples": 13738, + "number_of_characters": 2069457, + "unique_pairs": 13737, "min_sentence1_length": 1, "average_sentence1_length": 74.02300189256079, "max_sentence1_length": 278, + "unique_sentence1": 13515, "min_sentence2_length": 3, "average_sentence2_length": 76.61442713640996, "max_sentence2_length": 274, - "num_samples": 13738, - "number_of_characters": 2069457 + "unique_sentence2": 13736 } } } diff --git a/mteb/descriptive_stats/BitextMining/TbilisiCityHallBitextMining.json b/mteb/descriptive_stats/BitextMining/TbilisiCityHallBitextMining.json new file mode 100644 index 000000000..655cc7d2e --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/TbilisiCityHallBitextMining.json @@ -0,0 +1,37 @@ +{ + "test": { + "min_sentence1_length": 13, + "average_sentence1_length": 78.59148351648352, + "max_sentence1_length": 203, + "min_sentence2_length": 13, + "average_sentence2_length": 78.59148351648352, + "max_sentence2_length": 203, + "num_samples": 3640, + "num_samples_sentence2": 3640, + "number_of_characters": 572146, + "hf_subset_descriptive_stats": { + "kat_Geor-eng_Latn": { + "min_sentence1_length": 30, + "average_sentence1_length": 76.06593406593407, + "max_sentence1_length": 189, + "min_sentence2_length": 13, + "average_sentence2_length": 81.11703296703297, + "max_sentence2_length": 203, + "num_samples": 1820, + "num_samples_sentence2": 1820, + "number_of_characters": 286073 + }, + "eng_Latn-kat_Geor": { + "min_sentence1_length": 13, + "average_sentence1_length": 81.11703296703297, + "max_sentence1_length": 203, + "min_sentence2_length": 30, + "average_sentence2_length": 76.06593406593407, + "max_sentence2_length": 189, + "num_samples": 1820, + "num_samples_sentence2": 1820, + "number_of_characters": 286073 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/VieMedEVBitextMining.json b/mteb/descriptive_stats/BitextMining/VieMedEVBitextMining.json new file mode 100644 index 000000000..77bab2b30 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/VieMedEVBitextMining.json @@ -0,0 +1,13 @@ +{ + "test": { + "min_sentence1_length": 11, + "average_sentence1_length": 139.22802734375, + "max_sentence1_length": 1291, + "min_sentence2_length": 11, + "average_sentence2_length": 141.97802734375, + "max_sentence2_length": 1217, + "num_samples": 2048, + "num_samples_sentence2": 2048, + "number_of_characters": 575910 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LanguageClassification.json b/mteb/descriptive_stats/Classification/LanguageClassification.json index 3c446bd33..0142cc3e1 100644 --- a/mteb/descriptive_stats/Classification/LanguageClassification.json +++ b/mteb/descriptive_stats/Classification/LanguageClassification.json @@ -5,6 +5,7 @@ "min_text_length": 14, "average_text_length": 109.546875, "max_text_length": 1270, + "unique_text": 2025, "unique_labels": 20, "labels": { "17": { diff --git a/mteb/descriptive_stats/Retrieval/NFCorpus.json b/mteb/descriptive_stats/Retrieval/NFCorpus.json new file mode 100644 index 000000000..94df0b0cf --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NFCorpus.json @@ -0,0 +1,11 @@ +{ + "test": { + "number_of_characters": 1612.5486310130989, + "num_samples": 3956, + "num_queries": 323, + "num_documents": 3633, + "average_document_length": 0.43787060972495073, + "average_query_length": 0.06738299034784193, + "average_relevant_docs_per_query": 38.18575851393189 + } +} \ No newline at end of file From 0ccfd57cde4e3c7efda02a5b37aac11f529fe2c1 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Mon, 11 Nov 2024 19:15:50 +0300 Subject: [PATCH 3/3] update statistics --- mteb/abstasks/AbsTask.py | 12 +- mteb/abstasks/AbsTaskBitextMining.py | 2 - mteb/abstasks/AbsTaskClassification.py | 15 + mteb/abstasks/AbsTaskClustering.py | 2 - mteb/abstasks/AbsTaskInstructionRetrieval.py | 34 +- .../AbsTaskMultilabelClassification.py | 15 +- mteb/abstasks/AbsTaskPairClassification.py | 3 - mteb/abstasks/AbsTaskReranking.py | 3 - mteb/abstasks/AbsTaskRetrieval.py | 11 +- mteb/abstasks/AbsTaskSTS.py | 3 - mteb/abstasks/AbsTaskSummarization.py | 4 - .../BitextMining/BUCC.v2.json | 40 +- .../BitextMining/BibleNLPBitextMining.json | 18231 ---------------- .../BitextMining/IN22GenBitextMining.json | 4056 ++-- .../BitextMining/IWSLT2017BitextMining.json | 200 +- .../IndicGenBenchFloresBitextMining.json | 944 +- .../BitextMining/NTREXBitextMining.json | 15336 ++++++++----- .../BitextMining/NollySentiBitextMining.json | 40 +- .../NorwegianCourtsBitextMining.json | 8 +- .../TbilisiCityHallBitextMining.json | 24 +- .../BitextMining/VieMedEVBitextMining.json | 8 +- .../LanguageClassification.json | 73 + .../SlovakHateSpeechClassification.json | 20 + .../Clustering/BiorxivClusteringS2S.json | 1 + .../Clustering/MedrxivClusteringP2P.v2.json | 168 + .../Clustering/MedrxivClusteringS2S.v2.json | 168 + .../Clustering/RedditClusteringP2P.v2.json | 1335 ++ .../TwentyNewsgroupsClustering.v2.json | 75 + .../Clustering/WikiClusteringP2P.json | 15 + .../Core17InstructionRetrieval.json | 4 + .../CEDRClassification.json | 39 +- .../MultiEURLEXMultilabelClassification.json | 1828 -- .../PawsXPairClassification.json | 32 + .../PairClassification/TwitterURLCorpus.json | 2 + .../PairClassification/XNLI.json | 60 + .../Reranking/AskUbuntuDupQuestions.json | 7 +- .../Reranking/ESCIReranking.json | 26 +- .../WikipediaRerankingMultilingual.json | 117 +- .../Retrieval/AppsRetrieval.json | 7 +- .../Retrieval/BelebeleRetrieval.json | 2639 ++- .../Retrieval/COIRCodeSearchNetRetrieval.json | 49 +- .../Retrieval/CodeEditSearchRetrieval.json | 98 +- .../Retrieval/CodeFeedbackMT.json | 7 +- .../Retrieval/CodeFeedbackST.json | 7 +- .../Retrieval/CodeSearchNetCCRetrieval.json | 49 +- .../Retrieval/CodeSearchNetRetrieval.json | 49 +- .../Retrieval/CodeTransOceanContest.json | 7 +- .../Retrieval/CodeTransOceanDL.json | 7 +- mteb/descriptive_stats/Retrieval/CosQA.json | 7 +- .../Retrieval/JaqketRetrieval.json | 7 +- .../Retrieval/StackOverflowQA.json | 7 +- .../Retrieval/SyntheticText2SQL.json | 7 +- .../Retrieval/Touche2020.json | 9 +- .../Retrieval/Touche2020Retrieval.v3.json | 9 +- ...lowIRCrossLingualInstructionRetrieval.json | 16 + .../mFollowIRInstructionRetrieval.json | 16 + mteb/descriptive_stats/STS/STS12.json | 2 + mteb/descriptive_stats/STS/STS17.json | 24 + .../Summarization/SummEval.json | 3 + tests/test_benchmark/mock_tasks.py | 652 +- tests/test_tasks/test_metadata.py | 4 +- 61 files changed, 17821 insertions(+), 28822 deletions(-) delete mode 100644 mteb/descriptive_stats/BitextMining/BibleNLPBitextMining.json create mode 100644 mteb/descriptive_stats/Clustering/MedrxivClusteringP2P.v2.json create mode 100644 mteb/descriptive_stats/Clustering/MedrxivClusteringS2S.v2.json create mode 100644 mteb/descriptive_stats/Clustering/RedditClusteringP2P.v2.json create mode 100644 mteb/descriptive_stats/Clustering/TwentyNewsgroupsClustering.v2.json delete mode 100644 mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json diff --git a/mteb/abstasks/AbsTask.py b/mteb/abstasks/AbsTask.py index 0bdbdeaf8..8b9edfd52 100644 --- a/mteb/abstasks/AbsTask.py +++ b/mteb/abstasks/AbsTask.py @@ -200,7 +200,11 @@ def calculate_metadata_metrics( descriptive_stats = {} hf_subset_stat = "hf_subset_descriptive_stats" - pbar_split = tqdm.tqdm(self.metadata.eval_splits, desc="Processing Splits...") + eval_splits = self.metadata.eval_splits + if self.metadata.type in ["Classification", "MultilabelClassification"]: + eval_splits += ["train"] + + pbar_split = tqdm.tqdm(eval_splits, desc="Processing Splits...") for split in pbar_split: pbar_split.set_postfix_str(f"Split: {split}") logger.info(f"Processing metadata for split {split}") @@ -215,12 +219,8 @@ def calculate_metadata_metrics( if isinstance(self.metadata.eval_langs, dict) else self.metadata.eval_langs ) - if self.metadata.type == "Classification": - eval_langs += ["train"] - pbar_subsets = tqdm.tqdm( - self.metadata.eval_langs, desc="Processing Languages..." - ) + pbar_subsets = tqdm.tqdm(eval_langs, desc="Processing Languages...") for hf_subset in pbar_subsets: pbar_subsets.set_postfix_str(f"Language: {hf_subset}") logger.info(f"Processing metadata for language {hf_subset}") diff --git a/mteb/abstasks/AbsTaskBitextMining.py b/mteb/abstasks/AbsTaskBitextMining.py index eff2c663d..59d64039f 100644 --- a/mteb/abstasks/AbsTaskBitextMining.py +++ b/mteb/abstasks/AbsTaskBitextMining.py @@ -182,12 +182,10 @@ def _calculate_metrics_from_split( num_samples=len(sentence1), number_of_characters=total_s1_len + total_s2_len, unique_pairs=unique_pairs, - min_sentence1_length=min(s1_len), average_sentence1_length=sum(s1_len) / len(sentence1), max_sentence1_length=max(s1_len), unique_sentence1=unique_sentence1, - min_sentence2_length=min(s2_len), average_sentence2_length=total_s2_len / len(sentence2), max_sentence2_length=max(s2_len), diff --git a/mteb/abstasks/AbsTaskClassification.py b/mteb/abstasks/AbsTaskClassification.py index 6dd6903be..673a09b12 100644 --- a/mteb/abstasks/AbsTaskClassification.py +++ b/mteb/abstasks/AbsTaskClassification.py @@ -26,6 +26,7 @@ class ClassificationDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + num_texts_in_train: Number of texts in the train split min_text_length: Minimum length of text average_text_length: Average length of text @@ -38,10 +39,13 @@ class ClassificationDescriptiveStatistics(DescriptiveStatistics): num_samples: int number_of_characters: int + num_texts_in_train: int | None + min_text_length: int average_text_length: float max_text_length: int unique_text: int + unique_labels: int labels: dict[str, dict[str, int]] @@ -214,25 +218,36 @@ def _undersample_data(self, X, y, samples_per_label: int, idxs=None): def _calculate_metrics_from_split( self, split: str, hf_subset: str | None = None, compute_overall: bool = False ) -> ClassificationDescriptiveStatistics: + train_text = [] if hf_subset: text = self.dataset[hf_subset][split]["text"] label = self.dataset[hf_subset][split]["label"] + if split != "train": + train_text = self.dataset[hf_subset]["train"]["text"] elif compute_overall: text = [] label = [] for hf_subset in self.metadata.eval_langs: text.extend(self.dataset[hf_subset][split]["text"]) label.extend(self.dataset[hf_subset][split]["label"]) + if split != "train": + train_text.extend(self.dataset[hf_subset]["train"]["text"]) else: text = self.dataset[split]["text"] label = self.dataset[split]["label"] + if split != "train": + train_text = self.dataset["train"]["text"] text_len = [len(t) for t in text] total_text_len = sum(text_len) label_count = Counter(label) + num_texts_in_train = ( + len(set(text) & set(train_text)) if split != "train" else None + ) return ClassificationDescriptiveStatistics( num_samples=len(text), number_of_characters=total_text_len, + num_texts_in_train=num_texts_in_train, min_text_length=min(text_len), average_text_length=total_text_len / len(text), max_text_length=max(text_len), diff --git a/mteb/abstasks/AbsTaskClustering.py b/mteb/abstasks/AbsTaskClustering.py index 6ce3aa57c..3b5d0f492 100644 --- a/mteb/abstasks/AbsTaskClustering.py +++ b/mteb/abstasks/AbsTaskClustering.py @@ -126,12 +126,10 @@ def _calculate_metrics_from_split( return ClusteringDescriptiveStatistics( num_samples=len(sentences), number_of_characters=total_text_len, - min_text_length=min(text_len), average_text_length=total_text_len / len(sentences), max_text_length=max(text_len), unique_texts=len(set(all_sentences)), - min_labels_per_text=min(label_counter.values()), average_labels_per_text=len(total_labels) / len(sentences), max_labels_per_text=max(label_counter.values()), diff --git a/mteb/abstasks/AbsTaskInstructionRetrieval.py b/mteb/abstasks/AbsTaskInstructionRetrieval.py index bc1f129d5..219426fe6 100644 --- a/mteb/abstasks/AbsTaskInstructionRetrieval.py +++ b/mteb/abstasks/AbsTaskInstructionRetrieval.py @@ -72,7 +72,6 @@ def __init__( self.keep_in_memory = keep_in_memory self.trust_remote_code = trust_remote_code - def load( self, split="test" ) -> tuple[ @@ -712,30 +711,31 @@ def _calculate_metrics_from_split( changed_instructions = self.changed_instructions[split] top_ranked = self.top_ranked[split] - corpus_combined = [doc.get("title", "") + doc["text"] for doc in corpus.values()] + corpus_combined = [ + doc.get("title", "") + doc["text"] for doc in corpus.values() + ] corpus_len = [len(doc) for doc in corpus_combined] total_corpus_len = sum(corpus_len) queries_len = [len(query) for query in queries.values()] total_queries_len = sum(queries_len) - instructions_len = [len(instruction) for instruction in og_instructions.values()] + instructions_len = [ + len(instruction) for instruction in og_instructions.values() + ] total_instructions_len = sum(instructions_len) - changed_instructions_len = [len(instruction) for instruction in changed_instructions.values()] - total_changed_instructions_len = sum( - changed_instructions_len - ) + changed_instructions_len = [ + len(instruction) for instruction in changed_instructions.values() + ] + total_changed_instructions_len = sum(changed_instructions_len) qrels_non_zero = [ - sum(1 for doc_id in docs if docs[doc_id] != 0) for docs in relevant_docs.values() + sum(1 for doc_id in docs if docs[doc_id] != 0) + for docs in relevant_docs.values() ] - num_qrels_non_zero = sum( - qrels_non_zero - ) + num_qrels_non_zero = sum(qrels_non_zero) qrels_per_doc = num_qrels_non_zero / len(relevant_docs) if len(queries) else 0 ranked_per_query = [len(docs) for docs in top_ranked.values()] top_ranked_per_query = ( - sum(ranked_per_query) / len(queries) - if len(queries) - else 0 + sum(ranked_per_query) / len(queries) if len(queries) else 0 ) return InstructionRetrievalDescriptiveStatistics( num_samples=len(queries) + len(corpus), @@ -745,39 +745,33 @@ def _calculate_metrics_from_split( + total_queries_len + total_instructions_len + total_changed_instructions_len, - min_document_length=min(corpus_len), average_document_length=( total_corpus_len / len(corpus) if len(corpus) else 0 ), max_document_length=max(corpus_len), unique_docs=len(set(corpus_combined)), - min_query_length=min(queries_len), average_query_length=( total_queries_len / len(queries) if len(queries) else 0 ), max_query_length=max(queries_len), unique_queries=len(set(queries.values())), - min_instruction_length=min(instructions_len), average_instruction_length=( total_instructions_len / len(queries) if len(queries) else 0 ), max_instruction_length=max(instructions_len), unique_instructions=len(set(og_instructions.values())), - min_changed_instruction_length=min(changed_instructions_len), average_changed_instruction_length=( total_changed_instructions_len / len(queries) if len(queries) else 0 ), max_changed_instruction_length=max(changed_instructions_len), unique_changed_instructions=len(set(changed_instructions.values())), - min_average_relevant_docs_per_query=min(qrels_non_zero), average_relevant_docs_per_query=qrels_per_doc, max_average_relevant_docs_per_query=max(qrels_non_zero), - min_average_top_ranked_per_query=min(ranked_per_query), average_top_ranked_per_query=top_ranked_per_query, max_average_top_ranked_per_query=max(ranked_per_query), diff --git a/mteb/abstasks/AbsTaskMultilabelClassification.py b/mteb/abstasks/AbsTaskMultilabelClassification.py index 2ae6ae6cf..7b98c6441 100644 --- a/mteb/abstasks/AbsTaskMultilabelClassification.py +++ b/mteb/abstasks/AbsTaskMultilabelClassification.py @@ -47,6 +47,7 @@ class MultilabelClassificationDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + number_texts_in_train: Number of texts in the train split min_text_length: Minimum length of text average_text_length: Average length of text @@ -62,6 +63,7 @@ class MultilabelClassificationDescriptiveStatistics(DescriptiveStatistics): num_samples: int number_of_characters: int + number_texts_in_train: int | None min_text_length: int average_text_length: float @@ -243,18 +245,25 @@ def _undersample_data_indices(self, y, samples_per_label, idxs=None): def _calculate_metrics_from_split( self, split: str, hf_subset: str | None = None, compute_overall: bool = False ) -> MultilabelClassificationDescriptiveStatistics: + train_text = [] if hf_subset: text = self.dataset[hf_subset][split]["text"] label = self.dataset[hf_subset][split]["label"] + if split != "train": + train_text = self.dataset[hf_subset]["train"]["text"] elif compute_overall: text = [] label = [] for hf_subset in self.metadata.eval_langs: text.extend(self.dataset[hf_subset][split]["text"]) label.extend(self.dataset[hf_subset][split]["label"]) + if split != "train": + train_text.extend(self.dataset[hf_subset]["train"]["text"]) else: text = self.dataset[split]["text"] label = self.dataset[split]["label"] + if split != "train": + train_text = self.dataset["train"]["text"] text_len = [len(t) for t in text] total_text_len = sum(text_len) @@ -264,15 +273,17 @@ def _calculate_metrics_from_split( for l in label: total_labels.extend(l if len(l) > 0 else [None]) label_count = Counter(total_labels) + num_texts_in_train = ( + len(set(text) & set(train_text)) if split != "train" else None + ) return MultilabelClassificationDescriptiveStatistics( num_samples=len(text), number_of_characters=total_text_len, - + number_texts_in_train=num_texts_in_train, min_text_length=min(text_len), average_text_length=total_text_len / len(text), max_text_length=max(text_len), unique_texts=len(set(text)), - min_labels_per_text=min(label_len), average_label_per_text=total_label_len / len(label), max_labels_per_text=max(label_len), diff --git a/mteb/abstasks/AbsTaskPairClassification.py b/mteb/abstasks/AbsTaskPairClassification.py index a6ff94fe1..82ba128c2 100644 --- a/mteb/abstasks/AbsTaskPairClassification.py +++ b/mteb/abstasks/AbsTaskPairClassification.py @@ -135,17 +135,14 @@ def _calculate_metrics_from_split( return PairClassificationDescriptiveStatistics( num_samples=len(sentence1), number_of_characters=total_sentence1_len + total_sentence2_len, - min_sentence1_length=min(sentence1_len), avg_sentence1_length=total_sentence1_len / len(sentence1), max_sentence1_length=max(sentence1_len), unique_sentence1=len(set(sentence1)), - min_sentence2_length=min(sentence2_len), avg_sentence2_length=total_sentence2_len / len(sentence2), max_sentence2_length=max(sentence2_len), unique_sentence2=len(set(sentence2)), - unique_labels=len(set(labels)), labels={ str(label): {"count": count} for label, count in label_count.items() diff --git a/mteb/abstasks/AbsTaskReranking.py b/mteb/abstasks/AbsTaskReranking.py index a22aa1951..ab00a53a3 100644 --- a/mteb/abstasks/AbsTaskReranking.py +++ b/mteb/abstasks/AbsTaskReranking.py @@ -135,17 +135,14 @@ def _calculate_metrics_from_split( + total_len_negative, num_positive=len(positive), num_negative=len(negative), - min_query_length=min(len_query), avg_query_length=total_len_query / len(query), max_query_length=max(len_query), unique_query=len(set(query)), - min_positive_length=min(len_positive), avg_positive_length=total_len_positive / len(positive), max_positive_length=max(len_positive), unique_positive=len(set(positive)), - min_negative_length=min(len_negative), avg_negative_length=total_len_negative / len(negative), max_negative_length=max(len_negative), diff --git a/mteb/abstasks/AbsTaskRetrieval.py b/mteb/abstasks/AbsTaskRetrieval.py index c0371da7f..95746e1a2 100644 --- a/mteb/abstasks/AbsTaskRetrieval.py +++ b/mteb/abstasks/AbsTaskRetrieval.py @@ -464,29 +464,22 @@ def _calculate_metrics_from_split( qrels_lengths = [ len(relevant_docs[qid]) for qid in relevant_docs if qid in queries ] - num_qrels = sum( - qrels_lengths - ) + num_qrels = sum(qrels_lengths) qrels_per_doc = num_qrels / len(relevant_docs) if num_queries else 0 - unique_qrels = len(set( - [doc for qid in relevant_docs for doc in relevant_docs[qid]] - )) + unique_qrels = len({doc for qid in relevant_docs for doc in relevant_docs[qid]}) return RetrievalDescriptiveStatistics( number_of_characters=sum(query_len) + sum(doc_len), num_samples=num_documents + num_queries, num_queries=num_queries, num_documents=num_documents, - min_document_length=min(doc_len), average_document_length=sum(doc_len) / num_documents, max_document_length=max(doc_len), unique_documents=len(set(corpus)), - min_query_length=min(query_len), average_query_length=sum(query_len) / num_queries, max_query_length=max(query_len), unique_queries=len(set(queries)), - min_relevant_docs_per_query=min(qrels_lengths), average_relevant_docs_per_query=qrels_per_doc, max_relevant_docs_per_query=max(qrels_lengths), diff --git a/mteb/abstasks/AbsTaskSTS.py b/mteb/abstasks/AbsTaskSTS.py index 1c2cf0aac..d12b88545 100644 --- a/mteb/abstasks/AbsTaskSTS.py +++ b/mteb/abstasks/AbsTaskSTS.py @@ -121,17 +121,14 @@ def _calculate_metrics_from_split( return STSDescriptiveStatistics( num_samples=len(sentence1), number_of_characters=total_sentence1_len + total_sentence2_len, - min_sentence1_length=min(sentence1_len), average_sentence1_len=total_sentence1_len / len(sentence1), max_sentence1_length=max(sentence1_len), unique_sentence1=len(set(sentence1)), - min_sentence2_length=min(sentence2_len), average_sentence2_len=total_sentence2_len / len(sentence2), max_sentence2_length=max(sentence2_len), unique_sentence2=len(set(sentence2)), - min_score=min(score), avg_score=avg_score, max_score=max(score), diff --git a/mteb/abstasks/AbsTaskSummarization.py b/mteb/abstasks/AbsTaskSummarization.py index 40a912637..07fd42057 100644 --- a/mteb/abstasks/AbsTaskSummarization.py +++ b/mteb/abstasks/AbsTaskSummarization.py @@ -162,22 +162,18 @@ def _calculate_metrics_from_split( number_of_characters=total_text_len + total_human_summaries_len + total_machine_summaries_len, - min_text_length=min(text_len), avg_text_length=total_text_len / len(text), max_text_length=max(text_len), unique_texts=len(set(text)), - min_human_summaries_length=min(human_summaries_len), avg_human_summaries_length=total_human_summaries_len / len(text), max_human_summaries_length=max(human_summaries_len), unique_human_summaries=len(set(all_human_summaries)), - min_machine_summaries_length=min(machine_summaries_len), avg_machine_summaries_length=total_machine_summaries_len / len(text), max_machine_summaries_length=max(machine_summaries_len), unique_machine_summaries=len(set(all_machine_summaries)), - min_relevance=min(relevance), avg_relevance=total_relevance / len(relevance), max_relevance=max(relevance), diff --git a/mteb/descriptive_stats/BitextMining/BUCC.v2.json b/mteb/descriptive_stats/BitextMining/BUCC.v2.json index 983ed7ca8..75ef75ced 100644 --- a/mteb/descriptive_stats/BitextMining/BUCC.v2.json +++ b/mteb/descriptive_stats/BitextMining/BUCC.v2.json @@ -1,58 +1,68 @@ { "test": { + "num_samples": 35000, + "number_of_characters": 6640032, + "unique_pairs": 34978, "min_sentence1_length": 16, "average_sentence1_length": 99.10931428571429, "max_sentence1_length": 204, + "unique_sentence1": 34978, "min_sentence2_length": 42, "average_sentence2_length": 90.60588571428572, "max_sentence2_length": 159, - "num_samples": 35000, - "num_samples_sentence2": 35000, - "number_of_characters": 6640032, + "unique_sentence2": 25306, "hf_subset_descriptive_stats": { "de-en": { + "num_samples": 9580, + "number_of_characters": 1919197, + "unique_pairs": 9573, "min_sentence1_length": 50, "average_sentence1_length": 109.07974947807934, "max_sentence1_length": 204, + "unique_sentence1": 9573, "min_sentence2_length": 46, "average_sentence2_length": 91.25396659707724, "max_sentence2_length": 155, - "num_samples": 9580, - "num_samples_sentence2": 9580, - "number_of_characters": 1919197 + "unique_sentence2": 9570 }, "fr-en": { + "num_samples": 9086, + "number_of_characters": 1677545, + "unique_pairs": 9081, "min_sentence1_length": 43, "average_sentence1_length": 99.31785163988553, "max_sentence1_length": 174, + "unique_sentence1": 9081, "min_sentence2_length": 42, "average_sentence2_length": 85.3117983711204, "max_sentence2_length": 159, - "num_samples": 9086, - "num_samples_sentence2": 9086, - "number_of_characters": 1677545 + "unique_sentence2": 9076 }, "ru-en": { + "num_samples": 14435, + "number_of_characters": 2808206, + "unique_pairs": 14425, "min_sentence1_length": 40, "average_sentence1_length": 101.6593003117423, "max_sentence1_length": 186, + "unique_sentence1": 14425, "min_sentence2_length": 45, "average_sentence2_length": 92.88216141323173, "max_sentence2_length": 159, - "num_samples": 14435, - "num_samples_sentence2": 14435, - "number_of_characters": 2808206 + "unique_sentence2": 14424 }, "zh-en": { + "num_samples": 1899, + "number_of_characters": 235084, + "unique_pairs": 1899, "min_sentence1_length": 16, "average_sentence1_length": 28.429699842022117, "max_sentence1_length": 40, + "unique_sentence1": 1899, "min_sentence2_length": 48, "average_sentence2_length": 95.3638757240653, "max_sentence2_length": 159, - "num_samples": 1899, - "num_samples_sentence2": 1899, - "number_of_characters": 235084 + "unique_sentence2": 1899 } } } diff --git a/mteb/descriptive_stats/BitextMining/BibleNLPBitextMining.json b/mteb/descriptive_stats/BitextMining/BibleNLPBitextMining.json deleted file mode 100644 index 50704e012..000000000 --- a/mteb/descriptive_stats/BitextMining/BibleNLPBitextMining.json +++ /dev/null @@ -1,18231 +0,0 @@ -{ - "train": { - "min_sentence1_length": 1, - "average_sentence1_length": 158.52821402221093, - "max_sentence1_length": 4949, - "min_sentence2_length": 1, - "average_sentence2_length": 158.52821402221093, - "max_sentence2_length": 4949, - "num_samples": 417452, - "num_samples_sentence2": 417452, - "number_of_characters": 132355840, - "hf_subset_descriptive_stats": { - "eng_Latn-aai_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 32, - "average_sentence2_length": 146.66796875, - "max_sentence2_length": 322, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66320 - }, - "aai_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 146.66796875, - "max_sentence1_length": 322, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66320 - }, - "eng_Latn-aak_Arab": { - "min_sentence1_length": 21, - "average_sentence1_length": 112.16015625, - "max_sentence1_length": 227, - "min_sentence2_length": 46, - "average_sentence2_length": 292.203125, - "max_sentence2_length": 809, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 103517 - }, - "aak_Arab-eng_Latn": { - "min_sentence1_length": 46, - "average_sentence1_length": 292.203125, - "max_sentence1_length": 809, - "min_sentence2_length": 21, - "average_sentence2_length": 112.16015625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 103517 - }, - "eng_Latn-aau_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.42578125, - "max_sentence1_length": 227, - "min_sentence2_length": 34, - "average_sentence2_length": 197.53515625, - "max_sentence2_length": 496, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78838 - }, - "aau_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 197.53515625, - "max_sentence1_length": 496, - "min_sentence2_length": 24, - "average_sentence2_length": 110.42578125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78838 - }, - "eng_Latn-aaz_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.76953125, - "max_sentence1_length": 251, - "min_sentence2_length": 33, - "average_sentence2_length": 281.2265625, - "max_sentence2_length": 1407, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101375 - }, - "aaz_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 281.2265625, - "max_sentence1_length": 1407, - "min_sentence2_length": 24, - "average_sentence2_length": 114.76953125, - "max_sentence2_length": 251, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101375 - }, - "eng_Latn-abt_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 146.171875, - "max_sentence1_length": 341, - "min_sentence2_length": 29, - "average_sentence2_length": 273.06640625, - "max_sentence2_length": 758, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 107325 - }, - "abt_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 273.06640625, - "max_sentence1_length": 758, - "min_sentence2_length": 1, - "average_sentence2_length": 146.171875, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 107325 - }, - "eng_Latn-abx_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.1796875, - "max_sentence1_length": 827, - "min_sentence2_length": 36, - "average_sentence2_length": 185.4375, - "max_sentence2_length": 606, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76702 - }, - "abx_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 185.4375, - "max_sentence1_length": 606, - "min_sentence2_length": 24, - "average_sentence2_length": 114.1796875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76702 - }, - "eng_Latn-aby_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.16015625, - "max_sentence1_length": 228, - "min_sentence2_length": 42, - "average_sentence2_length": 282.90234375, - "max_sentence2_length": 931, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101648 - }, - "aby_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 282.90234375, - "max_sentence1_length": 931, - "min_sentence2_length": 24, - "average_sentence2_length": 114.16015625, - "max_sentence2_length": 228, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101648 - }, - "eng_Latn-acf_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.02734375, - "max_sentence1_length": 827, - "min_sentence2_length": 28, - "average_sentence2_length": 146.59765625, - "max_sentence2_length": 441, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66720 - }, - "acf_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 146.59765625, - "max_sentence1_length": 441, - "min_sentence2_length": 24, - "average_sentence2_length": 114.02734375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66720 - }, - "eng_Latn-acr_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 145.234375, - "max_sentence1_length": 341, - "min_sentence2_length": 53, - "average_sentence2_length": 199.171875, - "max_sentence2_length": 474, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88168 - }, - "acr_Latn-eng_Latn": { - "min_sentence1_length": 53, - "average_sentence1_length": 199.171875, - "max_sentence1_length": 474, - "min_sentence2_length": 35, - "average_sentence2_length": 145.234375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88168 - }, - "eng_Latn-acu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.046875, - "max_sentence1_length": 238, - "min_sentence2_length": 37, - "average_sentence2_length": 249.4921875, - "max_sentence2_length": 641, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92554 - }, - "acu_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 249.4921875, - "max_sentence1_length": 641, - "min_sentence2_length": 24, - "average_sentence2_length": 112.046875, - "max_sentence2_length": 238, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92554 - }, - "eng_Latn-adz_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 108.69140625, - "max_sentence1_length": 248, - "min_sentence2_length": 45, - "average_sentence2_length": 146.26171875, - "max_sentence2_length": 456, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65268 - }, - "adz_Latn-eng_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 146.26171875, - "max_sentence1_length": 456, - "min_sentence2_length": 23, - "average_sentence2_length": 108.69140625, - "max_sentence2_length": 248, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65268 - }, - "eng_Latn-aer_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.55859375, - "max_sentence1_length": 227, - "min_sentence2_length": 48, - "average_sentence2_length": 463.94921875, - "max_sentence2_length": 1597, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 147074 - }, - "aer_Latn-eng_Latn": { - "min_sentence1_length": 48, - "average_sentence1_length": 463.94921875, - "max_sentence1_length": 1597, - "min_sentence2_length": 24, - "average_sentence2_length": 110.55859375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 147074 - }, - "eng_Latn-aey_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 111.3203125, - "max_sentence1_length": 227, - "min_sentence2_length": 39, - "average_sentence2_length": 208.2109375, - "max_sentence2_length": 769, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81800 - }, - "aey_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 208.2109375, - "max_sentence1_length": 769, - "min_sentence2_length": 31, - "average_sentence2_length": 111.3203125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81800 - }, - "eng_Latn-agd_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.18359375, - "max_sentence1_length": 827, - "min_sentence2_length": 35, - "average_sentence2_length": 180.50390625, - "max_sentence2_length": 442, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75440 - }, - "agd_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 180.50390625, - "max_sentence1_length": 442, - "min_sentence2_length": 24, - "average_sentence2_length": 114.18359375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75440 - }, - "eng_Latn-agg_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.45703125, - "max_sentence1_length": 227, - "min_sentence2_length": 44, - "average_sentence2_length": 247.20703125, - "max_sentence2_length": 852, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92330 - }, - "agg_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 247.20703125, - "max_sentence1_length": 852, - "min_sentence2_length": 24, - "average_sentence2_length": 113.45703125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92330 - }, - "eng_Latn-agm_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.86328125, - "max_sentence1_length": 827, - "min_sentence2_length": 60, - "average_sentence2_length": 359.12890625, - "max_sentence2_length": 1291, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 121086 - }, - "agm_Latn-eng_Latn": { - "min_sentence1_length": 60, - "average_sentence1_length": 359.12890625, - "max_sentence1_length": 1291, - "min_sentence2_length": 24, - "average_sentence2_length": 113.86328125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 121086 - }, - "eng_Latn-agn_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.05078125, - "max_sentence1_length": 227, - "min_sentence2_length": 37, - "average_sentence2_length": 174.1015625, - "max_sentence2_length": 455, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72743 - }, - "agn_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 174.1015625, - "max_sentence1_length": 455, - "min_sentence2_length": 24, - "average_sentence2_length": 110.05078125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72743 - }, - "eng_Latn-agr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.0078125, - "max_sentence1_length": 827, - "min_sentence2_length": 36, - "average_sentence2_length": 164.63671875, - "max_sentence2_length": 402, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71589 - }, - "agr_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 164.63671875, - "max_sentence1_length": 402, - "min_sentence2_length": 24, - "average_sentence2_length": 115.0078125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71589 - }, - "eng_Latn-agt_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 112.78125, - "max_sentence1_length": 271, - "min_sentence2_length": 37, - "average_sentence2_length": 255.2734375, - "max_sentence2_length": 1289, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 94222 - }, - "agt_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 255.2734375, - "max_sentence1_length": 1289, - "min_sentence2_length": 31, - "average_sentence2_length": 112.78125, - "max_sentence2_length": 271, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 94222 - }, - "eng_Latn-agu_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 114.25, - "max_sentence1_length": 827, - "min_sentence2_length": 42, - "average_sentence2_length": 186.59375, - "max_sentence2_length": 486, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77016 - }, - "agu_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 186.59375, - "max_sentence1_length": 486, - "min_sentence2_length": 31, - "average_sentence2_length": 114.25, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77016 - }, - "eng_Latn-aia_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.66015625, - "max_sentence1_length": 827, - "min_sentence2_length": 34, - "average_sentence2_length": 211.5546875, - "max_sentence2_length": 753, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83511 - }, - "aia_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 211.5546875, - "max_sentence1_length": 753, - "min_sentence2_length": 24, - "average_sentence2_length": 114.66015625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83511 - }, - "eng_Latn-aii_Syrc": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 26, - "average_sentence2_length": 118.921875, - "max_sentence2_length": 264, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59217 - }, - "aii_Syrc-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 118.921875, - "max_sentence1_length": 264, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59217 - }, - "eng_Latn-aka_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 21, - "average_sentence2_length": 108.4921875, - "max_sentence2_length": 240, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 56547 - }, - "aka_Latn-eng_Latn": { - "min_sentence1_length": 21, - "average_sentence1_length": 108.4921875, - "max_sentence1_length": 240, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 56547 - }, - "eng_Latn-ake_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.05078125, - "max_sentence1_length": 827, - "min_sentence2_length": 33, - "average_sentence2_length": 144.27734375, - "max_sentence2_length": 348, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65876 - }, - "ake_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 144.27734375, - "max_sentence1_length": 348, - "min_sentence2_length": 24, - "average_sentence2_length": 113.05078125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65876 - }, - "eng_Latn-alp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.28515625, - "max_sentence1_length": 227, - "min_sentence2_length": 29, - "average_sentence2_length": 193.80078125, - "max_sentence2_length": 605, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78102 - }, - "alp_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 193.80078125, - "max_sentence1_length": 605, - "min_sentence2_length": 24, - "average_sentence2_length": 111.28515625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78102 - }, - "eng_Latn-alq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.6328125, - "max_sentence1_length": 827, - "min_sentence2_length": 37, - "average_sentence2_length": 210.26953125, - "max_sentence2_length": 629, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83431 - }, - "alq_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 210.26953125, - "max_sentence1_length": 629, - "min_sentence2_length": 24, - "average_sentence2_length": 115.6328125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83431 - }, - "eng_Latn-als_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.4375, - "max_sentence1_length": 827, - "min_sentence2_length": 25, - "average_sentence2_length": 114.828125, - "max_sentence2_length": 265, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58436 - }, - "als_Latn-eng_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 114.828125, - "max_sentence1_length": 265, - "min_sentence2_length": 24, - "average_sentence2_length": 113.4375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58436 - }, - "eng_Latn-aly_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 116.77734375, - "max_sentence1_length": 827, - "min_sentence2_length": 57, - "average_sentence2_length": 306.80859375, - "max_sentence2_length": 2026, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 108438 - }, - "aly_Latn-eng_Latn": { - "min_sentence1_length": 57, - "average_sentence1_length": 306.80859375, - "max_sentence1_length": 2026, - "min_sentence2_length": 24, - "average_sentence2_length": 116.77734375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 108438 - }, - "eng_Latn-ame_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.58984375, - "max_sentence1_length": 227, - "min_sentence2_length": 45, - "average_sentence2_length": 358.25, - "max_sentence2_length": 1180, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 119767 - }, - "ame_Latn-eng_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 358.25, - "max_sentence1_length": 1180, - "min_sentence2_length": 24, - "average_sentence2_length": 109.58984375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 119767 - }, - "eng_Latn-amf_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.21484375, - "max_sentence1_length": 227, - "min_sentence2_length": 37, - "average_sentence2_length": 147.92578125, - "max_sentence2_length": 402, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66084 - }, - "amf_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 147.92578125, - "max_sentence1_length": 402, - "min_sentence2_length": 24, - "average_sentence2_length": 110.21484375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66084 - }, - "eng_Latn-amk_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.08984375, - "max_sentence1_length": 827, - "min_sentence2_length": 48, - "average_sentence2_length": 207.99609375, - "max_sentence2_length": 556, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82454 - }, - "amk_Latn-eng_Latn": { - "min_sentence1_length": 48, - "average_sentence1_length": 207.99609375, - "max_sentence1_length": 556, - "min_sentence2_length": 24, - "average_sentence2_length": 114.08984375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82454 - }, - "eng_Latn-amm_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.375, - "max_sentence1_length": 227, - "min_sentence2_length": 43, - "average_sentence2_length": 215.09375, - "max_sentence2_length": 714, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83832 - }, - "amm_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 215.09375, - "max_sentence1_length": 714, - "min_sentence2_length": 24, - "average_sentence2_length": 112.375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83832 - }, - "eng_Latn-amn_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 145.33203125, - "max_sentence1_length": 341, - "min_sentence2_length": 29, - "average_sentence2_length": 180.0234375, - "max_sentence2_length": 484, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83291 - }, - "amn_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 180.0234375, - "max_sentence1_length": 484, - "min_sentence2_length": 1, - "average_sentence2_length": 145.33203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83291 - }, - "eng_Latn-amo_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.08203125, - "max_sentence1_length": 827, - "min_sentence2_length": 7, - "average_sentence2_length": 111.30078125, - "max_sentence2_length": 294, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57698 - }, - "amo_Latn-eng_Latn": { - "min_sentence1_length": 7, - "average_sentence1_length": 111.30078125, - "max_sentence1_length": 294, - "min_sentence2_length": 24, - "average_sentence2_length": 114.08203125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57698 - }, - "eng_Latn-amp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.0625, - "max_sentence1_length": 227, - "min_sentence2_length": 42, - "average_sentence2_length": 264.0, - "max_sentence2_length": 1162, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 96272 - }, - "amp_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 264.0, - "max_sentence1_length": 1162, - "min_sentence2_length": 24, - "average_sentence2_length": 112.0625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 96272 - }, - "eng_Latn-amr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.26953125, - "max_sentence1_length": 227, - "min_sentence2_length": 45, - "average_sentence2_length": 272.71875, - "max_sentence2_length": 805, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 98557 - }, - "amr_Latn-eng_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 272.71875, - "max_sentence1_length": 805, - "min_sentence2_length": 24, - "average_sentence2_length": 112.26953125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 98557 - }, - "eng_Latn-amu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.24609375, - "max_sentence1_length": 227, - "min_sentence2_length": 40, - "average_sentence2_length": 189.109375, - "max_sentence2_length": 505, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76635 - }, - "amu_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 189.109375, - "max_sentence1_length": 505, - "min_sentence2_length": 24, - "average_sentence2_length": 110.24609375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76635 - }, - "eng_Latn-amx_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.9609375, - "max_sentence1_length": 243, - "min_sentence2_length": 37, - "average_sentence2_length": 265.48046875, - "max_sentence2_length": 925, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 96369 - }, - "amx_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 265.48046875, - "max_sentence1_length": 925, - "min_sentence2_length": 24, - "average_sentence2_length": 110.9609375, - "max_sentence2_length": 243, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 96369 - }, - "eng_Latn-anh_Latn": { - "min_sentence1_length": 50, - "average_sentence1_length": 110.45045045045045, - "max_sentence1_length": 257, - "min_sentence2_length": 58, - "average_sentence2_length": 218.67567567567568, - "max_sentence2_length": 1063, - "num_samples": 111, - "num_samples_sentence2": 111, - "number_of_characters": 36533 - }, - "anh_Latn-eng_Latn": { - "min_sentence1_length": 58, - "average_sentence1_length": 218.67567567567568, - "max_sentence1_length": 1063, - "min_sentence2_length": 50, - "average_sentence2_length": 110.45045045045045, - "max_sentence2_length": 257, - "num_samples": 111, - "num_samples_sentence2": 111, - "number_of_characters": 36533 - }, - "eng_Latn-anv_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.015625, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 155.27734375, - "max_sentence2_length": 441, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68939 - }, - "anv_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 155.27734375, - "max_sentence1_length": 441, - "min_sentence2_length": 24, - "average_sentence2_length": 114.015625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68939 - }, - "eng_Latn-aoi_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 103.12890625, - "max_sentence1_length": 245, - "min_sentence2_length": 84, - "average_sentence2_length": 442.04296875, - "max_sentence2_length": 1797, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 139564 - }, - "aoi_Latn-eng_Latn": { - "min_sentence1_length": 84, - "average_sentence1_length": 442.04296875, - "max_sentence1_length": 1797, - "min_sentence2_length": 23, - "average_sentence2_length": 103.12890625, - "max_sentence2_length": 245, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 139564 - }, - "eng_Latn-aoj_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 145.33203125, - "max_sentence1_length": 341, - "min_sentence2_length": 44, - "average_sentence2_length": 250.44921875, - "max_sentence2_length": 607, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101320 - }, - "aoj_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 250.44921875, - "max_sentence1_length": 607, - "min_sentence2_length": 1, - "average_sentence2_length": 145.33203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101320 - }, - "eng_Latn-aom_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.6953125, - "max_sentence1_length": 827, - "min_sentence2_length": 38, - "average_sentence2_length": 229.5234375, - "max_sentence2_length": 756, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88120 - }, - "aom_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 229.5234375, - "max_sentence1_length": 756, - "min_sentence2_length": 24, - "average_sentence2_length": 114.6953125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88120 - }, - "eng_Latn-aon_Latn": { - "min_sentence1_length": 21, - "average_sentence1_length": 110.48046875, - "max_sentence1_length": 227, - "min_sentence2_length": 36, - "average_sentence2_length": 311.30859375, - "max_sentence2_length": 1125, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 107978 - }, - "aon_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 311.30859375, - "max_sentence1_length": 1125, - "min_sentence2_length": 21, - "average_sentence2_length": 110.48046875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 107978 - }, - "eng_Latn-apb_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.05078125, - "max_sentence1_length": 246, - "min_sentence2_length": 45, - "average_sentence2_length": 249.72265625, - "max_sentence2_length": 1260, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92614 - }, - "apb_Latn-eng_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 249.72265625, - "max_sentence1_length": 1260, - "min_sentence2_length": 24, - "average_sentence2_length": 112.05078125, - "max_sentence2_length": 246, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92614 - }, - "eng_Latn-ape_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 146.4375, - "max_sentence1_length": 341, - "min_sentence2_length": 69, - "average_sentence2_length": 310.66796875, - "max_sentence2_length": 772, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 117019 - }, - "ape_Latn-eng_Latn": { - "min_sentence1_length": 69, - "average_sentence1_length": 310.66796875, - "max_sentence1_length": 772, - "min_sentence2_length": 35, - "average_sentence2_length": 146.4375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 117019 - }, - "eng_Latn-apn_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.38671875, - "max_sentence1_length": 227, - "min_sentence2_length": 61, - "average_sentence2_length": 448.11328125, - "max_sentence2_length": 1608, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 144000 - }, - "apn_Latn-eng_Latn": { - "min_sentence1_length": 61, - "average_sentence1_length": 448.11328125, - "max_sentence1_length": 1608, - "min_sentence2_length": 24, - "average_sentence2_length": 114.38671875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 144000 - }, - "eng_Latn-apr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.44140625, - "max_sentence1_length": 227, - "min_sentence2_length": 54, - "average_sentence2_length": 269.625, - "max_sentence2_length": 956, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97297 - }, - "apr_Latn-eng_Latn": { - "min_sentence1_length": 54, - "average_sentence1_length": 269.625, - "max_sentence1_length": 956, - "min_sentence2_length": 24, - "average_sentence2_length": 110.44140625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97297 - }, - "eng_Latn-apu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 43, - "average_sentence2_length": 198.68359375, - "max_sentence2_length": 719, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80398 - }, - "apu_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 198.68359375, - "max_sentence1_length": 719, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80398 - }, - "eng_Latn-apw_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.4375, - "max_sentence1_length": 827, - "min_sentence2_length": 32, - "average_sentence2_length": 150.6875, - "max_sentence2_length": 321, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67616 - }, - "apw_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 150.6875, - "max_sentence1_length": 321, - "min_sentence2_length": 24, - "average_sentence2_length": 113.4375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67616 - }, - "eng_Latn-apz_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 111.77734375, - "max_sentence1_length": 227, - "min_sentence2_length": 47, - "average_sentence2_length": 314.078125, - "max_sentence2_length": 1291, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 109019 - }, - "apz_Latn-eng_Latn": { - "min_sentence1_length": 47, - "average_sentence1_length": 314.078125, - "max_sentence1_length": 1291, - "min_sentence2_length": 31, - "average_sentence2_length": 111.77734375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 109019 - }, - "eng_Latn-arb_Arab": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.55859375, - "max_sentence1_length": 341, - "min_sentence2_length": 54, - "average_sentence2_length": 162.15625, - "max_sentence2_length": 381, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79287 - }, - "arb_Arab-eng_Latn": { - "min_sentence1_length": 54, - "average_sentence1_length": 162.15625, - "max_sentence1_length": 381, - "min_sentence2_length": 56, - "average_sentence2_length": 147.55859375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79287 - }, - "eng_Latn-are_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.390625, - "max_sentence1_length": 827, - "min_sentence2_length": 35, - "average_sentence2_length": 182.50390625, - "max_sentence2_length": 559, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76005 - }, - "are_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 182.50390625, - "max_sentence1_length": 559, - "min_sentence2_length": 24, - "average_sentence2_length": 114.390625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76005 - }, - "eng_Latn-arl_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.2421875, - "max_sentence1_length": 227, - "min_sentence2_length": 55, - "average_sentence2_length": 294.5234375, - "max_sentence2_length": 1347, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 103620 - }, - "arl_Latn-eng_Latn": { - "min_sentence1_length": 55, - "average_sentence1_length": 294.5234375, - "max_sentence1_length": 1347, - "min_sentence2_length": 24, - "average_sentence2_length": 110.2421875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 103620 - }, - "eng_Latn-arn_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 113.11328125, - "max_sentence1_length": 827, - "min_sentence2_length": 38, - "average_sentence2_length": 170.55859375, - "max_sentence2_length": 485, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72620 - }, - "arn_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 170.55859375, - "max_sentence1_length": 485, - "min_sentence2_length": 31, - "average_sentence2_length": 113.11328125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72620 - }, - "eng_Latn-arp_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 107.47311827956989, - "max_sentence1_length": 245, - "min_sentence2_length": 34, - "average_sentence2_length": 119.58064516129032, - "max_sentence2_length": 272, - "num_samples": 93, - "num_samples_sentence2": 93, - "number_of_characters": 21116 - }, - "arp_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 119.58064516129032, - "max_sentence1_length": 272, - "min_sentence2_length": 37, - "average_sentence2_length": 107.47311827956989, - "max_sentence2_length": 245, - "num_samples": 93, - "num_samples_sentence2": 93, - "number_of_characters": 21116 - }, - "eng_Latn-asm_Beng": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 22, - "average_sentence2_length": 122.984375, - "max_sentence2_length": 307, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60257 - }, - "asm_Beng-eng_Latn": { - "min_sentence1_length": 22, - "average_sentence1_length": 122.984375, - "max_sentence1_length": 307, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60257 - }, - "eng_Latn-aso_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 108.3828125, - "max_sentence1_length": 227, - "min_sentence2_length": 41, - "average_sentence2_length": 309.41015625, - "max_sentence2_length": 1257, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 106955 - }, - "aso_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 309.41015625, - "max_sentence1_length": 1257, - "min_sentence2_length": 24, - "average_sentence2_length": 108.3828125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 106955 - }, - "eng_Latn-ata_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.84375, - "max_sentence1_length": 827, - "min_sentence2_length": 36, - "average_sentence2_length": 198.95703125, - "max_sentence2_length": 571, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79821 - }, - "ata_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 198.95703125, - "max_sentence1_length": 571, - "min_sentence2_length": 24, - "average_sentence2_length": 112.84375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79821 - }, - "eng_Latn-atb_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.2265625, - "max_sentence1_length": 227, - "min_sentence2_length": 30, - "average_sentence2_length": 170.921875, - "max_sentence2_length": 439, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71974 - }, - "atb_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 170.921875, - "max_sentence1_length": 439, - "min_sentence2_length": 24, - "average_sentence2_length": 110.2265625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71974 - }, - "eng_Latn-atd_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.36328125, - "max_sentence1_length": 227, - "min_sentence2_length": 44, - "average_sentence2_length": 235.46875, - "max_sentence2_length": 815, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88789 - }, - "atd_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 235.46875, - "max_sentence1_length": 815, - "min_sentence2_length": 24, - "average_sentence2_length": 111.36328125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88789 - }, - "eng_Latn-atg_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.75390625, - "max_sentence1_length": 227, - "min_sentence2_length": 24, - "average_sentence2_length": 135.8359375, - "max_sentence2_length": 372, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62871 - }, - "atg_Latn-eng_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 135.8359375, - "max_sentence1_length": 372, - "min_sentence2_length": 24, - "average_sentence2_length": 109.75390625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62871 - }, - "eng_Latn-att_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 38, - "average_sentence2_length": 217.02734375, - "max_sentence2_length": 622, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84332 - }, - "att_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 217.02734375, - "max_sentence1_length": 622, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84332 - }, - "eng_Latn-auc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.109375, - "max_sentence1_length": 827, - "min_sentence2_length": 58, - "average_sentence2_length": 262.625, - "max_sentence2_length": 912, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 96444 - }, - "auc_Latn-eng_Latn": { - "min_sentence1_length": 58, - "average_sentence1_length": 262.625, - "max_sentence1_length": 912, - "min_sentence2_length": 24, - "average_sentence2_length": 114.109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 96444 - }, - "eng_Latn-aui_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 115.68359375, - "max_sentence1_length": 269, - "min_sentence2_length": 56, - "average_sentence2_length": 176.31640625, - "max_sentence2_length": 760, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74752 - }, - "aui_Latn-eng_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 176.31640625, - "max_sentence1_length": 760, - "min_sentence2_length": 38, - "average_sentence2_length": 115.68359375, - "max_sentence2_length": 269, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74752 - }, - "eng_Latn-auy_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 108.5234375, - "max_sentence1_length": 227, - "min_sentence2_length": 42, - "average_sentence2_length": 205.5, - "max_sentence2_length": 632, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80390 - }, - "auy_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 205.5, - "max_sentence1_length": 632, - "min_sentence2_length": 24, - "average_sentence2_length": 108.5234375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80390 - }, - "eng_Latn-avt_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.71484375, - "max_sentence1_length": 227, - "min_sentence2_length": 40, - "average_sentence2_length": 200.4765625, - "max_sentence2_length": 657, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79665 - }, - "avt_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 200.4765625, - "max_sentence1_length": 657, - "min_sentence2_length": 24, - "average_sentence2_length": 110.71484375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79665 - }, - "eng_Latn-awb_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.54296875, - "max_sentence1_length": 227, - "min_sentence2_length": 30, - "average_sentence2_length": 178.2421875, - "max_sentence2_length": 492, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73673 - }, - "awb_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 178.2421875, - "max_sentence1_length": 492, - "min_sentence2_length": 24, - "average_sentence2_length": 109.54296875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73673 - }, - "eng_Latn-awk_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 106.98989898989899, - "max_sentence1_length": 245, - "min_sentence2_length": 49, - "average_sentence2_length": 126.5959595959596, - "max_sentence2_length": 250, - "num_samples": 99, - "num_samples_sentence2": 99, - "number_of_characters": 23125 - }, - "awk_Latn-eng_Latn": { - "min_sentence1_length": 49, - "average_sentence1_length": 126.5959595959596, - "max_sentence1_length": 250, - "min_sentence2_length": 37, - "average_sentence2_length": 106.98989898989899, - "max_sentence2_length": 245, - "num_samples": 99, - "num_samples_sentence2": 99, - "number_of_characters": 23125 - }, - "eng_Latn-awx_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 120.97265625, - "max_sentence1_length": 251, - "min_sentence2_length": 17, - "average_sentence2_length": 134.5234375, - "max_sentence2_length": 439, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65407 - }, - "awx_Latn-eng_Latn": { - "min_sentence1_length": 17, - "average_sentence1_length": 134.5234375, - "max_sentence1_length": 439, - "min_sentence2_length": 23, - "average_sentence2_length": 120.97265625, - "max_sentence2_length": 251, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65407 - }, - "eng_Latn-azb_Arab": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.0859375, - "max_sentence1_length": 227, - "min_sentence2_length": 17, - "average_sentence2_length": 108.828125, - "max_sentence2_length": 333, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 56042 - }, - "azb_Arab-eng_Latn": { - "min_sentence1_length": 17, - "average_sentence1_length": 108.828125, - "max_sentence1_length": 333, - "min_sentence2_length": 24, - "average_sentence2_length": 110.0859375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 56042 - }, - "eng_Latn-azg_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.5390625, - "max_sentence1_length": 827, - "min_sentence2_length": 52, - "average_sentence2_length": 236.22265625, - "max_sentence2_length": 641, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89539 - }, - "azg_Latn-eng_Latn": { - "min_sentence1_length": 52, - "average_sentence1_length": 236.22265625, - "max_sentence1_length": 641, - "min_sentence2_length": 24, - "average_sentence2_length": 113.5390625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89539 - }, - "eng_Latn-azz_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.234375, - "max_sentence1_length": 227, - "min_sentence2_length": 74, - "average_sentence2_length": 282.5859375, - "max_sentence2_length": 618, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 100818 - }, - "azz_Latn-eng_Latn": { - "min_sentence1_length": 74, - "average_sentence1_length": 282.5859375, - "max_sentence1_length": 618, - "min_sentence2_length": 24, - "average_sentence2_length": 111.234375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 100818 - }, - "eng_Latn-bao_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.05078125, - "max_sentence1_length": 227, - "min_sentence2_length": 41, - "average_sentence2_length": 177.0390625, - "max_sentence2_length": 444, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73239 - }, - "bao_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 177.0390625, - "max_sentence1_length": 444, - "min_sentence2_length": 24, - "average_sentence2_length": 109.05078125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73239 - }, - "eng_Latn-bba_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.53125, - "max_sentence1_length": 227, - "min_sentence2_length": 22, - "average_sentence2_length": 122.30078125, - "max_sentence2_length": 582, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59605 - }, - "bba_Latn-eng_Latn": { - "min_sentence1_length": 22, - "average_sentence1_length": 122.30078125, - "max_sentence1_length": 582, - "min_sentence2_length": 24, - "average_sentence2_length": 110.53125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59605 - }, - "eng_Latn-bbb_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 108.83984375, - "max_sentence1_length": 232, - "min_sentence2_length": 45, - "average_sentence2_length": 251.58984375, - "max_sentence2_length": 671, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92270 - }, - "bbb_Latn-eng_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 251.58984375, - "max_sentence1_length": 671, - "min_sentence2_length": 24, - "average_sentence2_length": 108.83984375, - "max_sentence2_length": 232, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92270 - }, - "eng_Latn-bbr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.89453125, - "max_sentence1_length": 216, - "min_sentence2_length": 39, - "average_sentence2_length": 220.9140625, - "max_sentence2_length": 1295, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85199 - }, - "bbr_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 220.9140625, - "max_sentence1_length": 1295, - "min_sentence2_length": 24, - "average_sentence2_length": 111.89453125, - "max_sentence2_length": 216, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85199 - }, - "eng_Latn-bch_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.9765625, - "max_sentence1_length": 227, - "min_sentence2_length": 38, - "average_sentence2_length": 194.5625, - "max_sentence2_length": 699, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78218 - }, - "bch_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 194.5625, - "max_sentence1_length": 699, - "min_sentence2_length": 24, - "average_sentence2_length": 110.9765625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78218 - }, - "eng_Latn-bco_Latn": { - "min_sentence1_length": 21, - "average_sentence1_length": 111.3359375, - "max_sentence1_length": 243, - "min_sentence2_length": 34, - "average_sentence2_length": 230.51953125, - "max_sentence2_length": 688, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87515 - }, - "bco_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 230.51953125, - "max_sentence1_length": 688, - "min_sentence2_length": 21, - "average_sentence2_length": 111.3359375, - "max_sentence2_length": 243, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87515 - }, - "eng_Latn-bdd_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.73828125, - "max_sentence1_length": 227, - "min_sentence2_length": 36, - "average_sentence2_length": 164.11328125, - "max_sentence2_length": 432, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70106 - }, - "bdd_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 164.11328125, - "max_sentence1_length": 432, - "min_sentence2_length": 24, - "average_sentence2_length": 109.73828125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70106 - }, - "eng_Latn-bea_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 114.08666666666667, - "max_sentence1_length": 257, - "min_sentence2_length": 45, - "average_sentence2_length": 124.71333333333334, - "max_sentence2_length": 289, - "num_samples": 150, - "num_samples_sentence2": 150, - "number_of_characters": 35820 - }, - "bea_Latn-eng_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 124.71333333333334, - "max_sentence1_length": 289, - "min_sentence2_length": 45, - "average_sentence2_length": 114.08666666666667, - "max_sentence2_length": 257, - "num_samples": 150, - "num_samples_sentence2": 150, - "number_of_characters": 35820 - }, - "eng_Latn-bef_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 110.01171875, - "max_sentence1_length": 227, - "min_sentence2_length": 57, - "average_sentence2_length": 243.50390625, - "max_sentence2_length": 693, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90500 - }, - "bef_Latn-eng_Latn": { - "min_sentence1_length": 57, - "average_sentence1_length": 243.50390625, - "max_sentence1_length": 693, - "min_sentence2_length": 32, - "average_sentence2_length": 110.01171875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90500 - }, - "eng_Latn-bel_Cyrl": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.55859375, - "max_sentence1_length": 341, - "min_sentence2_length": 40, - "average_sentence2_length": 127.609375, - "max_sentence2_length": 264, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70443 - }, - "bel_Cyrl-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 127.609375, - "max_sentence1_length": 264, - "min_sentence2_length": 56, - "average_sentence2_length": 147.55859375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70443 - }, - "eng_Latn-ben_Beng": { - "min_sentence1_length": 21, - "average_sentence1_length": 114.9375, - "max_sentence1_length": 827, - "min_sentence2_length": 26, - "average_sentence2_length": 111.85546875, - "max_sentence2_length": 238, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58059 - }, - "ben_Beng-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 111.85546875, - "max_sentence1_length": 238, - "min_sentence2_length": 21, - "average_sentence2_length": 114.9375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58059 - }, - "eng_Latn-beo_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.91796875, - "max_sentence1_length": 827, - "min_sentence2_length": 1, - "average_sentence2_length": 176.2578125, - "max_sentence2_length": 515, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74029 - }, - "beo_Latn-eng_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 176.2578125, - "max_sentence1_length": 515, - "min_sentence2_length": 24, - "average_sentence2_length": 112.91796875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74029 - }, - "eng_Latn-beu_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 114.171875, - "max_sentence1_length": 257, - "min_sentence2_length": 44, - "average_sentence2_length": 273.8359375, - "max_sentence2_length": 1204, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99330 - }, - "beu_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 273.8359375, - "max_sentence1_length": 1204, - "min_sentence2_length": 38, - "average_sentence2_length": 114.171875, - "max_sentence2_length": 257, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99330 - }, - "eng_Latn-bgs_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.97265625, - "max_sentence1_length": 827, - "min_sentence2_length": 36, - "average_sentence2_length": 167.515625, - "max_sentence2_length": 596, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72317 - }, - "bgs_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 167.515625, - "max_sentence1_length": 596, - "min_sentence2_length": 24, - "average_sentence2_length": 114.97265625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72317 - }, - "eng_Latn-bgt_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.68359375, - "max_sentence1_length": 376, - "min_sentence2_length": 39, - "average_sentence2_length": 230.9296875, - "max_sentence2_length": 876, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88221 - }, - "bgt_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 230.9296875, - "max_sentence1_length": 876, - "min_sentence2_length": 24, - "average_sentence2_length": 113.68359375, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88221 - }, - "eng_Latn-bhg_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 111.7578125, - "max_sentence1_length": 243, - "min_sentence2_length": 46, - "average_sentence2_length": 186.07421875, - "max_sentence2_length": 589, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76245 - }, - "bhg_Latn-eng_Latn": { - "min_sentence1_length": 46, - "average_sentence1_length": 186.07421875, - "max_sentence1_length": 589, - "min_sentence2_length": 38, - "average_sentence2_length": 111.7578125, - "max_sentence2_length": 243, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76245 - }, - "eng_Latn-bhl_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 111.8671875, - "max_sentence1_length": 227, - "min_sentence2_length": 60, - "average_sentence2_length": 268.83984375, - "max_sentence2_length": 1263, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97461 - }, - "bhl_Latn-eng_Latn": { - "min_sentence1_length": 60, - "average_sentence1_length": 268.83984375, - "max_sentence1_length": 1263, - "min_sentence2_length": 31, - "average_sentence2_length": 111.8671875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97461 - }, - "eng_Latn-big_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 111.85546875, - "max_sentence1_length": 227, - "min_sentence2_length": 43, - "average_sentence2_length": 245.296875, - "max_sentence2_length": 1052, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91431 - }, - "big_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 245.296875, - "max_sentence1_length": 1052, - "min_sentence2_length": 31, - "average_sentence2_length": 111.85546875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91431 - }, - "eng_Latn-bjk_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.4609375, - "max_sentence1_length": 227, - "min_sentence2_length": 33, - "average_sentence2_length": 196.8828125, - "max_sentence2_length": 654, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78424 - }, - "bjk_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 196.8828125, - "max_sentence1_length": 654, - "min_sentence2_length": 24, - "average_sentence2_length": 109.4609375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78424 - }, - "eng_Latn-bjp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.14453125, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 151.33203125, - "max_sentence2_length": 412, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68218 - }, - "bjp_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 151.33203125, - "max_sentence1_length": 412, - "min_sentence2_length": 24, - "average_sentence2_length": 115.14453125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68218 - }, - "eng_Latn-bjr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.734375, - "max_sentence1_length": 239, - "min_sentence2_length": 39, - "average_sentence2_length": 306.8671875, - "max_sentence2_length": 1198, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 107418 - }, - "bjr_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 306.8671875, - "max_sentence1_length": 1198, - "min_sentence2_length": 24, - "average_sentence2_length": 112.734375, - "max_sentence2_length": 239, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 107418 - }, - "eng_Latn-bjv_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 34, - "average_sentence2_length": 141.203125, - "max_sentence2_length": 331, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65683 - }, - "bjv_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 141.203125, - "max_sentence1_length": 331, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65683 - }, - "eng_Latn-bjz_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.48828125, - "max_sentence1_length": 227, - "min_sentence2_length": 28, - "average_sentence2_length": 269.55078125, - "max_sentence2_length": 907, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97546 - }, - "bjz_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 269.55078125, - "max_sentence1_length": 907, - "min_sentence2_length": 24, - "average_sentence2_length": 111.48828125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97546 - }, - "eng_Latn-bkd_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.11328125, - "max_sentence1_length": 227, - "min_sentence2_length": 32, - "average_sentence2_length": 154.0, - "max_sentence2_length": 436, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67869 - }, - "bkd_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 154.0, - "max_sentence1_length": 436, - "min_sentence2_length": 24, - "average_sentence2_length": 111.11328125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67869 - }, - "eng_Latn-bki_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 114.66015625, - "max_sentence1_length": 827, - "min_sentence2_length": 28, - "average_sentence2_length": 266.97265625, - "max_sentence2_length": 1015, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97698 - }, - "bki_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 266.97265625, - "max_sentence1_length": 1015, - "min_sentence2_length": 31, - "average_sentence2_length": 114.66015625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97698 - }, - "eng_Latn-bkq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.3125, - "max_sentence1_length": 216, - "min_sentence2_length": 26, - "average_sentence2_length": 237.59375, - "max_sentence2_length": 901, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89576 - }, - "bkq_Latn-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 237.59375, - "max_sentence1_length": 901, - "min_sentence2_length": 24, - "average_sentence2_length": 112.3125, - "max_sentence2_length": 216, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89576 - }, - "eng_Latn-bkx_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 112.90625, - "max_sentence1_length": 257, - "min_sentence2_length": 54, - "average_sentence2_length": 216.9296875, - "max_sentence2_length": 789, - "num_samples": 128, - "num_samples_sentence2": 128, - "number_of_characters": 42219 - }, - "bkx_Latn-eng_Latn": { - "min_sentence1_length": 54, - "average_sentence1_length": 216.9296875, - "max_sentence1_length": 789, - "min_sentence2_length": 45, - "average_sentence2_length": 112.90625, - "max_sentence2_length": 257, - "num_samples": 128, - "num_samples_sentence2": 128, - "number_of_characters": 42219 - }, - "eng_Latn-blw_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.12890625, - "max_sentence1_length": 227, - "min_sentence2_length": 39, - "average_sentence2_length": 209.64453125, - "max_sentence2_length": 667, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81606 - }, - "blw_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 209.64453125, - "max_sentence1_length": 667, - "min_sentence2_length": 24, - "average_sentence2_length": 109.12890625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81606 - }, - "eng_Latn-blz_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.46875, - "max_sentence1_length": 827, - "min_sentence2_length": 32, - "average_sentence2_length": 162.29296875, - "max_sentence2_length": 397, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71107 - }, - "blz_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 162.29296875, - "max_sentence1_length": 397, - "min_sentence2_length": 24, - "average_sentence2_length": 115.46875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71107 - }, - "eng_Latn-bmh_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.51171875, - "max_sentence1_length": 273, - "min_sentence2_length": 40, - "average_sentence2_length": 225.7734375, - "max_sentence2_length": 1302, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86345 - }, - "bmh_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 225.7734375, - "max_sentence1_length": 1302, - "min_sentence2_length": 24, - "average_sentence2_length": 111.51171875, - "max_sentence2_length": 273, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86345 - }, - "eng_Latn-bmk_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 111.0, - "max_sentence1_length": 257, - "min_sentence2_length": 44, - "average_sentence2_length": 157.64566929133858, - "max_sentence2_length": 722, - "num_samples": 127, - "num_samples_sentence2": 127, - "number_of_characters": 34118 - }, - "bmk_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 157.64566929133858, - "max_sentence1_length": 722, - "min_sentence2_length": 39, - "average_sentence2_length": 111.0, - "max_sentence2_length": 257, - "num_samples": 127, - "num_samples_sentence2": 127, - "number_of_characters": 34118 - }, - "eng_Latn-bmr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.98828125, - "max_sentence1_length": 376, - "min_sentence2_length": 45, - "average_sentence2_length": 152.140625, - "max_sentence2_length": 348, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67617 - }, - "bmr_Latn-eng_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 152.140625, - "max_sentence1_length": 348, - "min_sentence2_length": 24, - "average_sentence2_length": 111.98828125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67617 - }, - "eng_Latn-bmu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.6640625, - "max_sentence1_length": 376, - "min_sentence2_length": 42, - "average_sentence2_length": 217.7734375, - "max_sentence2_length": 623, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84080 - }, - "bmu_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 217.7734375, - "max_sentence1_length": 623, - "min_sentence2_length": 24, - "average_sentence2_length": 110.6640625, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84080 - }, - "eng_Latn-bnp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.5703125, - "max_sentence1_length": 227, - "min_sentence2_length": 35, - "average_sentence2_length": 164.7890625, - "max_sentence2_length": 753, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70748 - }, - "bnp_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 164.7890625, - "max_sentence1_length": 753, - "min_sentence2_length": 24, - "average_sentence2_length": 111.5703125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70748 - }, - "eng_Latn-boa_Latn": { - "min_sentence1_length": 21, - "average_sentence1_length": 115.5859375, - "max_sentence1_length": 273, - "min_sentence2_length": 35, - "average_sentence2_length": 199.6328125, - "max_sentence2_length": 612, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80696 - }, - "boa_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 199.6328125, - "max_sentence1_length": 612, - "min_sentence2_length": 21, - "average_sentence2_length": 115.5859375, - "max_sentence2_length": 273, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80696 - }, - "eng_Latn-boj_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 145.33203125, - "max_sentence1_length": 341, - "min_sentence2_length": 43, - "average_sentence2_length": 218.3359375, - "max_sentence2_length": 614, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93099 - }, - "boj_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 218.3359375, - "max_sentence1_length": 614, - "min_sentence2_length": 1, - "average_sentence2_length": 145.33203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93099 - }, - "eng_Latn-bon_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 110.4765625, - "max_sentence1_length": 227, - "min_sentence2_length": 38, - "average_sentence2_length": 209.63671875, - "max_sentence2_length": 808, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81949 - }, - "bon_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 209.63671875, - "max_sentence1_length": 808, - "min_sentence2_length": 31, - "average_sentence2_length": 110.4765625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81949 - }, - "eng_Latn-box_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.04296875, - "max_sentence1_length": 827, - "min_sentence2_length": 37, - "average_sentence2_length": 151.21484375, - "max_sentence2_length": 408, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68162 - }, - "box_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 151.21484375, - "max_sentence1_length": 408, - "min_sentence2_length": 24, - "average_sentence2_length": 115.04296875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68162 - }, - "eng_Latn-bpr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.71484375, - "max_sentence1_length": 376, - "min_sentence2_length": 28, - "average_sentence2_length": 124.25, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60407 - }, - "bpr_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 124.25, - "max_sentence1_length": 376, - "min_sentence2_length": 24, - "average_sentence2_length": 111.71484375, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60407 - }, - "eng_Latn-bps_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.96875, - "max_sentence1_length": 227, - "min_sentence2_length": 30, - "average_sentence2_length": 147.59765625, - "max_sentence2_length": 536, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66193 - }, - "bps_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 147.59765625, - "max_sentence1_length": 536, - "min_sentence2_length": 24, - "average_sentence2_length": 110.96875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66193 - }, - "eng_Latn-bqc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.90625, - "max_sentence1_length": 227, - "min_sentence2_length": 21, - "average_sentence2_length": 84.66015625, - "max_sentence2_length": 215, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 49809 - }, - "bqc_Latn-eng_Latn": { - "min_sentence1_length": 21, - "average_sentence1_length": 84.66015625, - "max_sentence1_length": 215, - "min_sentence2_length": 24, - "average_sentence2_length": 109.90625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 49809 - }, - "eng_Latn-bqp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.90625, - "max_sentence1_length": 227, - "min_sentence2_length": 22, - "average_sentence2_length": 98.44140625, - "max_sentence2_length": 251, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 53337 - }, - "bqp_Latn-eng_Latn": { - "min_sentence1_length": 22, - "average_sentence1_length": 98.44140625, - "max_sentence1_length": 251, - "min_sentence2_length": 24, - "average_sentence2_length": 109.90625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 53337 - }, - "eng_Latn-bre_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 107.06640625, - "max_sentence1_length": 245, - "min_sentence2_length": 32, - "average_sentence2_length": 108.109375, - "max_sentence2_length": 251, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 55085 - }, - "bre_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 108.109375, - "max_sentence1_length": 251, - "min_sentence2_length": 40, - "average_sentence2_length": 107.06640625, - "max_sentence2_length": 245, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 55085 - }, - "eng_Latn-bsj_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.875, - "max_sentence1_length": 827, - "min_sentence2_length": 24, - "average_sentence2_length": 110.1328125, - "max_sentence2_length": 401, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57602 - }, - "bsj_Latn-eng_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.1328125, - "max_sentence1_length": 401, - "min_sentence2_length": 24, - "average_sentence2_length": 114.875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57602 - }, - "eng_Latn-bsn_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.49609375, - "max_sentence1_length": 230, - "min_sentence2_length": 44, - "average_sentence2_length": 284.43359375, - "max_sentence2_length": 974, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101102 - }, - "bsn_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 284.43359375, - "max_sentence1_length": 974, - "min_sentence2_length": 24, - "average_sentence2_length": 110.49609375, - "max_sentence2_length": 230, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101102 - }, - "eng_Latn-bsp_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 113.30859375, - "max_sentence1_length": 245, - "min_sentence2_length": 27, - "average_sentence2_length": 113.1953125, - "max_sentence2_length": 276, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57985 - }, - "bsp_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 113.1953125, - "max_sentence1_length": 276, - "min_sentence2_length": 37, - "average_sentence2_length": 113.30859375, - "max_sentence2_length": 245, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57985 - }, - "eng_Latn-bss_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 37, - "average_sentence2_length": 154.640625, - "max_sentence2_length": 497, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68361 - }, - "bss_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 154.640625, - "max_sentence1_length": 497, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68361 - }, - "eng_Latn-buk_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.93359375, - "max_sentence1_length": 227, - "min_sentence2_length": 30, - "average_sentence2_length": 194.6328125, - "max_sentence2_length": 586, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77969 - }, - "buk_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 194.6328125, - "max_sentence1_length": 586, - "min_sentence2_length": 24, - "average_sentence2_length": 109.93359375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77969 - }, - "eng_Latn-bus_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.90625, - "max_sentence1_length": 227, - "min_sentence2_length": 22, - "average_sentence2_length": 98.015625, - "max_sentence2_length": 239, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 53228 - }, - "bus_Latn-eng_Latn": { - "min_sentence1_length": 22, - "average_sentence1_length": 98.015625, - "max_sentence1_length": 239, - "min_sentence2_length": 24, - "average_sentence2_length": 109.90625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 53228 - }, - "eng_Latn-bvd_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.15234375, - "max_sentence1_length": 827, - "min_sentence2_length": 43, - "average_sentence2_length": 204.75, - "max_sentence2_length": 615, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81383 - }, - "bvd_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 204.75, - "max_sentence1_length": 615, - "min_sentence2_length": 24, - "average_sentence2_length": 113.15234375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81383 - }, - "eng_Latn-bvr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.515625, - "max_sentence1_length": 827, - "min_sentence2_length": 49, - "average_sentence2_length": 306.55859375, - "max_sentence2_length": 773, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 107539 - }, - "bvr_Latn-eng_Latn": { - "min_sentence1_length": 49, - "average_sentence1_length": 306.55859375, - "max_sentence1_length": 773, - "min_sentence2_length": 24, - "average_sentence2_length": 113.515625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 107539 - }, - "eng_Latn-bxh_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 115.453125, - "max_sentence1_length": 257, - "min_sentence2_length": 23, - "average_sentence2_length": 133.3359375, - "max_sentence2_length": 387, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63690 - }, - "bxh_Latn-eng_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 133.3359375, - "max_sentence1_length": 387, - "min_sentence2_length": 38, - "average_sentence2_length": 115.453125, - "max_sentence2_length": 257, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63690 - }, - "eng_Latn-byr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 107.50390625, - "max_sentence1_length": 227, - "min_sentence2_length": 33, - "average_sentence2_length": 225.57421875, - "max_sentence2_length": 556, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85268 - }, - "byr_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 225.57421875, - "max_sentence1_length": 556, - "min_sentence2_length": 24, - "average_sentence2_length": 107.50390625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85268 - }, - "eng_Latn-byx_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 108.93359375, - "max_sentence1_length": 227, - "min_sentence2_length": 42, - "average_sentence2_length": 276.94140625, - "max_sentence2_length": 1031, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 98784 - }, - "byx_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 276.94140625, - "max_sentence1_length": 1031, - "min_sentence2_length": 24, - "average_sentence2_length": 108.93359375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 98784 - }, - "eng_Latn-bzd_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.390625, - "max_sentence1_length": 227, - "min_sentence2_length": 24, - "average_sentence2_length": 202.2890625, - "max_sentence2_length": 587, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80046 - }, - "bzd_Latn-eng_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 202.2890625, - "max_sentence1_length": 587, - "min_sentence2_length": 24, - "average_sentence2_length": 110.390625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80046 - }, - "eng_Latn-bzh_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.41796875, - "max_sentence1_length": 827, - "min_sentence2_length": 36, - "average_sentence2_length": 166.80859375, - "max_sentence2_length": 511, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72250 - }, - "bzh_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 166.80859375, - "max_sentence1_length": 511, - "min_sentence2_length": 24, - "average_sentence2_length": 115.41796875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72250 - }, - "eng_Latn-bzj_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.90625, - "max_sentence1_length": 227, - "min_sentence2_length": 30, - "average_sentence2_length": 146.23046875, - "max_sentence2_length": 447, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65571 - }, - "bzj_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 146.23046875, - "max_sentence1_length": 447, - "min_sentence2_length": 24, - "average_sentence2_length": 109.90625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65571 - }, - "eng_Latn-caa_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 36, - "average_sentence2_length": 229.0078125, - "max_sentence2_length": 628, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88161 - }, - "caa_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 229.0078125, - "max_sentence1_length": 628, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88161 - }, - "eng_Latn-cab_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.90234375, - "max_sentence1_length": 227, - "min_sentence2_length": 28, - "average_sentence2_length": 163.0703125, - "max_sentence2_length": 518, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69881 - }, - "cab_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 163.0703125, - "max_sentence1_length": 518, - "min_sentence2_length": 24, - "average_sentence2_length": 109.90234375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69881 - }, - "eng_Latn-cac_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 108.96484375, - "max_sentence1_length": 227, - "min_sentence2_length": 27, - "average_sentence2_length": 176.08203125, - "max_sentence2_length": 431, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72972 - }, - "cac_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 176.08203125, - "max_sentence1_length": 431, - "min_sentence2_length": 24, - "average_sentence2_length": 108.96484375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72972 - }, - "eng_Latn-caf_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.8125, - "max_sentence1_length": 827, - "min_sentence2_length": 34, - "average_sentence2_length": 174.11328125, - "max_sentence2_length": 433, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73709 - }, - "caf_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 174.11328125, - "max_sentence1_length": 433, - "min_sentence2_length": 24, - "average_sentence2_length": 113.8125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73709 - }, - "eng_Latn-cak_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.15234375, - "max_sentence1_length": 341, - "min_sentence2_length": 78, - "average_sentence2_length": 242.734375, - "max_sentence2_length": 584, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99811 - }, - "cak_Latn-eng_Latn": { - "min_sentence1_length": 78, - "average_sentence1_length": 242.734375, - "max_sentence1_length": 584, - "min_sentence2_length": 56, - "average_sentence2_length": 147.15234375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99811 - }, - "eng_Latn-cao_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.76953125, - "max_sentence1_length": 227, - "min_sentence2_length": 37, - "average_sentence2_length": 175.046875, - "max_sentence2_length": 445, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73169 - }, - "cao_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 175.046875, - "max_sentence1_length": 445, - "min_sentence2_length": 24, - "average_sentence2_length": 110.76953125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73169 - }, - "eng_Latn-cap_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.47265625, - "max_sentence1_length": 227, - "min_sentence2_length": 46, - "average_sentence2_length": 224.6953125, - "max_sentence2_length": 667, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85803 - }, - "cap_Latn-eng_Latn": { - "min_sentence1_length": 46, - "average_sentence1_length": 224.6953125, - "max_sentence1_length": 667, - "min_sentence2_length": 24, - "average_sentence2_length": 110.47265625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85803 - }, - "eng_Latn-car_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.234375, - "max_sentence1_length": 827, - "min_sentence2_length": 36, - "average_sentence2_length": 147.98828125, - "max_sentence2_length": 386, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67129 - }, - "car_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 147.98828125, - "max_sentence1_length": 386, - "min_sentence2_length": 24, - "average_sentence2_length": 114.234375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67129 - }, - "eng_Latn-cav_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.59765625, - "max_sentence1_length": 227, - "min_sentence2_length": 36, - "average_sentence2_length": 313.08984375, - "max_sentence2_length": 1077, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 109488 - }, - "cav_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 313.08984375, - "max_sentence1_length": 1077, - "min_sentence2_length": 24, - "average_sentence2_length": 114.59765625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 109488 - }, - "eng_Latn-cax_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.59375, - "max_sentence1_length": 227, - "min_sentence2_length": 39, - "average_sentence2_length": 179.125, - "max_sentence2_length": 409, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74168 - }, - "cax_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 179.125, - "max_sentence1_length": 409, - "min_sentence2_length": 24, - "average_sentence2_length": 110.59375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74168 - }, - "eng_Latn-cbc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.39453125, - "max_sentence1_length": 827, - "min_sentence2_length": 61, - "average_sentence2_length": 284.9296875, - "max_sentence2_length": 1210, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 102227 - }, - "cbc_Latn-eng_Latn": { - "min_sentence1_length": 61, - "average_sentence1_length": 284.9296875, - "max_sentence1_length": 1210, - "min_sentence2_length": 24, - "average_sentence2_length": 114.39453125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 102227 - }, - "eng_Latn-cbi_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 113.79296875, - "max_sentence1_length": 827, - "min_sentence2_length": 37, - "average_sentence2_length": 187.0, - "max_sentence2_length": 733, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77003 - }, - "cbi_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 187.0, - "max_sentence1_length": 733, - "min_sentence2_length": 31, - "average_sentence2_length": 113.79296875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77003 - }, - "eng_Latn-cbk_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.02734375, - "max_sentence1_length": 827, - "min_sentence2_length": 38, - "average_sentence2_length": 196.265625, - "max_sentence2_length": 532, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79435 - }, - "cbk_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 196.265625, - "max_sentence1_length": 532, - "min_sentence2_length": 24, - "average_sentence2_length": 114.02734375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79435 - }, - "eng_Latn-cbr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.453125, - "max_sentence1_length": 238, - "min_sentence2_length": 30, - "average_sentence2_length": 212.15234375, - "max_sentence2_length": 814, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82587 - }, - "cbr_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 212.15234375, - "max_sentence1_length": 814, - "min_sentence2_length": 24, - "average_sentence2_length": 110.453125, - "max_sentence2_length": 238, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82587 - }, - "eng_Latn-cbs_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 112.0546875, - "max_sentence1_length": 227, - "min_sentence2_length": 43, - "average_sentence2_length": 250.90234375, - "max_sentence2_length": 1059, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92917 - }, - "cbs_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 250.90234375, - "max_sentence1_length": 1059, - "min_sentence2_length": 31, - "average_sentence2_length": 112.0546875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92917 - }, - "eng_Latn-cbt_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.8671875, - "max_sentence1_length": 827, - "min_sentence2_length": 38, - "average_sentence2_length": 236.9453125, - "max_sentence2_length": 635, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90064 - }, - "cbt_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 236.9453125, - "max_sentence1_length": 635, - "min_sentence2_length": 24, - "average_sentence2_length": 114.8671875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90064 - }, - "eng_Latn-cbu_Latn": { - "min_sentence1_length": 21, - "average_sentence1_length": 107.87890625, - "max_sentence1_length": 227, - "min_sentence2_length": 24, - "average_sentence2_length": 269.4375, - "max_sentence2_length": 1004, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 96593 - }, - "cbu_Latn-eng_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 269.4375, - "max_sentence1_length": 1004, - "min_sentence2_length": 21, - "average_sentence2_length": 107.87890625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 96593 - }, - "eng_Latn-cbv_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 111.125, - "max_sentence1_length": 227, - "min_sentence2_length": 39, - "average_sentence2_length": 271.37890625, - "max_sentence2_length": 927, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97921 - }, - "cbv_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 271.37890625, - "max_sentence1_length": 927, - "min_sentence2_length": 31, - "average_sentence2_length": 111.125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97921 - }, - "eng_Latn-cco_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.796875, - "max_sentence1_length": 827, - "min_sentence2_length": 63, - "average_sentence2_length": 259.8671875, - "max_sentence2_length": 671, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 95658 - }, - "cco_Latn-eng_Latn": { - "min_sentence1_length": 63, - "average_sentence1_length": 259.8671875, - "max_sentence1_length": 671, - "min_sentence2_length": 24, - "average_sentence2_length": 113.796875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 95658 - }, - "eng_Latn-ceb_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 31, - "average_sentence2_length": 139.43359375, - "max_sentence2_length": 381, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64468 - }, - "ceb_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 139.43359375, - "max_sentence1_length": 381, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64468 - }, - "eng_Latn-cek_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.72265625, - "max_sentence1_length": 827, - "min_sentence2_length": 25, - "average_sentence2_length": 138.73046875, - "max_sentence2_length": 301, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65140 - }, - "cek_Latn-eng_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 138.73046875, - "max_sentence1_length": 301, - "min_sentence2_length": 24, - "average_sentence2_length": 115.72265625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65140 - }, - "eng_Latn-ces_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 146.3203125, - "max_sentence1_length": 341, - "min_sentence2_length": 36, - "average_sentence2_length": 125.69921875, - "max_sentence2_length": 301, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69637 - }, - "ces_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 125.69921875, - "max_sentence1_length": 301, - "min_sentence2_length": 56, - "average_sentence2_length": 146.3203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69637 - }, - "eng_Latn-cgc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.75390625, - "max_sentence1_length": 227, - "min_sentence2_length": 47, - "average_sentence2_length": 224.30078125, - "max_sentence2_length": 618, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85774 - }, - "cgc_Latn-eng_Latn": { - "min_sentence1_length": 47, - "average_sentence1_length": 224.30078125, - "max_sentence1_length": 618, - "min_sentence2_length": 24, - "average_sentence2_length": 110.75390625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85774 - }, - "eng_Latn-cha_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 108.57421875, - "max_sentence1_length": 243, - "min_sentence2_length": 35, - "average_sentence2_length": 113.0078125, - "max_sentence2_length": 258, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 56725 - }, - "cha_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 113.0078125, - "max_sentence1_length": 258, - "min_sentence2_length": 38, - "average_sentence2_length": 108.57421875, - "max_sentence2_length": 243, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 56725 - }, - "eng_Latn-chd_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.58203125, - "max_sentence1_length": 227, - "min_sentence2_length": 38, - "average_sentence2_length": 227.484375, - "max_sentence2_length": 789, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86545 - }, - "chd_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 227.484375, - "max_sentence1_length": 789, - "min_sentence2_length": 24, - "average_sentence2_length": 110.58203125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86545 - }, - "eng_Latn-chf_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.53125, - "max_sentence1_length": 827, - "min_sentence2_length": 30, - "average_sentence2_length": 202.71484375, - "max_sentence2_length": 987, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81471 - }, - "chf_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 202.71484375, - "max_sentence1_length": 987, - "min_sentence2_length": 24, - "average_sentence2_length": 115.53125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81471 - }, - "eng_Latn-chk_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 28, - "average_sentence2_length": 135.45703125, - "max_sentence2_length": 330, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63450 - }, - "chk_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 135.45703125, - "max_sentence1_length": 330, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63450 - }, - "eng_Latn-chq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.43359375, - "max_sentence1_length": 227, - "min_sentence2_length": 29, - "average_sentence2_length": 176.98828125, - "max_sentence2_length": 602, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73580 - }, - "chq_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 176.98828125, - "max_sentence1_length": 602, - "min_sentence2_length": 24, - "average_sentence2_length": 110.43359375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73580 - }, - "eng_Latn-chz_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 36, - "average_sentence2_length": 172.62890625, - "max_sentence2_length": 439, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73728 - }, - "chz_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 172.62890625, - "max_sentence1_length": 439, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73728 - }, - "eng_Latn-cjo_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.18359375, - "max_sentence1_length": 263, - "min_sentence2_length": 49, - "average_sentence2_length": 285.125, - "max_sentence2_length": 1225, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101711 - }, - "cjo_Latn-eng_Latn": { - "min_sentence1_length": 49, - "average_sentence1_length": 285.125, - "max_sentence1_length": 1225, - "min_sentence2_length": 24, - "average_sentence2_length": 112.18359375, - "max_sentence2_length": 263, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101711 - }, - "eng_Latn-cjv_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.625, - "max_sentence1_length": 227, - "min_sentence2_length": 50, - "average_sentence2_length": 248.8359375, - "max_sentence2_length": 908, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91766 - }, - "cjv_Latn-eng_Latn": { - "min_sentence1_length": 50, - "average_sentence1_length": 248.8359375, - "max_sentence1_length": 908, - "min_sentence2_length": 24, - "average_sentence2_length": 109.625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91766 - }, - "eng_Latn-ckb_Arab": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 20, - "average_sentence2_length": 102.04296875, - "max_sentence2_length": 232, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 55658 - }, - "ckb_Arab-eng_Latn": { - "min_sentence1_length": 20, - "average_sentence1_length": 102.04296875, - "max_sentence1_length": 232, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 55658 - }, - "eng_Latn-cle_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 52, - "average_sentence2_length": 197.43359375, - "max_sentence2_length": 439, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79316 - }, - "cle_Latn-eng_Latn": { - "min_sentence1_length": 52, - "average_sentence1_length": 197.43359375, - "max_sentence1_length": 439, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79316 - }, - "eng_Latn-clu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.1640625, - "max_sentence1_length": 827, - "min_sentence2_length": 36, - "average_sentence2_length": 177.93359375, - "max_sentence2_length": 843, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74521 - }, - "clu_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 177.93359375, - "max_sentence1_length": 843, - "min_sentence2_length": 24, - "average_sentence2_length": 113.1640625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74521 - }, - "eng_Latn-cme_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.1875, - "max_sentence1_length": 827, - "min_sentence2_length": 25, - "average_sentence2_length": 143.66796875, - "max_sentence2_length": 436, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66267 - }, - "cme_Latn-eng_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 143.66796875, - "max_sentence1_length": 436, - "min_sentence2_length": 24, - "average_sentence2_length": 115.1875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66267 - }, - "eng_Latn-cmn_Hans": { - "min_sentence1_length": 1, - "average_sentence1_length": 145.33203125, - "max_sentence1_length": 341, - "min_sentence2_length": 12, - "average_sentence2_length": 40.19140625, - "max_sentence2_length": 106, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 47494 - }, - "cmn_Hans-eng_Latn": { - "min_sentence1_length": 12, - "average_sentence1_length": 40.19140625, - "max_sentence1_length": 106, - "min_sentence2_length": 1, - "average_sentence2_length": 145.33203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 47494 - }, - "eng_Latn-cni_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.59765625, - "max_sentence1_length": 263, - "min_sentence2_length": 43, - "average_sentence2_length": 240.62890625, - "max_sentence2_length": 1132, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90170 - }, - "cni_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 240.62890625, - "max_sentence1_length": 1132, - "min_sentence2_length": 24, - "average_sentence2_length": 111.59765625, - "max_sentence2_length": 263, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90170 - }, - "eng_Latn-cnl_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.390625, - "max_sentence1_length": 827, - "min_sentence2_length": 34, - "average_sentence2_length": 229.64453125, - "max_sentence2_length": 668, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88073 - }, - "cnl_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 229.64453125, - "max_sentence1_length": 668, - "min_sentence2_length": 24, - "average_sentence2_length": 114.390625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88073 - }, - "eng_Latn-cnt_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.20703125, - "max_sentence1_length": 827, - "min_sentence2_length": 40, - "average_sentence2_length": 177.01953125, - "max_sentence2_length": 384, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74810 - }, - "cnt_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 177.01953125, - "max_sentence1_length": 384, - "min_sentence2_length": 24, - "average_sentence2_length": 115.20703125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74810 - }, - "eng_Latn-cof_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 111.8203125, - "max_sentence1_length": 256, - "min_sentence2_length": 32, - "average_sentence2_length": 281.3515625, - "max_sentence2_length": 1760, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 100652 - }, - "cof_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 281.3515625, - "max_sentence1_length": 1760, - "min_sentence2_length": 31, - "average_sentence2_length": 111.8203125, - "max_sentence2_length": 256, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 100652 - }, - "eng_Latn-con_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.8359375, - "max_sentence1_length": 227, - "min_sentence2_length": 34, - "average_sentence2_length": 187.04296875, - "max_sentence2_length": 681, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76257 - }, - "con_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 187.04296875, - "max_sentence1_length": 681, - "min_sentence2_length": 24, - "average_sentence2_length": 110.8359375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76257 - }, - "eng_Latn-cop_Copt": { - "min_sentence1_length": 56, - "average_sentence1_length": 146.3203125, - "max_sentence1_length": 341, - "min_sentence2_length": 34, - "average_sentence2_length": 137.5390625, - "max_sentence2_length": 279, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72668 - }, - "cop_Copt-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 137.5390625, - "max_sentence1_length": 279, - "min_sentence2_length": 56, - "average_sentence2_length": 146.3203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72668 - }, - "eng_Latn-cot_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.234375, - "max_sentence1_length": 827, - "min_sentence2_length": 45, - "average_sentence2_length": 271.953125, - "max_sentence2_length": 882, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 98864 - }, - "cot_Latn-eng_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 271.953125, - "max_sentence1_length": 882, - "min_sentence2_length": 24, - "average_sentence2_length": 114.234375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 98864 - }, - "eng_Latn-cpa_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 44, - "average_sentence2_length": 167.7421875, - "max_sentence2_length": 387, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72477 - }, - "cpa_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 167.7421875, - "max_sentence1_length": 387, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72477 - }, - "eng_Latn-cpb_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.8125, - "max_sentence1_length": 827, - "min_sentence2_length": 35, - "average_sentence2_length": 162.61328125, - "max_sentence2_length": 426, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70765 - }, - "cpb_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 162.61328125, - "max_sentence1_length": 426, - "min_sentence2_length": 24, - "average_sentence2_length": 113.8125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70765 - }, - "eng_Latn-cpc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.8125, - "max_sentence1_length": 827, - "min_sentence2_length": 42, - "average_sentence2_length": 166.1484375, - "max_sentence2_length": 431, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71670 - }, - "cpc_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 166.1484375, - "max_sentence1_length": 431, - "min_sentence2_length": 24, - "average_sentence2_length": 113.8125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71670 - }, - "eng_Latn-cpu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.8125, - "max_sentence1_length": 827, - "min_sentence2_length": 42, - "average_sentence2_length": 172.44140625, - "max_sentence2_length": 441, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73281 - }, - "cpu_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 172.44140625, - "max_sentence1_length": 441, - "min_sentence2_length": 24, - "average_sentence2_length": 113.8125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73281 - }, - "eng_Latn-cpy_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.4375, - "max_sentence1_length": 827, - "min_sentence2_length": 41, - "average_sentence2_length": 181.5, - "max_sentence2_length": 453, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75504 - }, - "cpy_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 181.5, - "max_sentence1_length": 453, - "min_sentence2_length": 24, - "average_sentence2_length": 113.4375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75504 - }, - "eng_Latn-crn_Latn": { - "min_sentence1_length": 21, - "average_sentence1_length": 108.921875, - "max_sentence1_length": 227, - "min_sentence2_length": 54, - "average_sentence2_length": 314.71875, - "max_sentence2_length": 1123, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 108452 - }, - "crn_Latn-eng_Latn": { - "min_sentence1_length": 54, - "average_sentence1_length": 314.71875, - "max_sentence1_length": 1123, - "min_sentence2_length": 21, - "average_sentence2_length": 108.921875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 108452 - }, - "eng_Latn-crx_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.8125, - "max_sentence1_length": 827, - "min_sentence2_length": 33, - "average_sentence2_length": 175.640625, - "max_sentence2_length": 457, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74100 - }, - "crx_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 175.640625, - "max_sentence1_length": 457, - "min_sentence2_length": 24, - "average_sentence2_length": 113.8125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74100 - }, - "eng_Latn-cso_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.13671875, - "max_sentence1_length": 827, - "min_sentence2_length": 47, - "average_sentence2_length": 218.03125, - "max_sentence2_length": 535, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84779 - }, - "cso_Latn-eng_Latn": { - "min_sentence1_length": 47, - "average_sentence1_length": 218.03125, - "max_sentence1_length": 535, - "min_sentence2_length": 24, - "average_sentence2_length": 113.13671875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84779 - }, - "eng_Latn-csy_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 31, - "average_sentence2_length": 130.73828125, - "max_sentence2_length": 283, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62242 - }, - "csy_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 130.73828125, - "max_sentence1_length": 283, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62242 - }, - "eng_Latn-cta_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.4296875, - "max_sentence1_length": 827, - "min_sentence2_length": 39, - "average_sentence2_length": 279.05859375, - "max_sentence2_length": 909, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 100733 - }, - "cta_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 279.05859375, - "max_sentence1_length": 909, - "min_sentence2_length": 24, - "average_sentence2_length": 114.4296875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 100733 - }, - "eng_Latn-cth_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 41, - "average_sentence2_length": 135.921875, - "max_sentence2_length": 291, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63569 - }, - "cth_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 135.921875, - "max_sentence1_length": 291, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63569 - }, - "eng_Latn-ctp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.09765625, - "max_sentence1_length": 227, - "min_sentence2_length": 37, - "average_sentence2_length": 266.01171875, - "max_sentence2_length": 864, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 96284 - }, - "ctp_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 266.01171875, - "max_sentence1_length": 864, - "min_sentence2_length": 24, - "average_sentence2_length": 110.09765625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 96284 - }, - "eng_Latn-ctu_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.55859375, - "max_sentence1_length": 341, - "min_sentence2_length": 61, - "average_sentence2_length": 238.31640625, - "max_sentence2_length": 670, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 98784 - }, - "ctu_Latn-eng_Latn": { - "min_sentence1_length": 61, - "average_sentence1_length": 238.31640625, - "max_sentence1_length": 670, - "min_sentence2_length": 56, - "average_sentence2_length": 147.55859375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 98784 - }, - "eng_Latn-cub_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.7109375, - "max_sentence1_length": 232, - "min_sentence2_length": 30, - "average_sentence2_length": 276.57421875, - "max_sentence2_length": 1218, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99401 - }, - "cub_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 276.57421875, - "max_sentence1_length": 1218, - "min_sentence2_length": 24, - "average_sentence2_length": 111.7109375, - "max_sentence2_length": 232, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99401 - }, - "eng_Latn-cuc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.234375, - "max_sentence1_length": 227, - "min_sentence2_length": 35, - "average_sentence2_length": 160.85546875, - "max_sentence2_length": 404, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69143 - }, - "cuc_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 160.85546875, - "max_sentence1_length": 404, - "min_sentence2_length": 24, - "average_sentence2_length": 109.234375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69143 - }, - "eng_Latn-cui_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.5078125, - "max_sentence1_length": 227, - "min_sentence2_length": 79, - "average_sentence2_length": 333.36328125, - "max_sentence2_length": 1448, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 113631 - }, - "cui_Latn-eng_Latn": { - "min_sentence1_length": 79, - "average_sentence1_length": 333.36328125, - "max_sentence1_length": 1448, - "min_sentence2_length": 24, - "average_sentence2_length": 110.5078125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 113631 - }, - "eng_Latn-cuk_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 146.3203125, - "max_sentence1_length": 341, - "min_sentence2_length": 63, - "average_sentence2_length": 198.34375, - "max_sentence2_length": 513, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88234 - }, - "cuk_Latn-eng_Latn": { - "min_sentence1_length": 63, - "average_sentence1_length": 198.34375, - "max_sentence1_length": 513, - "min_sentence2_length": 56, - "average_sentence2_length": 146.3203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88234 - }, - "eng_Latn-cut_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.66015625, - "max_sentence1_length": 216, - "min_sentence2_length": 34, - "average_sentence2_length": 185.71484375, - "max_sentence2_length": 531, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75616 - }, - "cut_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 185.71484375, - "max_sentence1_length": 531, - "min_sentence2_length": 24, - "average_sentence2_length": 109.66015625, - "max_sentence2_length": 216, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75616 - }, - "eng_Latn-cux_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 44, - "average_sentence2_length": 233.81640625, - "max_sentence2_length": 599, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89392 - }, - "cux_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 233.81640625, - "max_sentence1_length": 599, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89392 - }, - "eng_Latn-cwe_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 23, - "average_sentence2_length": 133.17578125, - "max_sentence2_length": 317, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62866 - }, - "cwe_Latn-eng_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 133.17578125, - "max_sentence1_length": 317, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62866 - }, - "eng_Latn-cya_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 43, - "average_sentence2_length": 163.6953125, - "max_sentence2_length": 346, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71441 - }, - "cya_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 163.6953125, - "max_sentence1_length": 346, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71441 - }, - "eng_Latn-daa_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.8984375, - "max_sentence1_length": 827, - "min_sentence2_length": 38, - "average_sentence2_length": 151.21484375, - "max_sentence2_length": 365, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67869 - }, - "daa_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 151.21484375, - "max_sentence1_length": 365, - "min_sentence2_length": 24, - "average_sentence2_length": 113.8984375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67869 - }, - "eng_Latn-dad_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 28, - "average_sentence2_length": 128.60546875, - "max_sentence2_length": 308, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62202 - }, - "dad_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 128.60546875, - "max_sentence1_length": 308, - "min_sentence2_length": 24, - "average_sentence2_length": 114.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62202 - }, - "eng_Latn-dah_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 110.046875, - "max_sentence1_length": 227, - "min_sentence2_length": 47, - "average_sentence2_length": 275.80859375, - "max_sentence2_length": 796, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 98779 - }, - "dah_Latn-eng_Latn": { - "min_sentence1_length": 47, - "average_sentence1_length": 275.80859375, - "max_sentence1_length": 796, - "min_sentence2_length": 31, - "average_sentence2_length": 110.046875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 98779 - }, - "eng_Latn-dan_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 21, - "average_sentence2_length": 109.00390625, - "max_sentence2_length": 226, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 56678 - }, - "dan_Latn-eng_Latn": { - "min_sentence1_length": 21, - "average_sentence1_length": 109.00390625, - "max_sentence1_length": 226, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 56678 - }, - "eng_Latn-ded_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.8671875, - "max_sentence1_length": 230, - "min_sentence2_length": 25, - "average_sentence2_length": 178.6875, - "max_sentence2_length": 777, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73870 - }, - "ded_Latn-eng_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 178.6875, - "max_sentence1_length": 777, - "min_sentence2_length": 24, - "average_sentence2_length": 109.8671875, - "max_sentence2_length": 230, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73870 - }, - "eng_Latn-deu_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.55859375, - "max_sentence1_length": 341, - "min_sentence2_length": 55, - "average_sentence2_length": 156.78515625, - "max_sentence2_length": 392, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77912 - }, - "deu_Latn-eng_Latn": { - "min_sentence1_length": 55, - "average_sentence1_length": 156.78515625, - "max_sentence1_length": 392, - "min_sentence2_length": 56, - "average_sentence2_length": 147.55859375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77912 - }, - "eng_Latn-dgc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.234375, - "max_sentence1_length": 251, - "min_sentence2_length": 39, - "average_sentence2_length": 164.83203125, - "max_sentence2_length": 394, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70417 - }, - "dgc_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 164.83203125, - "max_sentence1_length": 394, - "min_sentence2_length": 24, - "average_sentence2_length": 110.234375, - "max_sentence2_length": 251, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70417 - }, - "eng_Latn-dgr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.671875, - "max_sentence1_length": 227, - "min_sentence2_length": 36, - "average_sentence2_length": 160.18359375, - "max_sentence2_length": 492, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69339 - }, - "dgr_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 160.18359375, - "max_sentence1_length": 492, - "min_sentence2_length": 24, - "average_sentence2_length": 110.671875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69339 - }, - "eng_Latn-dgz_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.01171875, - "max_sentence1_length": 376, - "min_sentence2_length": 37, - "average_sentence2_length": 175.05078125, - "max_sentence2_length": 597, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73488 - }, - "dgz_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 175.05078125, - "max_sentence1_length": 597, - "min_sentence2_length": 24, - "average_sentence2_length": 112.01171875, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73488 - }, - "eng_Latn-dhg_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 110.27734375, - "max_sentence1_length": 215, - "min_sentence2_length": 52, - "average_sentence2_length": 268.19921875, - "max_sentence2_length": 1116, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 96890 - }, - "dhg_Latn-eng_Latn": { - "min_sentence1_length": 52, - "average_sentence1_length": 268.19921875, - "max_sentence1_length": 1116, - "min_sentence2_length": 28, - "average_sentence2_length": 110.27734375, - "max_sentence2_length": 215, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 96890 - }, - "eng_Latn-dif_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 21, - "average_sentence2_length": 135.3515625, - "max_sentence2_length": 345, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63423 - }, - "dif_Latn-eng_Latn": { - "min_sentence1_length": 21, - "average_sentence1_length": 135.3515625, - "max_sentence1_length": 345, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63423 - }, - "eng_Latn-dik_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.18359375, - "max_sentence1_length": 827, - "min_sentence2_length": 25, - "average_sentence2_length": 126.83984375, - "max_sentence2_length": 350, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61958 - }, - "dik_Latn-eng_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 126.83984375, - "max_sentence1_length": 350, - "min_sentence2_length": 24, - "average_sentence2_length": 115.18359375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61958 - }, - "eng_Latn-dji_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 121.51578947368421, - "max_sentence1_length": 259, - "min_sentence2_length": 50, - "average_sentence2_length": 255.96315789473684, - "max_sentence2_length": 933, - "num_samples": 190, - "num_samples_sentence2": 190, - "number_of_characters": 71721 - }, - "dji_Latn-eng_Latn": { - "min_sentence1_length": 50, - "average_sentence1_length": 255.96315789473684, - "max_sentence1_length": 933, - "min_sentence2_length": 26, - "average_sentence2_length": 121.51578947368421, - "max_sentence2_length": 259, - "num_samples": 190, - "num_samples_sentence2": 190, - "number_of_characters": 71721 - }, - "eng_Latn-djk_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.34765625, - "max_sentence1_length": 827, - "min_sentence2_length": 25, - "average_sentence2_length": 181.88671875, - "max_sentence2_length": 717, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75836 - }, - "djk_Latn-eng_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 181.88671875, - "max_sentence1_length": 717, - "min_sentence2_length": 24, - "average_sentence2_length": 114.34765625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75836 - }, - "eng_Latn-djr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.65234375, - "max_sentence1_length": 227, - "min_sentence2_length": 59, - "average_sentence2_length": 406.55078125, - "max_sentence2_length": 1457, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 132660 - }, - "djr_Latn-eng_Latn": { - "min_sentence1_length": 59, - "average_sentence1_length": 406.55078125, - "max_sentence1_length": 1457, - "min_sentence2_length": 24, - "average_sentence2_length": 111.65234375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 132660 - }, - "eng_Latn-dob_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.67578125, - "max_sentence1_length": 227, - "min_sentence2_length": 36, - "average_sentence2_length": 162.1171875, - "max_sentence2_length": 427, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69579 - }, - "dob_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 162.1171875, - "max_sentence1_length": 427, - "min_sentence2_length": 24, - "average_sentence2_length": 109.67578125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69579 - }, - "eng_Latn-dop_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.33203125, - "max_sentence1_length": 227, - "min_sentence2_length": 28, - "average_sentence2_length": 140.765625, - "max_sentence2_length": 333, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64281 - }, - "dop_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 140.765625, - "max_sentence1_length": 333, - "min_sentence2_length": 24, - "average_sentence2_length": 110.33203125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64281 - }, - "eng_Latn-dov_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 19, - "average_sentence2_length": 116.83984375, - "max_sentence2_length": 294, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58684 - }, - "dov_Latn-eng_Latn": { - "min_sentence1_length": 19, - "average_sentence1_length": 116.83984375, - "max_sentence1_length": 294, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58684 - }, - "eng_Latn-dwr_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 145.33203125, - "max_sentence1_length": 341, - "min_sentence2_length": 46, - "average_sentence2_length": 173.80859375, - "max_sentence2_length": 480, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81700 - }, - "dwr_Latn-eng_Latn": { - "min_sentence1_length": 46, - "average_sentence1_length": 173.80859375, - "max_sentence1_length": 480, - "min_sentence2_length": 1, - "average_sentence2_length": 145.33203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81700 - }, - "eng_Latn-dww_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 110.01953125, - "max_sentence1_length": 231, - "min_sentence2_length": 31, - "average_sentence2_length": 185.31640625, - "max_sentence2_length": 606, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75606 - }, - "dww_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 185.31640625, - "max_sentence1_length": 606, - "min_sentence2_length": 31, - "average_sentence2_length": 110.01953125, - "max_sentence2_length": 231, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75606 - }, - "eng_Latn-dwy_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 113.89473684210526, - "max_sentence1_length": 257, - "min_sentence2_length": 3, - "average_sentence2_length": 312.593984962406, - "max_sentence2_length": 1213, - "num_samples": 133, - "num_samples_sentence2": 133, - "number_of_characters": 56723 - }, - "dwy_Latn-eng_Latn": { - "min_sentence1_length": 3, - "average_sentence1_length": 312.593984962406, - "max_sentence1_length": 1213, - "min_sentence2_length": 42, - "average_sentence2_length": 113.89473684210526, - "max_sentence2_length": 257, - "num_samples": 133, - "num_samples_sentence2": 133, - "number_of_characters": 56723 - }, - "eng_Latn-ebk_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.203125, - "max_sentence1_length": 227, - "min_sentence2_length": 36, - "average_sentence2_length": 184.87109375, - "max_sentence2_length": 492, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75795 - }, - "ebk_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 184.87109375, - "max_sentence1_length": 492, - "min_sentence2_length": 24, - "average_sentence2_length": 111.203125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75795 - }, - "eng_Latn-eko_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 105.97265625, - "max_sentence1_length": 217, - "min_sentence2_length": 17, - "average_sentence2_length": 120.234375, - "max_sentence2_length": 280, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57909 - }, - "eko_Latn-eng_Latn": { - "min_sentence1_length": 17, - "average_sentence1_length": 120.234375, - "max_sentence1_length": 280, - "min_sentence2_length": 37, - "average_sentence2_length": 105.97265625, - "max_sentence2_length": 217, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57909 - }, - "eng_Latn-emi_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.42578125, - "max_sentence1_length": 827, - "min_sentence2_length": 29, - "average_sentence2_length": 160.08203125, - "max_sentence2_length": 475, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70274 - }, - "emi_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 160.08203125, - "max_sentence1_length": 475, - "min_sentence2_length": 24, - "average_sentence2_length": 114.42578125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70274 - }, - "eng_Latn-emp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.54296875, - "max_sentence1_length": 227, - "min_sentence2_length": 39, - "average_sentence2_length": 185.9765625, - "max_sentence2_length": 529, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75909 - }, - "emp_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 185.9765625, - "max_sentence1_length": 529, - "min_sentence2_length": 24, - "average_sentence2_length": 110.54296875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75909 - }, - "eng_Latn-enq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.29296875, - "max_sentence1_length": 227, - "min_sentence2_length": 52, - "average_sentence2_length": 234.73046875, - "max_sentence2_length": 800, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89350 - }, - "enq_Latn-eng_Latn": { - "min_sentence1_length": 52, - "average_sentence1_length": 234.73046875, - "max_sentence1_length": 800, - "min_sentence2_length": 24, - "average_sentence2_length": 114.29296875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89350 - }, - "eng_Latn-epo_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.4375, - "max_sentence1_length": 827, - "min_sentence2_length": 23, - "average_sentence2_length": 100.90625, - "max_sentence2_length": 240, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 54872 - }, - "epo_Latn-eng_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 100.90625, - "max_sentence1_length": 240, - "min_sentence2_length": 24, - "average_sentence2_length": 113.4375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 54872 - }, - "eng_Latn-eri_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.0703125, - "max_sentence1_length": 827, - "min_sentence2_length": 45, - "average_sentence2_length": 223.96875, - "max_sentence2_length": 854, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86538 - }, - "eri_Latn-eng_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 223.96875, - "max_sentence1_length": 854, - "min_sentence2_length": 24, - "average_sentence2_length": 114.0703125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86538 - }, - "eng_Latn-ese_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.25, - "max_sentence1_length": 227, - "min_sentence2_length": 58, - "average_sentence2_length": 298.2109375, - "max_sentence2_length": 934, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 104310 - }, - "ese_Latn-eng_Latn": { - "min_sentence1_length": 58, - "average_sentence1_length": 298.2109375, - "max_sentence1_length": 934, - "min_sentence2_length": 24, - "average_sentence2_length": 109.25, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 104310 - }, - "eng_Latn-esk_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 114.61328125, - "max_sentence1_length": 827, - "min_sentence2_length": 35, - "average_sentence2_length": 156.62890625, - "max_sentence2_length": 463, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69438 - }, - "esk_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 156.62890625, - "max_sentence1_length": 463, - "min_sentence2_length": 31, - "average_sentence2_length": 114.61328125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69438 - }, - "eng_Latn-etr_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 109.7265625, - "max_sentence1_length": 243, - "min_sentence2_length": 27, - "average_sentence2_length": 160.1171875, - "max_sentence2_length": 424, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69080 - }, - "etr_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 160.1171875, - "max_sentence1_length": 424, - "min_sentence2_length": 31, - "average_sentence2_length": 109.7265625, - "max_sentence2_length": 243, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69080 - }, - "eng_Latn-ewe_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 24, - "average_sentence2_length": 129.68359375, - "max_sentence2_length": 305, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62734 - }, - "ewe_Latn-eng_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 129.68359375, - "max_sentence1_length": 305, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62734 - }, - "eng_Latn-faa_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.94921875, - "max_sentence1_length": 227, - "min_sentence2_length": 58, - "average_sentence2_length": 272.8984375, - "max_sentence2_length": 925, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 98521 - }, - "faa_Latn-eng_Latn": { - "min_sentence1_length": 58, - "average_sentence1_length": 272.8984375, - "max_sentence1_length": 925, - "min_sentence2_length": 24, - "average_sentence2_length": 111.94921875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 98521 - }, - "eng_Latn-fai_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.20703125, - "max_sentence1_length": 227, - "min_sentence2_length": 38, - "average_sentence2_length": 193.04296875, - "max_sentence2_length": 578, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77632 - }, - "fai_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 193.04296875, - "max_sentence1_length": 578, - "min_sentence2_length": 24, - "average_sentence2_length": 110.20703125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77632 - }, - "eng_Latn-far_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.6328125, - "max_sentence1_length": 227, - "min_sentence2_length": 40, - "average_sentence2_length": 192.46484375, - "max_sentence2_length": 640, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77337 - }, - "far_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 192.46484375, - "max_sentence1_length": 640, - "min_sentence2_length": 24, - "average_sentence2_length": 109.6328125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77337 - }, - "eng_Latn-ffm_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.828125, - "max_sentence1_length": 827, - "min_sentence2_length": 1, - "average_sentence2_length": 122.6015625, - "max_sentence2_length": 361, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60526 - }, - "ffm_Latn-eng_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 122.6015625, - "max_sentence1_length": 361, - "min_sentence2_length": 24, - "average_sentence2_length": 113.828125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60526 - }, - "eng_Latn-for_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.94921875, - "max_sentence1_length": 227, - "min_sentence2_length": 27, - "average_sentence2_length": 156.578125, - "max_sentence2_length": 363, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68231 - }, - "for_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 156.578125, - "max_sentence1_length": 363, - "min_sentence2_length": 24, - "average_sentence2_length": 109.94921875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68231 - }, - "eng_Latn-fra_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 145.33203125, - "max_sentence1_length": 341, - "min_sentence2_length": 44, - "average_sentence2_length": 155.83203125, - "max_sentence2_length": 386, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77098 - }, - "fra_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 155.83203125, - "max_sentence1_length": 386, - "min_sentence2_length": 1, - "average_sentence2_length": 145.33203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77098 - }, - "eng_Latn-fue_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 116.5546875, - "max_sentence1_length": 827, - "min_sentence2_length": 28, - "average_sentence2_length": 109.19921875, - "max_sentence2_length": 247, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57793 - }, - "fue_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 109.19921875, - "max_sentence1_length": 247, - "min_sentence2_length": 31, - "average_sentence2_length": 116.5546875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57793 - }, - "eng_Latn-fuf_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.234375, - "max_sentence1_length": 827, - "min_sentence2_length": 26, - "average_sentence2_length": 119.56640625, - "max_sentence2_length": 264, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59597 - }, - "fuf_Latn-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 119.56640625, - "max_sentence1_length": 264, - "min_sentence2_length": 24, - "average_sentence2_length": 113.234375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59597 - }, - "eng_Latn-fuh_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.3046875, - "max_sentence1_length": 827, - "min_sentence2_length": 1, - "average_sentence2_length": 120.66015625, - "max_sentence2_length": 295, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60407 - }, - "fuh_Latn-eng_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 120.66015625, - "max_sentence1_length": 295, - "min_sentence2_length": 24, - "average_sentence2_length": 115.3046875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60407 - }, - "eng_Latn-gah_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.3125, - "max_sentence1_length": 227, - "min_sentence2_length": 43, - "average_sentence2_length": 189.37890625, - "max_sentence2_length": 539, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76721 - }, - "gah_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 189.37890625, - "max_sentence1_length": 539, - "min_sentence2_length": 24, - "average_sentence2_length": 110.3125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76721 - }, - "eng_Latn-gai_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.75390625, - "max_sentence1_length": 227, - "min_sentence2_length": 40, - "average_sentence2_length": 241.88671875, - "max_sentence2_length": 919, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90020 - }, - "gai_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 241.88671875, - "max_sentence1_length": 919, - "min_sentence2_length": 24, - "average_sentence2_length": 109.75390625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90020 - }, - "eng_Latn-gam_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.83984375, - "max_sentence1_length": 376, - "min_sentence2_length": 40, - "average_sentence2_length": 183.171875, - "max_sentence2_length": 607, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75523 - }, - "gam_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 183.171875, - "max_sentence1_length": 607, - "min_sentence2_length": 24, - "average_sentence2_length": 111.83984375, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75523 - }, - "eng_Latn-gaw_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.5, - "max_sentence1_length": 227, - "min_sentence2_length": 28, - "average_sentence2_length": 200.12109375, - "max_sentence2_length": 771, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79263 - }, - "gaw_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 200.12109375, - "max_sentence1_length": 771, - "min_sentence2_length": 24, - "average_sentence2_length": 109.5, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79263 - }, - "eng_Latn-gdn_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.8984375, - "max_sentence1_length": 227, - "min_sentence2_length": 52, - "average_sentence2_length": 292.9765625, - "max_sentence2_length": 1210, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 103392 - }, - "gdn_Latn-eng_Latn": { - "min_sentence1_length": 52, - "average_sentence1_length": 292.9765625, - "max_sentence1_length": 1210, - "min_sentence2_length": 24, - "average_sentence2_length": 110.8984375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 103392 - }, - "eng_Latn-gdr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.28515625, - "max_sentence1_length": 227, - "min_sentence2_length": 45, - "average_sentence2_length": 219.1953125, - "max_sentence2_length": 620, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84347 - }, - "gdr_Latn-eng_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 219.1953125, - "max_sentence1_length": 620, - "min_sentence2_length": 24, - "average_sentence2_length": 110.28515625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84347 - }, - "eng_Latn-geb_Latn": { - "min_sentence1_length": 21, - "average_sentence1_length": 115.3046875, - "max_sentence1_length": 827, - "min_sentence2_length": 41, - "average_sentence2_length": 248.74609375, - "max_sentence2_length": 830, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93197 - }, - "geb_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 248.74609375, - "max_sentence1_length": 830, - "min_sentence2_length": 21, - "average_sentence2_length": 115.3046875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93197 - }, - "eng_Latn-gfk_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 146.45703125, - "max_sentence1_length": 341, - "min_sentence2_length": 41, - "average_sentence2_length": 215.09375, - "max_sentence2_length": 553, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92557 - }, - "gfk_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 215.09375, - "max_sentence1_length": 553, - "min_sentence2_length": 1, - "average_sentence2_length": 146.45703125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92557 - }, - "eng_Latn-ghs_Latn": { - "min_sentence1_length": 21, - "average_sentence1_length": 107.6953125, - "max_sentence1_length": 248, - "min_sentence2_length": 42, - "average_sentence2_length": 213.65234375, - "max_sentence2_length": 845, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82265 - }, - "ghs_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 213.65234375, - "max_sentence1_length": 845, - "min_sentence2_length": 21, - "average_sentence2_length": 107.6953125, - "max_sentence2_length": 248, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82265 - }, - "eng_Latn-glk_Arab": { - "min_sentence1_length": 40, - "average_sentence1_length": 101.6774193548387, - "max_sentence1_length": 209, - "min_sentence2_length": 31, - "average_sentence2_length": 93.04301075268818, - "max_sentence2_length": 205, - "num_samples": 93, - "num_samples_sentence2": 93, - "number_of_characters": 18109 - }, - "glk_Arab-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 93.04301075268818, - "max_sentence1_length": 205, - "min_sentence2_length": 40, - "average_sentence2_length": 101.6774193548387, - "max_sentence2_length": 209, - "num_samples": 93, - "num_samples_sentence2": 93, - "number_of_characters": 18109 - }, - "eng_Latn-gmv_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 145.33203125, - "max_sentence1_length": 341, - "min_sentence2_length": 41, - "average_sentence2_length": 152.296875, - "max_sentence2_length": 399, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76193 - }, - "gmv_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 152.296875, - "max_sentence1_length": 399, - "min_sentence2_length": 1, - "average_sentence2_length": 145.33203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76193 - }, - "eng_Latn-gng_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.6640625, - "max_sentence1_length": 827, - "min_sentence2_length": 24, - "average_sentence2_length": 125.60546875, - "max_sentence2_length": 372, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61253 - }, - "gng_Latn-eng_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 125.60546875, - "max_sentence1_length": 372, - "min_sentence2_length": 24, - "average_sentence2_length": 113.6640625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61253 - }, - "eng_Latn-gnn_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.2265625, - "max_sentence1_length": 232, - "min_sentence2_length": 64, - "average_sentence2_length": 520.5859375, - "max_sentence2_length": 2125, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 161744 - }, - "gnn_Latn-eng_Latn": { - "min_sentence1_length": 64, - "average_sentence1_length": 520.5859375, - "max_sentence1_length": 2125, - "min_sentence2_length": 24, - "average_sentence2_length": 111.2265625, - "max_sentence2_length": 232, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 161744 - }, - "eng_Latn-gnw_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.3984375, - "max_sentence1_length": 827, - "min_sentence2_length": 27, - "average_sentence2_length": 167.72265625, - "max_sentence2_length": 575, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72479 - }, - "gnw_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 167.72265625, - "max_sentence1_length": 575, - "min_sentence2_length": 24, - "average_sentence2_length": 115.3984375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72479 - }, - "eng_Latn-gof_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 145.33203125, - "max_sentence1_length": 341, - "min_sentence2_length": 34, - "average_sentence2_length": 141.16015625, - "max_sentence2_length": 385, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73342 - }, - "gof_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 141.16015625, - "max_sentence1_length": 385, - "min_sentence2_length": 1, - "average_sentence2_length": 145.33203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73342 - }, - "eng_Latn-grc_Grek": { - "min_sentence1_length": 56, - "average_sentence1_length": 146.3203125, - "max_sentence1_length": 341, - "min_sentence2_length": 44, - "average_sentence2_length": 144.91015625, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74555 - }, - "grc_Grek-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 144.91015625, - "max_sentence1_length": 341, - "min_sentence2_length": 56, - "average_sentence2_length": 146.3203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74555 - }, - "eng_Latn-gub_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 110.65625, - "max_sentence1_length": 227, - "min_sentence2_length": 31, - "average_sentence2_length": 287.85546875, - "max_sentence2_length": 1950, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 102019 - }, - "gub_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 287.85546875, - "max_sentence1_length": 1950, - "min_sentence2_length": 31, - "average_sentence2_length": 110.65625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 102019 - }, - "eng_Latn-guh_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.51953125, - "max_sentence1_length": 227, - "min_sentence2_length": 41, - "average_sentence2_length": 334.57421875, - "max_sentence2_length": 1331, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 113944 - }, - "guh_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 334.57421875, - "max_sentence1_length": 1331, - "min_sentence2_length": 24, - "average_sentence2_length": 110.51953125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 113944 - }, - "eng_Latn-gui_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.3984375, - "max_sentence1_length": 827, - "min_sentence2_length": 27, - "average_sentence2_length": 169.70703125, - "max_sentence2_length": 557, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72987 - }, - "gui_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 169.70703125, - "max_sentence1_length": 557, - "min_sentence2_length": 24, - "average_sentence2_length": 115.3984375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72987 - }, - "eng_Latn-guj_Gujr": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 19, - "average_sentence2_length": 105.0625, - "max_sentence2_length": 237, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 55669 - }, - "guj_Gujr-eng_Latn": { - "min_sentence1_length": 19, - "average_sentence1_length": 105.0625, - "max_sentence1_length": 237, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 55669 - }, - "eng_Latn-gul_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.296875, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 159.125, - "max_sentence2_length": 391, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70252 - }, - "gul_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 159.125, - "max_sentence1_length": 391, - "min_sentence2_length": 24, - "average_sentence2_length": 115.296875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70252 - }, - "eng_Latn-gum_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.81640625, - "max_sentence1_length": 227, - "min_sentence2_length": 36, - "average_sentence2_length": 183.4296875, - "max_sentence2_length": 417, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75071 - }, - "gum_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 183.4296875, - "max_sentence1_length": 417, - "min_sentence2_length": 24, - "average_sentence2_length": 109.81640625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75071 - }, - "eng_Latn-gun_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 17, - "average_sentence2_length": 133.4765625, - "max_sentence2_length": 314, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62943 - }, - "gun_Latn-eng_Latn": { - "min_sentence1_length": 17, - "average_sentence1_length": 133.4765625, - "max_sentence1_length": 314, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62943 - }, - "eng_Latn-guo_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.98046875, - "max_sentence1_length": 227, - "min_sentence2_length": 36, - "average_sentence2_length": 233.2109375, - "max_sentence2_length": 700, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88369 - }, - "guo_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 233.2109375, - "max_sentence1_length": 700, - "min_sentence2_length": 24, - "average_sentence2_length": 111.98046875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88369 - }, - "eng_Latn-gup_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.01171875, - "max_sentence1_length": 227, - "min_sentence2_length": 75, - "average_sentence2_length": 300.48828125, - "max_sentence2_length": 1198, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 105600 - }, - "gup_Latn-eng_Latn": { - "min_sentence1_length": 75, - "average_sentence1_length": 300.48828125, - "max_sentence1_length": 1198, - "min_sentence2_length": 24, - "average_sentence2_length": 112.01171875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 105600 - }, - "eng_Latn-gux_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 145.33203125, - "max_sentence1_length": 341, - "min_sentence2_length": 45, - "average_sentence2_length": 155.8671875, - "max_sentence2_length": 379, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77107 - }, - "gux_Latn-eng_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 155.8671875, - "max_sentence1_length": 379, - "min_sentence2_length": 1, - "average_sentence2_length": 145.33203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77107 - }, - "eng_Latn-gvc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.828125, - "max_sentence1_length": 227, - "min_sentence2_length": 33, - "average_sentence2_length": 251.8515625, - "max_sentence2_length": 832, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93358 - }, - "gvc_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 251.8515625, - "max_sentence1_length": 832, - "min_sentence2_length": 24, - "average_sentence2_length": 112.828125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93358 - }, - "eng_Latn-gvf_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.30078125, - "max_sentence1_length": 827, - "min_sentence2_length": 32, - "average_sentence2_length": 225.60546875, - "max_sentence2_length": 783, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87272 - }, - "gvf_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 225.60546875, - "max_sentence1_length": 783, - "min_sentence2_length": 24, - "average_sentence2_length": 115.30078125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87272 - }, - "eng_Latn-gvn_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.87890625, - "max_sentence1_length": 227, - "min_sentence2_length": 37, - "average_sentence2_length": 244.57421875, - "max_sentence2_length": 1121, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91252 - }, - "gvn_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 244.57421875, - "max_sentence1_length": 1121, - "min_sentence2_length": 24, - "average_sentence2_length": 111.87890625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91252 - }, - "eng_Latn-gvs_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.875, - "max_sentence1_length": 227, - "min_sentence2_length": 34, - "average_sentence2_length": 192.9140625, - "max_sentence2_length": 663, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78026 - }, - "gvs_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 192.9140625, - "max_sentence1_length": 663, - "min_sentence2_length": 24, - "average_sentence2_length": 111.875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78026 - }, - "eng_Latn-gwi_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.51171875, - "max_sentence1_length": 227, - "min_sentence2_length": 61, - "average_sentence2_length": 209.765625, - "max_sentence2_length": 749, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82247 - }, - "gwi_Latn-eng_Latn": { - "min_sentence1_length": 61, - "average_sentence1_length": 209.765625, - "max_sentence1_length": 749, - "min_sentence2_length": 24, - "average_sentence2_length": 111.51171875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82247 - }, - "eng_Latn-gym_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 115.24609375, - "max_sentence1_length": 827, - "min_sentence2_length": 38, - "average_sentence2_length": 255.47265625, - "max_sentence2_length": 810, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 94904 - }, - "gym_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 255.47265625, - "max_sentence1_length": 810, - "min_sentence2_length": 31, - "average_sentence2_length": 115.24609375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 94904 - }, - "eng_Latn-gyr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.3203125, - "max_sentence1_length": 827, - "min_sentence2_length": 30, - "average_sentence2_length": 165.68359375, - "max_sentence2_length": 643, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71681 - }, - "gyr_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 165.68359375, - "max_sentence1_length": 643, - "min_sentence2_length": 24, - "average_sentence2_length": 114.3203125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71681 - }, - "eng_Latn-hat_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.55859375, - "max_sentence1_length": 341, - "min_sentence2_length": 39, - "average_sentence2_length": 136.62109375, - "max_sentence2_length": 328, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72750 - }, - "hat_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 136.62109375, - "max_sentence1_length": 328, - "min_sentence2_length": 56, - "average_sentence2_length": 147.55859375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72750 - }, - "eng_Latn-hau_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.55859375, - "max_sentence1_length": 341, - "min_sentence2_length": 44, - "average_sentence2_length": 144.5625, - "max_sentence2_length": 317, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74783 - }, - "hau_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 144.5625, - "max_sentence1_length": 317, - "min_sentence2_length": 56, - "average_sentence2_length": 147.55859375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74783 - }, - "eng_Latn-haw_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.4375, - "max_sentence1_length": 827, - "min_sentence2_length": 20, - "average_sentence2_length": 125.9140625, - "max_sentence2_length": 289, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61274 - }, - "haw_Latn-eng_Latn": { - "min_sentence1_length": 20, - "average_sentence1_length": 125.9140625, - "max_sentence1_length": 289, - "min_sentence2_length": 24, - "average_sentence2_length": 113.4375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61274 - }, - "eng_Latn-hbo_Hebr": { - "min_sentence1_length": 32, - "average_sentence1_length": 151.23828125, - "max_sentence1_length": 305, - "min_sentence2_length": 38, - "average_sentence2_length": 140.0703125, - "max_sentence2_length": 249, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74575 - }, - "hbo_Hebr-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 140.0703125, - "max_sentence1_length": 249, - "min_sentence2_length": 32, - "average_sentence2_length": 151.23828125, - "max_sentence2_length": 305, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74575 - }, - "eng_Latn-hch_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.94921875, - "max_sentence1_length": 827, - "min_sentence2_length": 22, - "average_sentence2_length": 153.8828125, - "max_sentence2_length": 334, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68309 - }, - "hch_Latn-eng_Latn": { - "min_sentence1_length": 22, - "average_sentence1_length": 153.8828125, - "max_sentence1_length": 334, - "min_sentence2_length": 24, - "average_sentence2_length": 112.94921875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68309 - }, - "eng_Latn-heb_Hebr": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.4375, - "max_sentence1_length": 827, - "min_sentence2_length": 8, - "average_sentence2_length": 66.01171875, - "max_sentence2_length": 145, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 45939 - }, - "heb_Hebr-eng_Latn": { - "min_sentence1_length": 8, - "average_sentence1_length": 66.01171875, - "max_sentence1_length": 145, - "min_sentence2_length": 24, - "average_sentence2_length": 113.4375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 45939 - }, - "eng_Latn-heg_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.07421875, - "max_sentence1_length": 251, - "min_sentence2_length": 40, - "average_sentence2_length": 272.94921875, - "max_sentence2_length": 1307, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99334 - }, - "heg_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 272.94921875, - "max_sentence1_length": 1307, - "min_sentence2_length": 24, - "average_sentence2_length": 115.07421875, - "max_sentence2_length": 251, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99334 - }, - "eng_Latn-hin_Deva": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 17, - "average_sentence2_length": 112.6328125, - "max_sentence2_length": 251, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57607 - }, - "hin_Deva-eng_Latn": { - "min_sentence1_length": 17, - "average_sentence1_length": 112.6328125, - "max_sentence1_length": 251, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57607 - }, - "eng_Latn-hix_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 108.28515625, - "max_sentence1_length": 230, - "min_sentence2_length": 26, - "average_sentence2_length": 326.07421875, - "max_sentence2_length": 1365, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 111196 - }, - "hix_Latn-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 326.07421875, - "max_sentence1_length": 1365, - "min_sentence2_length": 24, - "average_sentence2_length": 108.28515625, - "max_sentence2_length": 230, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 111196 - }, - "eng_Latn-hla_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.8203125, - "max_sentence1_length": 232, - "min_sentence2_length": 45, - "average_sentence2_length": 215.89453125, - "max_sentence2_length": 721, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84151 - }, - "hla_Latn-eng_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 215.89453125, - "max_sentence1_length": 721, - "min_sentence2_length": 24, - "average_sentence2_length": 112.8203125, - "max_sentence2_length": 232, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84151 - }, - "eng_Latn-hlt_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 145.33203125, - "max_sentence1_length": 341, - "min_sentence2_length": 41, - "average_sentence2_length": 158.6796875, - "max_sentence2_length": 362, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77827 - }, - "hlt_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 158.6796875, - "max_sentence1_length": 362, - "min_sentence2_length": 1, - "average_sentence2_length": 145.33203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77827 - }, - "eng_Latn-hmo_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.375, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 189.68359375, - "max_sentence2_length": 389, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78095 - }, - "hmo_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 189.68359375, - "max_sentence1_length": 389, - "min_sentence2_length": 24, - "average_sentence2_length": 115.375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78095 - }, - "eng_Latn-hns_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.45703125, - "max_sentence1_length": 230, - "min_sentence2_length": 31, - "average_sentence2_length": 179.94140625, - "max_sentence2_length": 917, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74854 - }, - "hns_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 179.94140625, - "max_sentence1_length": 917, - "min_sentence2_length": 24, - "average_sentence2_length": 112.45703125, - "max_sentence2_length": 230, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74854 - }, - "eng_Latn-hop_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.4609375, - "max_sentence1_length": 239, - "min_sentence2_length": 31, - "average_sentence2_length": 145.7421875, - "max_sentence2_length": 568, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65844 - }, - "hop_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 145.7421875, - "max_sentence1_length": 568, - "min_sentence2_length": 24, - "average_sentence2_length": 111.4609375, - "max_sentence2_length": 239, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65844 - }, - "eng_Latn-hot_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 108.5078125, - "max_sentence1_length": 227, - "min_sentence2_length": 35, - "average_sentence2_length": 172.14453125, - "max_sentence2_length": 638, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71847 - }, - "hot_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 172.14453125, - "max_sentence1_length": 638, - "min_sentence2_length": 24, - "average_sentence2_length": 108.5078125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71847 - }, - "eng_Latn-hrv_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.0703125, - "max_sentence1_length": 827, - "min_sentence2_length": 1, - "average_sentence2_length": 88.1328125, - "max_sentence2_length": 232, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 51252 - }, - "hrv_Latn-eng_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 88.1328125, - "max_sentence1_length": 232, - "min_sentence2_length": 24, - "average_sentence2_length": 112.0703125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 51252 - }, - "eng_Latn-hto_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.77734375, - "max_sentence1_length": 827, - "min_sentence2_length": 38, - "average_sentence2_length": 206.19140625, - "max_sentence2_length": 990, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81912 - }, - "hto_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 206.19140625, - "max_sentence1_length": 990, - "min_sentence2_length": 24, - "average_sentence2_length": 113.77734375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81912 - }, - "eng_Latn-hub_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 108.31640625, - "max_sentence1_length": 227, - "min_sentence2_length": 36, - "average_sentence2_length": 189.55859375, - "max_sentence2_length": 946, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76256 - }, - "hub_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 189.55859375, - "max_sentence1_length": 946, - "min_sentence2_length": 24, - "average_sentence2_length": 108.31640625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76256 - }, - "eng_Latn-hui_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 108.33203125, - "max_sentence1_length": 242, - "min_sentence2_length": 46, - "average_sentence2_length": 213.171875, - "max_sentence2_length": 597, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82305 - }, - "hui_Latn-eng_Latn": { - "min_sentence1_length": 46, - "average_sentence1_length": 213.171875, - "max_sentence1_length": 597, - "min_sentence2_length": 23, - "average_sentence2_length": 108.33203125, - "max_sentence2_length": 242, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82305 - }, - "eng_Latn-hun_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.4453125, - "max_sentence1_length": 827, - "min_sentence2_length": 24, - "average_sentence2_length": 109.4375, - "max_sentence2_length": 305, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57058 - }, - "hun_Latn-eng_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.4375, - "max_sentence1_length": 305, - "min_sentence2_length": 24, - "average_sentence2_length": 113.4453125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57058 - }, - "eng_Latn-hus_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.55859375, - "max_sentence1_length": 341, - "min_sentence2_length": 60, - "average_sentence2_length": 214.11328125, - "max_sentence2_length": 558, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92588 - }, - "hus_Latn-eng_Latn": { - "min_sentence1_length": 60, - "average_sentence1_length": 214.11328125, - "max_sentence1_length": 558, - "min_sentence2_length": 56, - "average_sentence2_length": 147.55859375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92588 - }, - "eng_Latn-huu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.05859375, - "max_sentence1_length": 228, - "min_sentence2_length": 27, - "average_sentence2_length": 193.46875, - "max_sentence2_length": 812, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77703 - }, - "huu_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 193.46875, - "max_sentence1_length": 812, - "min_sentence2_length": 24, - "average_sentence2_length": 110.05859375, - "max_sentence2_length": 228, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77703 - }, - "eng_Latn-huv_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 40, - "average_sentence2_length": 173.2578125, - "max_sentence2_length": 400, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73889 - }, - "huv_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 173.2578125, - "max_sentence1_length": 400, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73889 - }, - "eng_Latn-hvn_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 113.0952380952381, - "max_sentence1_length": 257, - "min_sentence2_length": 61, - "average_sentence2_length": 212.65079365079364, - "max_sentence2_length": 763, - "num_samples": 126, - "num_samples_sentence2": 126, - "number_of_characters": 41044 - }, - "hvn_Latn-eng_Latn": { - "min_sentence1_length": 61, - "average_sentence1_length": 212.65079365079364, - "max_sentence1_length": 763, - "min_sentence2_length": 45, - "average_sentence2_length": 113.0952380952381, - "max_sentence2_length": 257, - "num_samples": 126, - "num_samples_sentence2": 126, - "number_of_characters": 41044 - }, - "eng_Latn-ian_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.0234375, - "max_sentence1_length": 227, - "min_sentence2_length": 28, - "average_sentence2_length": 230.6953125, - "max_sentence2_length": 718, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87736 - }, - "ian_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 230.6953125, - "max_sentence1_length": 718, - "min_sentence2_length": 24, - "average_sentence2_length": 112.0234375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87736 - }, - "eng_Latn-ign_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.6796875, - "max_sentence1_length": 271, - "min_sentence2_length": 58, - "average_sentence2_length": 362.24609375, - "max_sentence2_length": 1229, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 122093 - }, - "ign_Latn-eng_Latn": { - "min_sentence1_length": 58, - "average_sentence1_length": 362.24609375, - "max_sentence1_length": 1229, - "min_sentence2_length": 24, - "average_sentence2_length": 114.6796875, - "max_sentence2_length": 271, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 122093 - }, - "eng_Latn-ikk_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 34, - "average_sentence2_length": 130.39453125, - "max_sentence2_length": 338, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62154 - }, - "ikk_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 130.39453125, - "max_sentence1_length": 338, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62154 - }, - "eng_Latn-ikw_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 28, - "average_sentence2_length": 117.47265625, - "max_sentence2_length": 288, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58846 - }, - "ikw_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 117.47265625, - "max_sentence1_length": 288, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58846 - }, - "eng_Latn-ilo_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 25, - "average_sentence2_length": 140.42578125, - "max_sentence2_length": 314, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64722 - }, - "ilo_Latn-eng_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 140.42578125, - "max_sentence1_length": 314, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64722 - }, - "eng_Latn-imo_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.90234375, - "max_sentence1_length": 227, - "min_sentence2_length": 61, - "average_sentence2_length": 304.0078125, - "max_sentence2_length": 1169, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 106217 - }, - "imo_Latn-eng_Latn": { - "min_sentence1_length": 61, - "average_sentence1_length": 304.0078125, - "max_sentence1_length": 1169, - "min_sentence2_length": 24, - "average_sentence2_length": 110.90234375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 106217 - }, - "eng_Latn-inb_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.0390625, - "max_sentence1_length": 827, - "min_sentence2_length": 51, - "average_sentence2_length": 195.54296875, - "max_sentence2_length": 520, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79253 - }, - "inb_Latn-eng_Latn": { - "min_sentence1_length": 51, - "average_sentence1_length": 195.54296875, - "max_sentence1_length": 520, - "min_sentence2_length": 24, - "average_sentence2_length": 114.0390625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79253 - }, - "eng_Latn-ind_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 146.52734375, - "max_sentence1_length": 341, - "min_sentence2_length": 41, - "average_sentence2_length": 181.54296875, - "max_sentence2_length": 504, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83986 - }, - "ind_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 181.54296875, - "max_sentence1_length": 504, - "min_sentence2_length": 35, - "average_sentence2_length": 146.52734375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83986 - }, - "eng_Latn-ino_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.29296875, - "max_sentence1_length": 230, - "min_sentence2_length": 54, - "average_sentence2_length": 299.77734375, - "max_sentence2_length": 940, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 104722 - }, - "ino_Latn-eng_Latn": { - "min_sentence1_length": 54, - "average_sentence1_length": 299.77734375, - "max_sentence1_length": 940, - "min_sentence2_length": 24, - "average_sentence2_length": 109.29296875, - "max_sentence2_length": 230, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 104722 - }, - "eng_Latn-iou_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.2890625, - "max_sentence1_length": 239, - "min_sentence2_length": 45, - "average_sentence2_length": 218.77734375, - "max_sentence2_length": 810, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84753 - }, - "iou_Latn-eng_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 218.77734375, - "max_sentence1_length": 810, - "min_sentence2_length": 24, - "average_sentence2_length": 112.2890625, - "max_sentence2_length": 239, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84753 - }, - "eng_Latn-ipi_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 116.83203125, - "max_sentence1_length": 273, - "min_sentence2_length": 113, - "average_sentence2_length": 594.37109375, - "max_sentence2_length": 1750, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 182068 - }, - "ipi_Latn-eng_Latn": { - "min_sentence1_length": 113, - "average_sentence1_length": 594.37109375, - "max_sentence1_length": 1750, - "min_sentence2_length": 24, - "average_sentence2_length": 116.83203125, - "max_sentence2_length": 273, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 182068 - }, - "eng_Latn-isn_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 27, - "average_sentence2_length": 121.984375, - "max_sentence2_length": 325, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60001 - }, - "isn_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 121.984375, - "max_sentence1_length": 325, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60001 - }, - "eng_Latn-ita_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 146.515625, - "max_sentence1_length": 341, - "min_sentence2_length": 40, - "average_sentence2_length": 146.9140625, - "max_sentence2_length": 381, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75118 - }, - "ita_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 146.9140625, - "max_sentence1_length": 381, - "min_sentence2_length": 56, - "average_sentence2_length": 146.515625, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75118 - }, - "eng_Latn-iws_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 30, - "average_sentence2_length": 296.91015625, - "max_sentence2_length": 854, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 104782 - }, - "iws_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 296.91015625, - "max_sentence1_length": 854, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 104782 - }, - "eng_Latn-ixl_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 146.31640625, - "max_sentence1_length": 341, - "min_sentence2_length": 47, - "average_sentence2_length": 244.671875, - "max_sentence2_length": 565, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 100093 - }, - "ixl_Latn-eng_Latn": { - "min_sentence1_length": 47, - "average_sentence1_length": 244.671875, - "max_sentence1_length": 565, - "min_sentence2_length": 35, - "average_sentence2_length": 146.31640625, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 100093 - }, - "eng_Latn-jac_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 116.2421875, - "max_sentence1_length": 827, - "min_sentence2_length": 34, - "average_sentence2_length": 182.84765625, - "max_sentence2_length": 573, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76567 - }, - "jac_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 182.84765625, - "max_sentence1_length": 573, - "min_sentence2_length": 31, - "average_sentence2_length": 116.2421875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76567 - }, - "eng_Latn-jae_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.0625, - "max_sentence1_length": 827, - "min_sentence2_length": 24, - "average_sentence2_length": 127.1328125, - "max_sentence2_length": 294, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61746 - }, - "jae_Latn-eng_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 127.1328125, - "max_sentence1_length": 294, - "min_sentence2_length": 24, - "average_sentence2_length": 114.0625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61746 - }, - "eng_Latn-jao_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 116.4724409448819, - "max_sentence1_length": 248, - "min_sentence2_length": 59, - "average_sentence2_length": 414.5511811023622, - "max_sentence2_length": 1292, - "num_samples": 127, - "num_samples_sentence2": 127, - "number_of_characters": 67440 - }, - "jao_Latn-eng_Latn": { - "min_sentence1_length": 59, - "average_sentence1_length": 414.5511811023622, - "max_sentence1_length": 1292, - "min_sentence2_length": 34, - "average_sentence2_length": 116.4724409448819, - "max_sentence2_length": 248, - "num_samples": 127, - "num_samples_sentence2": 127, - "number_of_characters": 67440 - }, - "eng_Latn-jic_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.5078125, - "max_sentence1_length": 827, - "min_sentence2_length": 40, - "average_sentence2_length": 236.2578125, - "max_sentence2_length": 691, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89796 - }, - "jic_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 236.2578125, - "max_sentence1_length": 691, - "min_sentence2_length": 24, - "average_sentence2_length": 114.5078125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89796 - }, - "eng_Latn-jid_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.1484375, - "max_sentence1_length": 827, - "min_sentence2_length": 16, - "average_sentence2_length": 96.96484375, - "max_sentence2_length": 269, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 54301 - }, - "jid_Latn-eng_Latn": { - "min_sentence1_length": 16, - "average_sentence1_length": 96.96484375, - "max_sentence1_length": 269, - "min_sentence2_length": 24, - "average_sentence2_length": 115.1484375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 54301 - }, - "eng_Latn-jiv_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.49609375, - "max_sentence1_length": 230, - "min_sentence2_length": 38, - "average_sentence2_length": 175.2890625, - "max_sentence2_length": 584, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73161 - }, - "jiv_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 175.2890625, - "max_sentence1_length": 584, - "min_sentence2_length": 24, - "average_sentence2_length": 110.49609375, - "max_sentence2_length": 230, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73161 - }, - "eng_Latn-jni_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.625, - "max_sentence1_length": 827, - "min_sentence2_length": 25, - "average_sentence2_length": 111.0703125, - "max_sentence2_length": 255, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57522 - }, - "jni_Latn-eng_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 111.0703125, - "max_sentence1_length": 255, - "min_sentence2_length": 24, - "average_sentence2_length": 113.625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57522 - }, - "eng_Latn-jpn_Jpan": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.05859375, - "max_sentence1_length": 227, - "min_sentence2_length": 13, - "average_sentence2_length": 57.05078125, - "max_sentence2_length": 116, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 43036 - }, - "jpn_Jpan-eng_Latn": { - "min_sentence1_length": 13, - "average_sentence1_length": 57.05078125, - "max_sentence1_length": 116, - "min_sentence2_length": 24, - "average_sentence2_length": 111.05859375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 43036 - }, - "eng_Latn-jvn_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 112.91015625, - "max_sentence1_length": 227, - "min_sentence2_length": 39, - "average_sentence2_length": 230.34765625, - "max_sentence2_length": 904, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87874 - }, - "jvn_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 230.34765625, - "max_sentence1_length": 904, - "min_sentence2_length": 31, - "average_sentence2_length": 112.91015625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87874 - }, - "eng_Latn-kan_Knda": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.9609375, - "max_sentence1_length": 827, - "min_sentence2_length": 26, - "average_sentence2_length": 126.96484375, - "max_sentence2_length": 407, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61165 - }, - "kan_Knda-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 126.96484375, - "max_sentence1_length": 407, - "min_sentence2_length": 24, - "average_sentence2_length": 111.9609375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61165 - }, - "eng_Latn-kaq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.203125, - "max_sentence1_length": 227, - "min_sentence2_length": 32, - "average_sentence2_length": 171.16015625, - "max_sentence2_length": 551, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72285 - }, - "kaq_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 171.16015625, - "max_sentence1_length": 551, - "min_sentence2_length": 24, - "average_sentence2_length": 111.203125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72285 - }, - "eng_Latn-kbc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.796875, - "max_sentence1_length": 827, - "min_sentence2_length": 51, - "average_sentence2_length": 264.6015625, - "max_sentence2_length": 948, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97126 - }, - "kbc_Latn-eng_Latn": { - "min_sentence1_length": 51, - "average_sentence1_length": 264.6015625, - "max_sentence1_length": 948, - "min_sentence2_length": 24, - "average_sentence2_length": 114.796875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97126 - }, - "eng_Latn-kbh_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.74609375, - "max_sentence1_length": 227, - "min_sentence2_length": 27, - "average_sentence2_length": 253.4609375, - "max_sentence2_length": 903, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93237 - }, - "kbh_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 253.4609375, - "max_sentence1_length": 903, - "min_sentence2_length": 24, - "average_sentence2_length": 110.74609375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93237 - }, - "eng_Latn-kbm_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.171875, - "max_sentence1_length": 227, - "min_sentence2_length": 38, - "average_sentence2_length": 209.3125, - "max_sentence2_length": 748, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81788 - }, - "kbm_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 209.3125, - "max_sentence1_length": 748, - "min_sentence2_length": 24, - "average_sentence2_length": 110.171875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81788 - }, - "eng_Latn-kbq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.42578125, - "max_sentence1_length": 827, - "min_sentence2_length": 44, - "average_sentence2_length": 151.98046875, - "max_sentence2_length": 381, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68456 - }, - "kbq_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 151.98046875, - "max_sentence1_length": 381, - "min_sentence2_length": 24, - "average_sentence2_length": 115.42578125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68456 - }, - "eng_Latn-kdc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 29, - "average_sentence2_length": 127.23828125, - "max_sentence2_length": 285, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61346 - }, - "kdc_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 127.23828125, - "max_sentence1_length": 285, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61346 - }, - "eng_Latn-kde_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.59375, - "max_sentence1_length": 376, - "min_sentence2_length": 27, - "average_sentence2_length": 155.28125, - "max_sentence2_length": 424, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68320 - }, - "kde_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 155.28125, - "max_sentence1_length": 424, - "min_sentence2_length": 24, - "average_sentence2_length": 111.59375, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68320 - }, - "eng_Latn-kdl_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.4921875, - "max_sentence1_length": 227, - "min_sentence2_length": 28, - "average_sentence2_length": 181.47265625, - "max_sentence2_length": 664, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74999 - }, - "kdl_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 181.47265625, - "max_sentence1_length": 664, - "min_sentence2_length": 24, - "average_sentence2_length": 111.4921875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74999 - }, - "eng_Latn-kek_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.296875, - "max_sentence1_length": 827, - "min_sentence2_length": 40, - "average_sentence2_length": 182.1640625, - "max_sentence2_length": 517, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76150 - }, - "kek_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 182.1640625, - "max_sentence1_length": 517, - "min_sentence2_length": 24, - "average_sentence2_length": 115.296875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76150 - }, - "eng_Latn-ken_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.41796875, - "max_sentence1_length": 227, - "min_sentence2_length": 29, - "average_sentence2_length": 152.58203125, - "max_sentence2_length": 419, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67584 - }, - "ken_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 152.58203125, - "max_sentence1_length": 419, - "min_sentence2_length": 24, - "average_sentence2_length": 111.41796875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67584 - }, - "eng_Latn-kew_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.6328125, - "max_sentence1_length": 227, - "min_sentence2_length": 36, - "average_sentence2_length": 182.625, - "max_sentence2_length": 531, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74818 - }, - "kew_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 182.625, - "max_sentence1_length": 531, - "min_sentence2_length": 24, - "average_sentence2_length": 109.6328125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74818 - }, - "eng_Latn-kgf_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.9609375, - "max_sentence1_length": 227, - "min_sentence2_length": 36, - "average_sentence2_length": 150.54296875, - "max_sentence2_length": 389, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66689 - }, - "kgf_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 150.54296875, - "max_sentence1_length": 389, - "min_sentence2_length": 24, - "average_sentence2_length": 109.9609375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66689 - }, - "eng_Latn-kgk_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.0625, - "max_sentence1_length": 271, - "min_sentence2_length": 46, - "average_sentence2_length": 345.99609375, - "max_sentence2_length": 1240, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 117007 - }, - "kgk_Latn-eng_Latn": { - "min_sentence1_length": 46, - "average_sentence1_length": 345.99609375, - "max_sentence1_length": 1240, - "min_sentence2_length": 24, - "average_sentence2_length": 111.0625, - "max_sentence2_length": 271, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 117007 - }, - "eng_Latn-kgp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.8984375, - "max_sentence1_length": 228, - "min_sentence2_length": 17, - "average_sentence2_length": 180.0546875, - "max_sentence2_length": 529, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74740 - }, - "kgp_Latn-eng_Latn": { - "min_sentence1_length": 17, - "average_sentence1_length": 180.0546875, - "max_sentence1_length": 529, - "min_sentence2_length": 24, - "average_sentence2_length": 111.8984375, - "max_sentence2_length": 228, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74740 - }, - "eng_Latn-khs_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.5859375, - "max_sentence1_length": 227, - "min_sentence2_length": 52, - "average_sentence2_length": 277.28125, - "max_sentence2_length": 953, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99038 - }, - "khs_Latn-eng_Latn": { - "min_sentence1_length": 52, - "average_sentence1_length": 277.28125, - "max_sentence1_length": 953, - "min_sentence2_length": 24, - "average_sentence2_length": 109.5859375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99038 - }, - "eng_Latn-khz_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.4921875, - "max_sentence1_length": 827, - "min_sentence2_length": 26, - "average_sentence2_length": 166.2734375, - "max_sentence2_length": 750, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71876 - }, - "khz_Latn-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 166.2734375, - "max_sentence1_length": 750, - "min_sentence2_length": 24, - "average_sentence2_length": 114.4921875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71876 - }, - "eng_Latn-kik_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.34765625, - "max_sentence1_length": 227, - "min_sentence2_length": 28, - "average_sentence2_length": 132.87890625, - "max_sentence2_length": 353, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62010 - }, - "kik_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 132.87890625, - "max_sentence1_length": 353, - "min_sentence2_length": 24, - "average_sentence2_length": 109.34765625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62010 - }, - "eng_Latn-kiw_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 105.37349397590361, - "max_sentence1_length": 245, - "min_sentence2_length": 38, - "average_sentence2_length": 133.75903614457832, - "max_sentence2_length": 262, - "num_samples": 83, - "num_samples_sentence2": 83, - "number_of_characters": 19848 - }, - "kiw_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 133.75903614457832, - "max_sentence1_length": 262, - "min_sentence2_length": 45, - "average_sentence2_length": 105.37349397590361, - "max_sentence2_length": 245, - "num_samples": 83, - "num_samples_sentence2": 83, - "number_of_characters": 19848 - }, - "eng_Latn-kiz_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.8828125, - "max_sentence1_length": 827, - "min_sentence2_length": 16, - "average_sentence2_length": 131.95703125, - "max_sentence2_length": 386, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62935 - }, - "kiz_Latn-eng_Latn": { - "min_sentence1_length": 16, - "average_sentence1_length": 131.95703125, - "max_sentence1_length": 386, - "min_sentence2_length": 24, - "average_sentence2_length": 113.8828125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62935 - }, - "eng_Latn-kje_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.15234375, - "max_sentence1_length": 227, - "min_sentence2_length": 30, - "average_sentence2_length": 224.1015625, - "max_sentence2_length": 857, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85569 - }, - "kje_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 224.1015625, - "max_sentence1_length": 857, - "min_sentence2_length": 24, - "average_sentence2_length": 110.15234375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85569 - }, - "eng_Latn-kjs_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.6328125, - "max_sentence1_length": 227, - "min_sentence2_length": 38, - "average_sentence2_length": 187.8359375, - "max_sentence2_length": 530, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76152 - }, - "kjs_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 187.8359375, - "max_sentence1_length": 530, - "min_sentence2_length": 24, - "average_sentence2_length": 109.6328125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76152 - }, - "eng_Latn-kkc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.05859375, - "max_sentence1_length": 243, - "min_sentence2_length": 33, - "average_sentence2_length": 231.08984375, - "max_sentence2_length": 836, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88358 - }, - "kkc_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 231.08984375, - "max_sentence1_length": 836, - "min_sentence2_length": 24, - "average_sentence2_length": 114.05859375, - "max_sentence2_length": 243, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88358 - }, - "eng_Latn-kkl_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 111.1640625, - "max_sentence1_length": 243, - "min_sentence2_length": 44, - "average_sentence2_length": 365.1328125, - "max_sentence2_length": 1372, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 121932 - }, - "kkl_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 365.1328125, - "max_sentence1_length": 1372, - "min_sentence2_length": 25, - "average_sentence2_length": 111.1640625, - "max_sentence2_length": 243, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 121932 - }, - "eng_Latn-klt_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.5546875, - "max_sentence1_length": 243, - "min_sentence2_length": 43, - "average_sentence2_length": 203.6015625, - "max_sentence2_length": 765, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81192 - }, - "klt_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 203.6015625, - "max_sentence1_length": 765, - "min_sentence2_length": 24, - "average_sentence2_length": 113.5546875, - "max_sentence2_length": 243, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81192 - }, - "eng_Latn-klv_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.8046875, - "max_sentence1_length": 227, - "min_sentence2_length": 31, - "average_sentence2_length": 168.53125, - "max_sentence2_length": 465, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71510 - }, - "klv_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 168.53125, - "max_sentence1_length": 465, - "min_sentence2_length": 24, - "average_sentence2_length": 110.8046875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71510 - }, - "eng_Latn-kmg_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 28, - "average_sentence2_length": 119.46484375, - "max_sentence2_length": 253, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59356 - }, - "kmg_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 119.46484375, - "max_sentence1_length": 253, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59356 - }, - "eng_Latn-kmh_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 145.40625, - "max_sentence1_length": 341, - "min_sentence2_length": 37, - "average_sentence2_length": 198.69140625, - "max_sentence2_length": 544, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88089 - }, - "kmh_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 198.69140625, - "max_sentence1_length": 544, - "min_sentence2_length": 1, - "average_sentence2_length": 145.40625, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88089 - }, - "eng_Latn-kmk_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.44921875, - "max_sentence1_length": 827, - "min_sentence2_length": 32, - "average_sentence2_length": 166.76171875, - "max_sentence2_length": 763, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72246 - }, - "kmk_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 166.76171875, - "max_sentence1_length": 763, - "min_sentence2_length": 24, - "average_sentence2_length": 115.44921875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72246 - }, - "eng_Latn-kmo_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 112.0859375, - "max_sentence1_length": 248, - "min_sentence2_length": 45, - "average_sentence2_length": 198.87109375, - "max_sentence2_length": 627, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79605 - }, - "kmo_Latn-eng_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 198.87109375, - "max_sentence1_length": 627, - "min_sentence2_length": 31, - "average_sentence2_length": 112.0859375, - "max_sentence2_length": 248, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79605 - }, - "eng_Latn-kms_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.30078125, - "max_sentence1_length": 827, - "min_sentence2_length": 37, - "average_sentence2_length": 203.09375, - "max_sentence2_length": 631, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81253 - }, - "kms_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 203.09375, - "max_sentence1_length": 631, - "min_sentence2_length": 24, - "average_sentence2_length": 114.30078125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81253 - }, - "eng_Latn-kmu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.50390625, - "max_sentence1_length": 227, - "min_sentence2_length": 53, - "average_sentence2_length": 256.25, - "max_sentence2_length": 873, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93633 - }, - "kmu_Latn-eng_Latn": { - "min_sentence1_length": 53, - "average_sentence1_length": 256.25, - "max_sentence1_length": 873, - "min_sentence2_length": 24, - "average_sentence2_length": 109.50390625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93633 - }, - "eng_Latn-kne_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.421875, - "max_sentence1_length": 239, - "min_sentence2_length": 27, - "average_sentence2_length": 187.78125, - "max_sentence2_length": 678, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76340 - }, - "kne_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 187.78125, - "max_sentence1_length": 678, - "min_sentence2_length": 24, - "average_sentence2_length": 110.421875, - "max_sentence2_length": 239, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76340 - }, - "eng_Latn-knf_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.7578125, - "max_sentence1_length": 227, - "min_sentence2_length": 17, - "average_sentence2_length": 122.140625, - "max_sentence2_length": 328, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59366 - }, - "knf_Latn-eng_Latn": { - "min_sentence1_length": 17, - "average_sentence1_length": 122.140625, - "max_sentence1_length": 328, - "min_sentence2_length": 24, - "average_sentence2_length": 109.7578125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59366 - }, - "eng_Latn-knj_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.7578125, - "max_sentence1_length": 227, - "min_sentence2_length": 29, - "average_sentence2_length": 193.25, - "max_sentence2_length": 528, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77826 - }, - "knj_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 193.25, - "max_sentence1_length": 528, - "min_sentence2_length": 24, - "average_sentence2_length": 110.7578125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77826 - }, - "eng_Latn-knv_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.7265625, - "max_sentence1_length": 341, - "min_sentence2_length": 75, - "average_sentence2_length": 310.953125, - "max_sentence2_length": 789, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 117422 - }, - "knv_Latn-eng_Latn": { - "min_sentence1_length": 75, - "average_sentence1_length": 310.953125, - "max_sentence1_length": 789, - "min_sentence2_length": 56, - "average_sentence2_length": 147.7265625, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 117422 - }, - "eng_Latn-kos_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 30, - "average_sentence2_length": 129.9453125, - "max_sentence2_length": 322, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62039 - }, - "kos_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 129.9453125, - "max_sentence1_length": 322, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62039 - }, - "eng_Latn-kpf_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.43359375, - "max_sentence1_length": 227, - "min_sentence2_length": 27, - "average_sentence2_length": 143.1171875, - "max_sentence2_length": 520, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64909 - }, - "kpf_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 143.1171875, - "max_sentence1_length": 520, - "min_sentence2_length": 24, - "average_sentence2_length": 110.43359375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64909 - }, - "eng_Latn-kpg_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.41015625, - "max_sentence1_length": 227, - "min_sentence2_length": 34, - "average_sentence2_length": 165.4765625, - "max_sentence2_length": 502, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70883 - }, - "kpg_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 165.4765625, - "max_sentence1_length": 502, - "min_sentence2_length": 24, - "average_sentence2_length": 111.41015625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70883 - }, - "eng_Latn-kpj_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.46875, - "max_sentence1_length": 249, - "min_sentence2_length": 31, - "average_sentence2_length": 163.3203125, - "max_sentence2_length": 583, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70346 - }, - "kpj_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 163.3203125, - "max_sentence1_length": 583, - "min_sentence2_length": 24, - "average_sentence2_length": 111.46875, - "max_sentence2_length": 249, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70346 - }, - "eng_Latn-kpr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.37109375, - "max_sentence1_length": 227, - "min_sentence2_length": 29, - "average_sentence2_length": 281.9296875, - "max_sentence2_length": 1215, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 100685 - }, - "kpr_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 281.9296875, - "max_sentence1_length": 1215, - "min_sentence2_length": 24, - "average_sentence2_length": 111.37109375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 100685 - }, - "eng_Latn-kpw_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.72265625, - "max_sentence1_length": 227, - "min_sentence2_length": 32, - "average_sentence2_length": 204.51953125, - "max_sentence2_length": 617, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80446 - }, - "kpw_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 204.51953125, - "max_sentence1_length": 617, - "min_sentence2_length": 24, - "average_sentence2_length": 109.72265625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80446 - }, - "eng_Latn-kpx_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.4765625, - "max_sentence1_length": 216, - "min_sentence2_length": 36, - "average_sentence2_length": 183.6015625, - "max_sentence2_length": 549, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75284 - }, - "kpx_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 183.6015625, - "max_sentence1_length": 549, - "min_sentence2_length": 24, - "average_sentence2_length": 110.4765625, - "max_sentence2_length": 216, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75284 - }, - "eng_Latn-kqa_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 105.74603174603175, - "max_sentence1_length": 257, - "min_sentence2_length": 75, - "average_sentence2_length": 229.20634920634922, - "max_sentence2_length": 718, - "num_samples": 63, - "num_samples_sentence2": 63, - "number_of_characters": 21102 - }, - "kqa_Latn-eng_Latn": { - "min_sentence1_length": 75, - "average_sentence1_length": 229.20634920634922, - "max_sentence1_length": 718, - "min_sentence2_length": 42, - "average_sentence2_length": 105.74603174603175, - "max_sentence2_length": 257, - "num_samples": 63, - "num_samples_sentence2": 63, - "number_of_characters": 21102 - }, - "eng_Latn-kqc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.890625, - "max_sentence1_length": 827, - "min_sentence2_length": 30, - "average_sentence2_length": 137.24609375, - "max_sentence2_length": 357, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64803 - }, - "kqc_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 137.24609375, - "max_sentence1_length": 357, - "min_sentence2_length": 24, - "average_sentence2_length": 115.890625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64803 - }, - "eng_Latn-kqf_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 114.31640625, - "max_sentence1_length": 257, - "min_sentence2_length": 40, - "average_sentence2_length": 148.671875, - "max_sentence2_length": 670, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67325 - }, - "kqf_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 148.671875, - "max_sentence1_length": 670, - "min_sentence2_length": 38, - "average_sentence2_length": 114.31640625, - "max_sentence2_length": 257, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67325 - }, - "eng_Latn-kql_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 112.87857142857143, - "max_sentence1_length": 257, - "min_sentence2_length": 34, - "average_sentence2_length": 160.8357142857143, - "max_sentence2_length": 501, - "num_samples": 140, - "num_samples_sentence2": 140, - "number_of_characters": 38320 - }, - "kql_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 160.8357142857143, - "max_sentence1_length": 501, - "min_sentence2_length": 45, - "average_sentence2_length": 112.87857142857143, - "max_sentence2_length": 257, - "num_samples": 140, - "num_samples_sentence2": 140, - "number_of_characters": 38320 - }, - "eng_Latn-kqw_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.9453125, - "max_sentence1_length": 227, - "min_sentence2_length": 34, - "average_sentence2_length": 166.953125, - "max_sentence2_length": 437, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70886 - }, - "kqw_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 166.953125, - "max_sentence1_length": 437, - "min_sentence2_length": 24, - "average_sentence2_length": 109.9453125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70886 - }, - "eng_Latn-ksd_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 138.44140625, - "max_sentence2_length": 387, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64976 - }, - "ksd_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 138.44140625, - "max_sentence1_length": 387, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64976 - }, - "eng_Latn-ksj_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.5078125, - "max_sentence1_length": 245, - "min_sentence2_length": 43, - "average_sentence2_length": 153.34765625, - "max_sentence2_length": 471, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68059 - }, - "ksj_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 153.34765625, - "max_sentence1_length": 471, - "min_sentence2_length": 24, - "average_sentence2_length": 112.5078125, - "max_sentence2_length": 245, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68059 - }, - "eng_Latn-ksr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.6640625, - "max_sentence1_length": 376, - "min_sentence2_length": 39, - "average_sentence2_length": 223.3125, - "max_sentence2_length": 652, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85498 - }, - "ksr_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 223.3125, - "max_sentence1_length": 652, - "min_sentence2_length": 24, - "average_sentence2_length": 110.6640625, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85498 - }, - "eng_Latn-ktm_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 113.71484375, - "max_sentence1_length": 257, - "min_sentence2_length": 36, - "average_sentence2_length": 160.20703125, - "max_sentence2_length": 676, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70124 - }, - "ktm_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 160.20703125, - "max_sentence1_length": 676, - "min_sentence2_length": 23, - "average_sentence2_length": 113.71484375, - "max_sentence2_length": 257, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70124 - }, - "eng_Latn-kto_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.33984375, - "max_sentence1_length": 227, - "min_sentence2_length": 43, - "average_sentence2_length": 250.73828125, - "max_sentence2_length": 1025, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92436 - }, - "kto_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 250.73828125, - "max_sentence1_length": 1025, - "min_sentence2_length": 24, - "average_sentence2_length": 110.33984375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92436 - }, - "eng_Latn-kud_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 116.34765625, - "max_sentence1_length": 827, - "min_sentence2_length": 26, - "average_sentence2_length": 164.359375, - "max_sentence2_length": 449, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71861 - }, - "kud_Latn-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 164.359375, - "max_sentence1_length": 449, - "min_sentence2_length": 24, - "average_sentence2_length": 116.34765625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71861 - }, - "eng_Latn-kue_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 52, - "average_sentence2_length": 171.66796875, - "max_sentence2_length": 416, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72720 - }, - "kue_Latn-eng_Latn": { - "min_sentence1_length": 52, - "average_sentence1_length": 171.66796875, - "max_sentence1_length": 416, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72720 - }, - "eng_Latn-kup_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.52734375, - "max_sentence1_length": 232, - "min_sentence2_length": 72, - "average_sentence2_length": 294.80078125, - "max_sentence2_length": 801, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 103764 - }, - "kup_Latn-eng_Latn": { - "min_sentence1_length": 72, - "average_sentence1_length": 294.80078125, - "max_sentence1_length": 801, - "min_sentence2_length": 24, - "average_sentence2_length": 110.52734375, - "max_sentence2_length": 232, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 103764 - }, - "eng_Latn-kvg_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.64453125, - "max_sentence1_length": 227, - "min_sentence2_length": 43, - "average_sentence2_length": 179.86328125, - "max_sentence2_length": 422, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74370 - }, - "kvg_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 179.86328125, - "max_sentence1_length": 422, - "min_sentence2_length": 24, - "average_sentence2_length": 110.64453125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74370 - }, - "eng_Latn-kvn_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.171875, - "max_sentence1_length": 827, - "min_sentence2_length": 51, - "average_sentence2_length": 182.14453125, - "max_sentence2_length": 451, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75857 - }, - "kvn_Latn-eng_Latn": { - "min_sentence1_length": 51, - "average_sentence1_length": 182.14453125, - "max_sentence1_length": 451, - "min_sentence2_length": 24, - "average_sentence2_length": 114.171875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75857 - }, - "eng_Latn-kwd_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.453125, - "max_sentence1_length": 827, - "min_sentence2_length": 48, - "average_sentence2_length": 221.33984375, - "max_sentence2_length": 826, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85451 - }, - "kwd_Latn-eng_Latn": { - "min_sentence1_length": 48, - "average_sentence1_length": 221.33984375, - "max_sentence1_length": 826, - "min_sentence2_length": 24, - "average_sentence2_length": 112.453125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85451 - }, - "eng_Latn-kwf_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.73046875, - "max_sentence1_length": 827, - "min_sentence2_length": 46, - "average_sentence2_length": 214.3515625, - "max_sentence2_length": 719, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83989 - }, - "kwf_Latn-eng_Latn": { - "min_sentence1_length": 46, - "average_sentence1_length": 214.3515625, - "max_sentence1_length": 719, - "min_sentence2_length": 24, - "average_sentence2_length": 113.73046875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83989 - }, - "eng_Latn-kwi_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 114.2109375, - "max_sentence1_length": 827, - "min_sentence2_length": 33, - "average_sentence2_length": 178.421875, - "max_sentence2_length": 526, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74914 - }, - "kwi_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 178.421875, - "max_sentence1_length": 526, - "min_sentence2_length": 31, - "average_sentence2_length": 114.2109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74914 - }, - "eng_Latn-kwj_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.15234375, - "max_sentence1_length": 227, - "min_sentence2_length": 49, - "average_sentence2_length": 220.26953125, - "max_sentence2_length": 697, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84588 - }, - "kwj_Latn-eng_Latn": { - "min_sentence1_length": 49, - "average_sentence1_length": 220.26953125, - "max_sentence1_length": 697, - "min_sentence2_length": 24, - "average_sentence2_length": 110.15234375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84588 - }, - "eng_Latn-kyc_Latn": { - "min_sentence1_length": 21, - "average_sentence1_length": 114.67578125, - "max_sentence1_length": 827, - "min_sentence2_length": 37, - "average_sentence2_length": 182.01953125, - "max_sentence2_length": 766, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75954 - }, - "kyc_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 182.01953125, - "max_sentence1_length": 766, - "min_sentence2_length": 21, - "average_sentence2_length": 114.67578125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75954 - }, - "eng_Latn-kyf_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.4921875, - "max_sentence1_length": 827, - "min_sentence2_length": 23, - "average_sentence2_length": 143.33203125, - "max_sentence2_length": 476, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65747 - }, - "kyf_Latn-eng_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 143.33203125, - "max_sentence1_length": 476, - "min_sentence2_length": 24, - "average_sentence2_length": 113.4921875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65747 - }, - "eng_Latn-kyg_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.09375, - "max_sentence1_length": 227, - "min_sentence2_length": 56, - "average_sentence2_length": 234.01953125, - "max_sentence2_length": 726, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88349 - }, - "kyg_Latn-eng_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 234.01953125, - "max_sentence1_length": 726, - "min_sentence2_length": 24, - "average_sentence2_length": 111.09375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88349 - }, - "eng_Latn-kyq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.11328125, - "max_sentence1_length": 227, - "min_sentence2_length": 40, - "average_sentence2_length": 166.59375, - "max_sentence2_length": 586, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70837 - }, - "kyq_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 166.59375, - "max_sentence1_length": 586, - "min_sentence2_length": 24, - "average_sentence2_length": 110.11328125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70837 - }, - "eng_Latn-kyz_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 113.44921875, - "max_sentence1_length": 246, - "min_sentence2_length": 50, - "average_sentence2_length": 406.671875, - "max_sentence2_length": 1885, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 133151 - }, - "kyz_Latn-eng_Latn": { - "min_sentence1_length": 50, - "average_sentence1_length": 406.671875, - "max_sentence1_length": 1885, - "min_sentence2_length": 31, - "average_sentence2_length": 113.44921875, - "max_sentence2_length": 246, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 133151 - }, - "eng_Latn-kze_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 108.52734375, - "max_sentence1_length": 227, - "min_sentence2_length": 37, - "average_sentence2_length": 210.296875, - "max_sentence2_length": 632, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81619 - }, - "kze_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 210.296875, - "max_sentence1_length": 632, - "min_sentence2_length": 24, - "average_sentence2_length": 108.52734375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81619 - }, - "eng_Latn-lac_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 112.69921875, - "max_sentence1_length": 827, - "min_sentence2_length": 24, - "average_sentence2_length": 251.0625, - "max_sentence2_length": 821, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93123 - }, - "lac_Latn-eng_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 251.0625, - "max_sentence1_length": 821, - "min_sentence2_length": 31, - "average_sentence2_length": 112.69921875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93123 - }, - "eng_Latn-lat_Latn": { - "min_sentence1_length": 20, - "average_sentence1_length": 122.43359375, - "max_sentence1_length": 422, - "min_sentence2_length": 20, - "average_sentence2_length": 110.3046875, - "max_sentence2_length": 365, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59581 - }, - "lat_Latn-eng_Latn": { - "min_sentence1_length": 20, - "average_sentence1_length": 110.3046875, - "max_sentence1_length": 365, - "min_sentence2_length": 20, - "average_sentence2_length": 122.43359375, - "max_sentence2_length": 422, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59581 - }, - "eng_Latn-lbb_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.8828125, - "max_sentence1_length": 227, - "min_sentence2_length": 36, - "average_sentence2_length": 168.0859375, - "max_sentence2_length": 456, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71416 - }, - "lbb_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 168.0859375, - "max_sentence1_length": 456, - "min_sentence2_length": 24, - "average_sentence2_length": 110.8828125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71416 - }, - "eng_Latn-lbk_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.3125, - "max_sentence1_length": 827, - "min_sentence2_length": 26, - "average_sentence2_length": 171.08984375, - "max_sentence2_length": 529, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72807 - }, - "lbk_Latn-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 171.08984375, - "max_sentence1_length": 529, - "min_sentence2_length": 24, - "average_sentence2_length": 113.3125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72807 - }, - "eng_Latn-lcm_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 37, - "average_sentence2_length": 154.9921875, - "max_sentence2_length": 347, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68451 - }, - "lcm_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 154.9921875, - "max_sentence1_length": 347, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68451 - }, - "eng_Latn-leu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.5546875, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 189.1875, - "max_sentence2_length": 676, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78014 - }, - "leu_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 189.1875, - "max_sentence1_length": 676, - "min_sentence2_length": 24, - "average_sentence2_length": 115.5546875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78014 - }, - "eng_Latn-lex_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.51171875, - "max_sentence1_length": 827, - "min_sentence2_length": 44, - "average_sentence2_length": 247.8828125, - "max_sentence2_length": 675, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92773 - }, - "lex_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 247.8828125, - "max_sentence1_length": 675, - "min_sentence2_length": 24, - "average_sentence2_length": 114.51171875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92773 - }, - "eng_Latn-lgl_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.73046875, - "max_sentence1_length": 827, - "min_sentence2_length": 37, - "average_sentence2_length": 187.2421875, - "max_sentence2_length": 651, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77049 - }, - "lgl_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 187.2421875, - "max_sentence1_length": 651, - "min_sentence2_length": 24, - "average_sentence2_length": 113.73046875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77049 - }, - "eng_Latn-lid_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.03515625, - "max_sentence1_length": 227, - "min_sentence2_length": 30, - "average_sentence2_length": 241.02734375, - "max_sentence2_length": 922, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90128 - }, - "lid_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 241.02734375, - "max_sentence1_length": 922, - "min_sentence2_length": 24, - "average_sentence2_length": 111.03515625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90128 - }, - "eng_Latn-lif_Deva": { - "min_sentence1_length": 1, - "average_sentence1_length": 145.33203125, - "max_sentence1_length": 341, - "min_sentence2_length": 65, - "average_sentence2_length": 243.16015625, - "max_sentence2_length": 598, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99454 - }, - "lif_Deva-eng_Latn": { - "min_sentence1_length": 65, - "average_sentence1_length": 243.16015625, - "max_sentence1_length": 598, - "min_sentence2_length": 1, - "average_sentence2_length": 145.33203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99454 - }, - "eng_Latn-lin_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 21, - "average_sentence2_length": 138.46484375, - "max_sentence2_length": 325, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64220 - }, - "lin_Latn-eng_Latn": { - "min_sentence1_length": 21, - "average_sentence1_length": 138.46484375, - "max_sentence1_length": 325, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64220 - }, - "eng_Latn-lit_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.9921875, - "max_sentence1_length": 827, - "min_sentence2_length": 25, - "average_sentence2_length": 108.26953125, - "max_sentence2_length": 264, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 56899 - }, - "lit_Latn-eng_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 108.26953125, - "max_sentence1_length": 264, - "min_sentence2_length": 24, - "average_sentence2_length": 113.9921875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 56899 - }, - "eng_Latn-llg_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.48046875, - "max_sentence1_length": 251, - "min_sentence2_length": 28, - "average_sentence2_length": 273.5078125, - "max_sentence2_length": 1339, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99325 - }, - "llg_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 273.5078125, - "max_sentence1_length": 1339, - "min_sentence2_length": 24, - "average_sentence2_length": 114.48046875, - "max_sentence2_length": 251, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99325 - }, - "eng_Latn-lug_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 29, - "average_sentence2_length": 118.4453125, - "max_sentence2_length": 288, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59095 - }, - "lug_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 118.4453125, - "max_sentence1_length": 288, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59095 - }, - "eng_Latn-luo_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 23, - "average_sentence2_length": 115.45703125, - "max_sentence2_length": 312, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59092 - }, - "luo_Latn-eng_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 115.45703125, - "max_sentence1_length": 312, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59092 - }, - "eng_Latn-lww_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 114.66015625, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 260.0234375, - "max_sentence2_length": 992, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 95919 - }, - "lww_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 260.0234375, - "max_sentence1_length": 992, - "min_sentence2_length": 31, - "average_sentence2_length": 114.66015625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 95919 - }, - "eng_Latn-maa_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.65234375, - "max_sentence1_length": 827, - "min_sentence2_length": 32, - "average_sentence2_length": 181.85546875, - "max_sentence2_length": 479, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75650 - }, - "maa_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 181.85546875, - "max_sentence1_length": 479, - "min_sentence2_length": 24, - "average_sentence2_length": 113.65234375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75650 - }, - "eng_Latn-maj_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 46, - "average_sentence2_length": 161.359375, - "max_sentence2_length": 353, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70081 - }, - "maj_Latn-eng_Latn": { - "min_sentence1_length": 46, - "average_sentence1_length": 161.359375, - "max_sentence1_length": 353, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70081 - }, - "eng_Latn-mal_Mlym": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.140625, - "max_sentence1_length": 341, - "min_sentence2_length": 52, - "average_sentence2_length": 156.7265625, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77790 - }, - "mal_Mlym-eng_Latn": { - "min_sentence1_length": 52, - "average_sentence1_length": 156.7265625, - "max_sentence1_length": 376, - "min_sentence2_length": 56, - "average_sentence2_length": 147.140625, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77790 - }, - "eng_Latn-mam_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 145.8828125, - "max_sentence1_length": 341, - "min_sentence2_length": 46, - "average_sentence2_length": 176.44140625, - "max_sentence2_length": 399, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82515 - }, - "mam_Latn-eng_Latn": { - "min_sentence1_length": 46, - "average_sentence1_length": 176.44140625, - "max_sentence1_length": 399, - "min_sentence2_length": 35, - "average_sentence2_length": 145.8828125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82515 - }, - "eng_Latn-maq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.89453125, - "max_sentence1_length": 227, - "min_sentence2_length": 43, - "average_sentence2_length": 193.59765625, - "max_sentence2_length": 460, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77694 - }, - "maq_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 193.59765625, - "max_sentence1_length": 460, - "min_sentence2_length": 24, - "average_sentence2_length": 109.89453125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77694 - }, - "eng_Latn-mar_Deva": { - "min_sentence1_length": 24, - "average_sentence1_length": 108.921875, - "max_sentence1_length": 227, - "min_sentence2_length": 23, - "average_sentence2_length": 118.359375, - "max_sentence2_length": 295, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58184 - }, - "mar_Deva-eng_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 118.359375, - "max_sentence1_length": 295, - "min_sentence2_length": 24, - "average_sentence2_length": 108.921875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58184 - }, - "eng_Latn-mau_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 35, - "average_sentence2_length": 169.7890625, - "max_sentence2_length": 442, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73001 - }, - "mau_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 169.7890625, - "max_sentence1_length": 442, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73001 - }, - "eng_Latn-mav_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 45, - "average_sentence2_length": 337.78515625, - "max_sentence2_length": 1352, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 115246 - }, - "mav_Latn-eng_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 337.78515625, - "max_sentence1_length": 1352, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 115246 - }, - "eng_Latn-maz_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.00390625, - "max_sentence1_length": 827, - "min_sentence2_length": 30, - "average_sentence2_length": 224.94140625, - "max_sentence2_length": 949, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86514 - }, - "maz_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 224.94140625, - "max_sentence1_length": 949, - "min_sentence2_length": 24, - "average_sentence2_length": 113.00390625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86514 - }, - "eng_Latn-mbb_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 128.66015625, - "max_sentence1_length": 422, - "min_sentence2_length": 26, - "average_sentence2_length": 247.30078125, - "max_sentence2_length": 815, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 96246 - }, - "mbb_Latn-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 247.30078125, - "max_sentence1_length": 815, - "min_sentence2_length": 25, - "average_sentence2_length": 128.66015625, - "max_sentence2_length": 422, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 96246 - }, - "eng_Latn-mbc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.953125, - "max_sentence1_length": 376, - "min_sentence2_length": 51, - "average_sentence2_length": 235.421875, - "max_sentence2_length": 640, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89184 - }, - "mbc_Latn-eng_Latn": { - "min_sentence1_length": 51, - "average_sentence1_length": 235.421875, - "max_sentence1_length": 640, - "min_sentence2_length": 24, - "average_sentence2_length": 112.953125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89184 - }, - "eng_Latn-mbh_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.6328125, - "max_sentence1_length": 376, - "min_sentence2_length": 39, - "average_sentence2_length": 216.8984375, - "max_sentence2_length": 828, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83848 - }, - "mbh_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 216.8984375, - "max_sentence1_length": 828, - "min_sentence2_length": 24, - "average_sentence2_length": 110.6328125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83848 - }, - "eng_Latn-mbj_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 116.0703125, - "max_sentence1_length": 827, - "min_sentence2_length": 33, - "average_sentence2_length": 219.76953125, - "max_sentence2_length": 796, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85975 - }, - "mbj_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 219.76953125, - "max_sentence1_length": 796, - "min_sentence2_length": 24, - "average_sentence2_length": 116.0703125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85975 - }, - "eng_Latn-mbl_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.20703125, - "max_sentence1_length": 827, - "min_sentence2_length": 36, - "average_sentence2_length": 169.68359375, - "max_sentence2_length": 515, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72932 - }, - "mbl_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 169.68359375, - "max_sentence1_length": 515, - "min_sentence2_length": 24, - "average_sentence2_length": 115.20703125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72932 - }, - "eng_Latn-mbs_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.09765625, - "max_sentence1_length": 376, - "min_sentence2_length": 34, - "average_sentence2_length": 196.75, - "max_sentence2_length": 742, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79065 - }, - "mbs_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 196.75, - "max_sentence1_length": 742, - "min_sentence2_length": 24, - "average_sentence2_length": 112.09765625, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79065 - }, - "eng_Latn-mbt_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.73046875, - "max_sentence1_length": 227, - "min_sentence2_length": 37, - "average_sentence2_length": 168.234375, - "max_sentence2_length": 461, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71671 - }, - "mbt_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 168.234375, - "max_sentence1_length": 461, - "min_sentence2_length": 24, - "average_sentence2_length": 111.73046875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71671 - }, - "eng_Latn-mca_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.296875, - "max_sentence1_length": 827, - "min_sentence2_length": 44, - "average_sentence2_length": 191.4609375, - "max_sentence2_length": 448, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78530 - }, - "mca_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 191.4609375, - "max_sentence1_length": 448, - "min_sentence2_length": 24, - "average_sentence2_length": 115.296875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78530 - }, - "eng_Latn-mcb_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.671875, - "max_sentence1_length": 827, - "min_sentence2_length": 36, - "average_sentence2_length": 250.12109375, - "max_sentence2_length": 799, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93643 - }, - "mcb_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 250.12109375, - "max_sentence1_length": 799, - "min_sentence2_length": 24, - "average_sentence2_length": 115.671875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93643 - }, - "eng_Latn-mcd_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.93359375, - "max_sentence1_length": 271, - "min_sentence2_length": 30, - "average_sentence2_length": 341.234375, - "max_sentence2_length": 1750, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 117035 - }, - "mcd_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 341.234375, - "max_sentence1_length": 1750, - "min_sentence2_length": 24, - "average_sentence2_length": 115.93359375, - "max_sentence2_length": 271, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 117035 - }, - "eng_Latn-mcf_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.546875, - "max_sentence1_length": 827, - "min_sentence2_length": 40, - "average_sentence2_length": 238.4375, - "max_sentence2_length": 842, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89852 - }, - "mcf_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 238.4375, - "max_sentence1_length": 842, - "min_sentence2_length": 24, - "average_sentence2_length": 112.546875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89852 - }, - "eng_Latn-mco_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 112.02734375, - "max_sentence1_length": 227, - "min_sentence2_length": 34, - "average_sentence2_length": 244.5859375, - "max_sentence2_length": 778, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91293 - }, - "mco_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 244.5859375, - "max_sentence1_length": 778, - "min_sentence2_length": 31, - "average_sentence2_length": 112.02734375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91293 - }, - "eng_Latn-mcp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 51, - "average_sentence2_length": 155.43359375, - "max_sentence2_length": 476, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68564 - }, - "mcp_Latn-eng_Latn": { - "min_sentence1_length": 51, - "average_sentence1_length": 155.43359375, - "max_sentence1_length": 476, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68564 - }, - "eng_Latn-mcq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.4375, - "max_sentence1_length": 827, - "min_sentence2_length": 39, - "average_sentence2_length": 142.0390625, - "max_sentence2_length": 379, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65402 - }, - "mcq_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 142.0390625, - "max_sentence1_length": 379, - "min_sentence2_length": 24, - "average_sentence2_length": 113.4375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65402 - }, - "eng_Latn-mcr_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 110.4453125, - "max_sentence1_length": 225, - "min_sentence2_length": 45, - "average_sentence2_length": 245.9921875, - "max_sentence2_length": 845, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91248 - }, - "mcr_Latn-eng_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 245.9921875, - "max_sentence1_length": 845, - "min_sentence2_length": 31, - "average_sentence2_length": 110.4453125, - "max_sentence2_length": 225, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91248 - }, - "eng_Latn-mdy_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.859375, - "max_sentence1_length": 827, - "min_sentence2_length": 17, - "average_sentence2_length": 80.9921875, - "max_sentence2_length": 240, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 50138 - }, - "mdy_Latn-eng_Latn": { - "min_sentence1_length": 17, - "average_sentence1_length": 80.9921875, - "max_sentence1_length": 240, - "min_sentence2_length": 24, - "average_sentence2_length": 114.859375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 50138 - }, - "eng_Latn-med_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 110.2578125, - "max_sentence1_length": 227, - "min_sentence2_length": 37, - "average_sentence2_length": 231.96875, - "max_sentence2_length": 914, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87610 - }, - "med_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 231.96875, - "max_sentence1_length": 914, - "min_sentence2_length": 25, - "average_sentence2_length": 110.2578125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87610 - }, - "eng_Latn-mee_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 114.90234375, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 226.9140625, - "max_sentence2_length": 624, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87505 - }, - "mee_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 226.9140625, - "max_sentence1_length": 624, - "min_sentence2_length": 31, - "average_sentence2_length": 114.90234375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87505 - }, - "eng_Latn-mek_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.62890625, - "max_sentence1_length": 227, - "min_sentence2_length": 34, - "average_sentence2_length": 285.19140625, - "max_sentence2_length": 880, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101586 - }, - "mek_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 285.19140625, - "max_sentence1_length": 880, - "min_sentence2_length": 24, - "average_sentence2_length": 111.62890625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101586 - }, - "eng_Latn-meq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.39453125, - "max_sentence1_length": 827, - "min_sentence2_length": 25, - "average_sentence2_length": 190.38671875, - "max_sentence2_length": 727, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78280 - }, - "meq_Latn-eng_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 190.38671875, - "max_sentence1_length": 727, - "min_sentence2_length": 24, - "average_sentence2_length": 115.39453125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78280 - }, - "eng_Latn-met_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 114.3359375, - "max_sentence1_length": 827, - "min_sentence2_length": 40, - "average_sentence2_length": 165.375, - "max_sentence2_length": 717, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71606 - }, - "met_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 165.375, - "max_sentence1_length": 717, - "min_sentence2_length": 31, - "average_sentence2_length": 114.3359375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71606 - }, - "eng_Latn-meu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.01171875, - "max_sentence1_length": 827, - "min_sentence2_length": 23, - "average_sentence2_length": 123.78125, - "max_sentence2_length": 301, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60875 - }, - "meu_Latn-eng_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 123.78125, - "max_sentence1_length": 301, - "min_sentence2_length": 24, - "average_sentence2_length": 114.01171875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60875 - }, - "eng_Latn-mgc_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 109.9375, - "max_sentence1_length": 243, - "min_sentence2_length": 23, - "average_sentence2_length": 110.7265625, - "max_sentence2_length": 313, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 56490 - }, - "mgc_Latn-eng_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 110.7265625, - "max_sentence1_length": 313, - "min_sentence2_length": 38, - "average_sentence2_length": 109.9375, - "max_sentence2_length": 243, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 56490 - }, - "eng_Latn-mgh_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 108.90234375, - "max_sentence1_length": 210, - "min_sentence2_length": 29, - "average_sentence2_length": 128.78125, - "max_sentence2_length": 284, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60847 - }, - "mgh_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 128.78125, - "max_sentence1_length": 284, - "min_sentence2_length": 28, - "average_sentence2_length": 108.90234375, - "max_sentence2_length": 210, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60847 - }, - "eng_Latn-mgw_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 111.75598086124401, - "max_sentence1_length": 243, - "min_sentence2_length": 39, - "average_sentence2_length": 125.02870813397129, - "max_sentence2_length": 322, - "num_samples": 209, - "num_samples_sentence2": 209, - "number_of_characters": 49488 - }, - "mgw_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 125.02870813397129, - "max_sentence1_length": 322, - "min_sentence2_length": 37, - "average_sentence2_length": 111.75598086124401, - "max_sentence2_length": 243, - "num_samples": 209, - "num_samples_sentence2": 209, - "number_of_characters": 49488 - }, - "eng_Latn-mhl_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.6640625, - "max_sentence1_length": 376, - "min_sentence2_length": 39, - "average_sentence2_length": 211.4609375, - "max_sentence2_length": 653, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82464 - }, - "mhl_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 211.4609375, - "max_sentence1_length": 653, - "min_sentence2_length": 24, - "average_sentence2_length": 110.6640625, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82464 - }, - "eng_Latn-mib_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.32421875, - "max_sentence1_length": 827, - "min_sentence2_length": 42, - "average_sentence2_length": 162.9609375, - "max_sentence2_length": 367, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71241 - }, - "mib_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 162.9609375, - "max_sentence1_length": 367, - "min_sentence2_length": 24, - "average_sentence2_length": 115.32421875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71241 - }, - "eng_Latn-mic_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.13671875, - "max_sentence1_length": 227, - "min_sentence2_length": 38, - "average_sentence2_length": 196.65234375, - "max_sentence2_length": 1958, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78794 - }, - "mic_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 196.65234375, - "max_sentence1_length": 1958, - "min_sentence2_length": 24, - "average_sentence2_length": 111.13671875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78794 - }, - "eng_Latn-mie_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 43, - "average_sentence2_length": 163.33203125, - "max_sentence2_length": 380, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71348 - }, - "mie_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 163.33203125, - "max_sentence1_length": 380, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71348 - }, - "eng_Latn-mig_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 145.109375, - "max_sentence2_length": 302, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66683 - }, - "mig_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 145.109375, - "max_sentence1_length": 302, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66683 - }, - "eng_Latn-mih_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 114.421875, - "max_sentence1_length": 827, - "min_sentence2_length": 38, - "average_sentence2_length": 177.89453125, - "max_sentence2_length": 571, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74833 - }, - "mih_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 177.89453125, - "max_sentence1_length": 571, - "min_sentence2_length": 31, - "average_sentence2_length": 114.421875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74833 - }, - "eng_Latn-mil_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.60546875, - "max_sentence1_length": 227, - "min_sentence2_length": 50, - "average_sentence2_length": 227.28515625, - "max_sentence2_length": 613, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86500 - }, - "mil_Latn-eng_Latn": { - "min_sentence1_length": 50, - "average_sentence1_length": 227.28515625, - "max_sentence1_length": 613, - "min_sentence2_length": 24, - "average_sentence2_length": 110.60546875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86500 - }, - "eng_Latn-mio_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.41015625, - "max_sentence1_length": 227, - "min_sentence2_length": 43, - "average_sentence2_length": 181.44140625, - "max_sentence2_length": 444, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74714 - }, - "mio_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 181.44140625, - "max_sentence1_length": 444, - "min_sentence2_length": 24, - "average_sentence2_length": 110.41015625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74714 - }, - "eng_Latn-mir_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.8046875, - "max_sentence1_length": 246, - "min_sentence2_length": 32, - "average_sentence2_length": 281.359375, - "max_sentence2_length": 1042, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 100394 - }, - "mir_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 281.359375, - "max_sentence1_length": 1042, - "min_sentence2_length": 24, - "average_sentence2_length": 110.8046875, - "max_sentence2_length": 246, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 100394 - }, - "eng_Latn-mit_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.02734375, - "max_sentence1_length": 229, - "min_sentence2_length": 36, - "average_sentence2_length": 220.85546875, - "max_sentence2_length": 664, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84962 - }, - "mit_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 220.85546875, - "max_sentence1_length": 664, - "min_sentence2_length": 24, - "average_sentence2_length": 111.02734375, - "max_sentence2_length": 229, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84962 - }, - "eng_Latn-miz_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 41, - "average_sentence2_length": 150.4453125, - "max_sentence2_length": 340, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68049 - }, - "miz_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 150.4453125, - "max_sentence1_length": 340, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68049 - }, - "eng_Latn-mjc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.140625, - "max_sentence1_length": 827, - "min_sentence2_length": 37, - "average_sentence2_length": 191.84375, - "max_sentence2_length": 545, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78332 - }, - "mjc_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 191.84375, - "max_sentence1_length": 545, - "min_sentence2_length": 24, - "average_sentence2_length": 114.140625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78332 - }, - "eng_Latn-mkj_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 109.86328125, - "max_sentence1_length": 257, - "min_sentence2_length": 39, - "average_sentence2_length": 122.85546875, - "max_sentence2_length": 319, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59576 - }, - "mkj_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 122.85546875, - "max_sentence1_length": 319, - "min_sentence2_length": 37, - "average_sentence2_length": 109.86328125, - "max_sentence2_length": 257, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59576 - }, - "eng_Latn-mkl_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.32421875, - "max_sentence1_length": 827, - "min_sentence2_length": 19, - "average_sentence2_length": 127.73046875, - "max_sentence2_length": 372, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61966 - }, - "mkl_Latn-eng_Latn": { - "min_sentence1_length": 19, - "average_sentence1_length": 127.73046875, - "max_sentence1_length": 372, - "min_sentence2_length": 24, - "average_sentence2_length": 114.32421875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61966 - }, - "eng_Latn-mkn_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.76953125, - "max_sentence1_length": 251, - "min_sentence2_length": 37, - "average_sentence2_length": 279.13671875, - "max_sentence2_length": 1299, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 100840 - }, - "mkn_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 279.13671875, - "max_sentence1_length": 1299, - "min_sentence2_length": 24, - "average_sentence2_length": 114.76953125, - "max_sentence2_length": 251, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 100840 - }, - "eng_Latn-mks_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 36, - "average_sentence2_length": 211.61328125, - "max_sentence2_length": 521, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83708 - }, - "mks_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 211.61328125, - "max_sentence1_length": 521, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83708 - }, - "eng_Latn-mle_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.97265625, - "max_sentence1_length": 239, - "min_sentence2_length": 46, - "average_sentence2_length": 332.56640625, - "max_sentence2_length": 1304, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 113546 - }, - "mle_Latn-eng_Latn": { - "min_sentence1_length": 46, - "average_sentence1_length": 332.56640625, - "max_sentence1_length": 1304, - "min_sentence2_length": 24, - "average_sentence2_length": 110.97265625, - "max_sentence2_length": 239, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 113546 - }, - "eng_Latn-mlh_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.6640625, - "max_sentence1_length": 376, - "min_sentence2_length": 39, - "average_sentence2_length": 211.4609375, - "max_sentence2_length": 653, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82464 - }, - "mlh_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 211.4609375, - "max_sentence1_length": 653, - "min_sentence2_length": 24, - "average_sentence2_length": 110.6640625, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82464 - }, - "eng_Latn-mlp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.23828125, - "max_sentence1_length": 827, - "min_sentence2_length": 50, - "average_sentence2_length": 232.94921875, - "max_sentence2_length": 748, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89136 - }, - "mlp_Latn-eng_Latn": { - "min_sentence1_length": 50, - "average_sentence1_length": 232.94921875, - "max_sentence1_length": 748, - "min_sentence2_length": 24, - "average_sentence2_length": 115.23828125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89136 - }, - "eng_Latn-mmo_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 53, - "average_sentence2_length": 200.91796875, - "max_sentence2_length": 528, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80208 - }, - "mmo_Latn-eng_Latn": { - "min_sentence1_length": 53, - "average_sentence1_length": 200.91796875, - "max_sentence1_length": 528, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80208 - }, - "eng_Latn-mmx_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.87890625, - "max_sentence1_length": 227, - "min_sentence2_length": 37, - "average_sentence2_length": 180.81640625, - "max_sentence2_length": 520, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74674 - }, - "mmx_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 180.81640625, - "max_sentence1_length": 520, - "min_sentence2_length": 24, - "average_sentence2_length": 110.87890625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74674 - }, - "eng_Latn-mna_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 110.01953125, - "max_sentence1_length": 231, - "min_sentence2_length": 49, - "average_sentence2_length": 234.32421875, - "max_sentence2_length": 760, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88152 - }, - "mna_Latn-eng_Latn": { - "min_sentence1_length": 49, - "average_sentence1_length": 234.32421875, - "max_sentence1_length": 760, - "min_sentence2_length": 31, - "average_sentence2_length": 110.01953125, - "max_sentence2_length": 231, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88152 - }, - "eng_Latn-mop_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.30078125, - "max_sentence1_length": 827, - "min_sentence2_length": 38, - "average_sentence2_length": 200.06640625, - "max_sentence2_length": 757, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80222 - }, - "mop_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 200.06640625, - "max_sentence1_length": 757, - "min_sentence2_length": 24, - "average_sentence2_length": 113.30078125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80222 - }, - "eng_Latn-mox_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.58984375, - "max_sentence1_length": 232, - "min_sentence2_length": 34, - "average_sentence2_length": 225.30859375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85990 - }, - "mox_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 225.30859375, - "max_sentence1_length": 827, - "min_sentence2_length": 24, - "average_sentence2_length": 110.58984375, - "max_sentence2_length": 232, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85990 - }, - "eng_Latn-mph_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 104.98795180722891, - "max_sentence1_length": 207, - "min_sentence2_length": 117, - "average_sentence2_length": 378.1807228915663, - "max_sentence2_length": 1106, - "num_samples": 83, - "num_samples_sentence2": 83, - "number_of_characters": 40103 - }, - "mph_Latn-eng_Latn": { - "min_sentence1_length": 117, - "average_sentence1_length": 378.1807228915663, - "max_sentence1_length": 1106, - "min_sentence2_length": 42, - "average_sentence2_length": 104.98795180722891, - "max_sentence2_length": 207, - "num_samples": 83, - "num_samples_sentence2": 83, - "number_of_characters": 40103 - }, - "eng_Latn-mpj_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.93359375, - "max_sentence1_length": 246, - "min_sentence2_length": 57, - "average_sentence2_length": 339.09765625, - "max_sentence2_length": 1893, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 115208 - }, - "mpj_Latn-eng_Latn": { - "min_sentence1_length": 57, - "average_sentence1_length": 339.09765625, - "max_sentence1_length": 1893, - "min_sentence2_length": 24, - "average_sentence2_length": 110.93359375, - "max_sentence2_length": 246, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 115208 - }, - "eng_Latn-mpm_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.17578125, - "max_sentence1_length": 227, - "min_sentence2_length": 42, - "average_sentence2_length": 234.5546875, - "max_sentence2_length": 727, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87995 - }, - "mpm_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 234.5546875, - "max_sentence1_length": 727, - "min_sentence2_length": 24, - "average_sentence2_length": 109.17578125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87995 - }, - "eng_Latn-mpp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.265625, - "max_sentence1_length": 230, - "min_sentence2_length": 33, - "average_sentence2_length": 223.0390625, - "max_sentence2_length": 978, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85326 - }, - "mpp_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 223.0390625, - "max_sentence1_length": 978, - "min_sentence2_length": 24, - "average_sentence2_length": 110.265625, - "max_sentence2_length": 230, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85326 - }, - "eng_Latn-mps_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.17578125, - "max_sentence1_length": 230, - "min_sentence2_length": 43, - "average_sentence2_length": 305.2109375, - "max_sentence2_length": 1272, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 106083 - }, - "mps_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 305.2109375, - "max_sentence1_length": 1272, - "min_sentence2_length": 24, - "average_sentence2_length": 109.17578125, - "max_sentence2_length": 230, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 106083 - }, - "eng_Latn-mpt_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.9140625, - "max_sentence1_length": 227, - "min_sentence2_length": 39, - "average_sentence2_length": 276.9375, - "max_sentence2_length": 1274, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99802 - }, - "mpt_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 276.9375, - "max_sentence1_length": 1274, - "min_sentence2_length": 24, - "average_sentence2_length": 112.9140625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99802 - }, - "eng_Latn-mpx_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.59375, - "max_sentence1_length": 827, - "min_sentence2_length": 38, - "average_sentence2_length": 175.19140625, - "max_sentence2_length": 568, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74185 - }, - "mpx_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 175.19140625, - "max_sentence1_length": 568, - "min_sentence2_length": 24, - "average_sentence2_length": 114.59375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74185 - }, - "eng_Latn-mqb_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.796875, - "max_sentence1_length": 827, - "min_sentence2_length": 42, - "average_sentence2_length": 194.33984375, - "max_sentence2_length": 670, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79139 - }, - "mqb_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 194.33984375, - "max_sentence1_length": 670, - "min_sentence2_length": 24, - "average_sentence2_length": 114.796875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79139 - }, - "eng_Latn-mqj_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.84375, - "max_sentence1_length": 227, - "min_sentence2_length": 39, - "average_sentence2_length": 176.8671875, - "max_sentence2_length": 452, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73654 - }, - "mqj_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 176.8671875, - "max_sentence1_length": 452, - "min_sentence2_length": 24, - "average_sentence2_length": 110.84375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73654 - }, - "eng_Latn-msb_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.91015625, - "max_sentence1_length": 827, - "min_sentence2_length": 29, - "average_sentence2_length": 157.33984375, - "max_sentence2_length": 394, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69696 - }, - "msb_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 157.33984375, - "max_sentence1_length": 394, - "min_sentence2_length": 24, - "average_sentence2_length": 114.91015625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69696 - }, - "eng_Latn-msc_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 117.7265625, - "max_sentence1_length": 248, - "min_sentence2_length": 20, - "average_sentence2_length": 127.48828125, - "max_sentence2_length": 336, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62775 - }, - "msc_Latn-eng_Latn": { - "min_sentence1_length": 20, - "average_sentence1_length": 127.48828125, - "max_sentence1_length": 336, - "min_sentence2_length": 34, - "average_sentence2_length": 117.7265625, - "max_sentence2_length": 248, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62775 - }, - "eng_Latn-msk_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.06640625, - "max_sentence1_length": 227, - "min_sentence2_length": 29, - "average_sentence2_length": 232.93359375, - "max_sentence2_length": 837, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87552 - }, - "msk_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 232.93359375, - "max_sentence1_length": 837, - "min_sentence2_length": 24, - "average_sentence2_length": 109.06640625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87552 - }, - "eng_Latn-msm_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.375, - "max_sentence1_length": 827, - "min_sentence2_length": 23, - "average_sentence2_length": 180.16015625, - "max_sentence2_length": 673, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74633 - }, - "msm_Latn-eng_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 180.16015625, - "max_sentence1_length": 673, - "min_sentence2_length": 24, - "average_sentence2_length": 111.375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74633 - }, - "eng_Latn-msy_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 146.0, - "max_sentence1_length": 341, - "min_sentence2_length": 31, - "average_sentence2_length": 227.23828125, - "max_sentence2_length": 585, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 95549 - }, - "msy_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 227.23828125, - "max_sentence1_length": 585, - "min_sentence2_length": 1, - "average_sentence2_length": 146.0, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 95549 - }, - "eng_Latn-mti_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.96875, - "max_sentence1_length": 827, - "min_sentence2_length": 3, - "average_sentence2_length": 126.86328125, - "max_sentence2_length": 327, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61653 - }, - "mti_Latn-eng_Latn": { - "min_sentence1_length": 3, - "average_sentence1_length": 126.86328125, - "max_sentence1_length": 327, - "min_sentence2_length": 24, - "average_sentence2_length": 113.96875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61653 - }, - "eng_Latn-mto_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.54296875, - "max_sentence1_length": 827, - "min_sentence2_length": 57, - "average_sentence2_length": 224.1171875, - "max_sentence2_length": 552, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86441 - }, - "mto_Latn-eng_Latn": { - "min_sentence1_length": 57, - "average_sentence1_length": 224.1171875, - "max_sentence1_length": 552, - "min_sentence2_length": 24, - "average_sentence2_length": 113.54296875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86441 - }, - "eng_Latn-mux_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.97265625, - "max_sentence1_length": 227, - "min_sentence2_length": 47, - "average_sentence2_length": 372.21484375, - "max_sentence2_length": 1454, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 123440 - }, - "mux_Latn-eng_Latn": { - "min_sentence1_length": 47, - "average_sentence1_length": 372.21484375, - "max_sentence1_length": 1454, - "min_sentence2_length": 24, - "average_sentence2_length": 109.97265625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 123440 - }, - "eng_Latn-muy_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.53125, - "max_sentence1_length": 827, - "min_sentence2_length": 33, - "average_sentence2_length": 184.390625, - "max_sentence2_length": 416, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76268 - }, - "muy_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 184.390625, - "max_sentence1_length": 416, - "min_sentence2_length": 24, - "average_sentence2_length": 113.53125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76268 - }, - "eng_Latn-mva_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.2109375, - "max_sentence1_length": 376, - "min_sentence2_length": 33, - "average_sentence2_length": 189.0390625, - "max_sentence2_length": 523, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76608 - }, - "mva_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 189.0390625, - "max_sentence1_length": 523, - "min_sentence2_length": 24, - "average_sentence2_length": 110.2109375, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76608 - }, - "eng_Latn-mvn_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.421875, - "max_sentence1_length": 227, - "min_sentence2_length": 42, - "average_sentence2_length": 255.61328125, - "max_sentence2_length": 1123, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93961 - }, - "mvn_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 255.61328125, - "max_sentence1_length": 1123, - "min_sentence2_length": 24, - "average_sentence2_length": 111.421875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93961 - }, - "eng_Latn-mwc_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 112.6875, - "max_sentence1_length": 245, - "min_sentence2_length": 38, - "average_sentence2_length": 129.9453125, - "max_sentence2_length": 273, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62114 - }, - "mwc_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 129.9453125, - "max_sentence1_length": 273, - "min_sentence2_length": 37, - "average_sentence2_length": 112.6875, - "max_sentence2_length": 245, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62114 - }, - "eng_Latn-mwe_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 37, - "average_sentence2_length": 139.8359375, - "max_sentence2_length": 343, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64571 - }, - "mwe_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 139.8359375, - "max_sentence1_length": 343, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64571 - }, - "eng_Latn-mwf_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 105.94140625, - "max_sentence1_length": 245, - "min_sentence2_length": 53, - "average_sentence2_length": 367.21484375, - "max_sentence2_length": 1738, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 121128 - }, - "mwf_Latn-eng_Latn": { - "min_sentence1_length": 53, - "average_sentence1_length": 367.21484375, - "max_sentence1_length": 1738, - "min_sentence2_length": 24, - "average_sentence2_length": 105.94140625, - "max_sentence2_length": 245, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 121128 - }, - "eng_Latn-mwp_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 111.61328125, - "max_sentence1_length": 827, - "min_sentence2_length": 47, - "average_sentence2_length": 192.2890625, - "max_sentence2_length": 634, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77799 - }, - "mwp_Latn-eng_Latn": { - "min_sentence1_length": 47, - "average_sentence1_length": 192.2890625, - "max_sentence1_length": 634, - "min_sentence2_length": 31, - "average_sentence2_length": 111.61328125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77799 - }, - "eng_Latn-mxb_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 42, - "average_sentence2_length": 186.51171875, - "max_sentence2_length": 503, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77282 - }, - "mxb_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 186.51171875, - "max_sentence1_length": 503, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77282 - }, - "eng_Latn-mxp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.4375, - "max_sentence1_length": 827, - "min_sentence2_length": 26, - "average_sentence2_length": 178.78515625, - "max_sentence2_length": 465, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74553 - }, - "mxp_Latn-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 178.78515625, - "max_sentence1_length": 465, - "min_sentence2_length": 24, - "average_sentence2_length": 112.4375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74553 - }, - "eng_Latn-mxq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 36, - "average_sentence2_length": 162.6640625, - "max_sentence2_length": 395, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71177 - }, - "mxq_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 162.6640625, - "max_sentence1_length": 395, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71177 - }, - "eng_Latn-mxt_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.390625, - "max_sentence1_length": 827, - "min_sentence2_length": 32, - "average_sentence2_length": 185.3046875, - "max_sentence2_length": 540, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76978 - }, - "mxt_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 185.3046875, - "max_sentence1_length": 540, - "min_sentence2_length": 24, - "average_sentence2_length": 115.390625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76978 - }, - "eng_Latn-mya_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 146.3203125, - "max_sentence1_length": 341, - "min_sentence2_length": 1, - "average_sentence2_length": 167.9140625, - "max_sentence2_length": 392, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80444 - }, - "mya_Latn-eng_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 167.9140625, - "max_sentence1_length": 392, - "min_sentence2_length": 56, - "average_sentence2_length": 146.3203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80444 - }, - "eng_Latn-myk_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 41, - "average_sentence2_length": 155.09375, - "max_sentence2_length": 372, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68477 - }, - "myk_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 155.09375, - "max_sentence1_length": 372, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68477 - }, - "eng_Latn-myu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.890625, - "max_sentence1_length": 230, - "min_sentence2_length": 32, - "average_sentence2_length": 254.5390625, - "max_sentence2_length": 1078, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93550 - }, - "myu_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 254.5390625, - "max_sentence1_length": 1078, - "min_sentence2_length": 24, - "average_sentence2_length": 110.890625, - "max_sentence2_length": 230, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93550 - }, - "eng_Latn-myw_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 116.5234375, - "max_sentence1_length": 232, - "min_sentence2_length": 30, - "average_sentence2_length": 246.9453125, - "max_sentence2_length": 663, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93048 - }, - "myw_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 246.9453125, - "max_sentence1_length": 663, - "min_sentence2_length": 24, - "average_sentence2_length": 116.5234375, - "max_sentence2_length": 232, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93048 - }, - "eng_Latn-myy_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.23828125, - "max_sentence1_length": 827, - "min_sentence2_length": 37, - "average_sentence2_length": 241.66796875, - "max_sentence2_length": 1002, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90856 - }, - "myy_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 241.66796875, - "max_sentence1_length": 1002, - "min_sentence2_length": 24, - "average_sentence2_length": 113.23828125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90856 - }, - "eng_Latn-mzz_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 113.664, - "max_sentence1_length": 257, - "min_sentence2_length": 43, - "average_sentence2_length": 168.8, - "max_sentence2_length": 597, - "num_samples": 125, - "num_samples_sentence2": 125, - "number_of_characters": 35308 - }, - "mzz_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 168.8, - "max_sentence1_length": 597, - "min_sentence2_length": 45, - "average_sentence2_length": 113.664, - "max_sentence2_length": 257, - "num_samples": 125, - "num_samples_sentence2": 125, - "number_of_characters": 35308 - }, - "eng_Latn-nab_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.921875, - "max_sentence1_length": 227, - "min_sentence2_length": 51, - "average_sentence2_length": 389.17578125, - "max_sentence2_length": 1402, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 128025 - }, - "nab_Latn-eng_Latn": { - "min_sentence1_length": 51, - "average_sentence1_length": 389.17578125, - "max_sentence1_length": 1402, - "min_sentence2_length": 24, - "average_sentence2_length": 110.921875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 128025 - }, - "eng_Latn-naf_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.5390625, - "max_sentence1_length": 227, - "min_sentence2_length": 44, - "average_sentence2_length": 223.32421875, - "max_sentence2_length": 882, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85981 - }, - "naf_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 223.32421875, - "max_sentence1_length": 882, - "min_sentence2_length": 24, - "average_sentence2_length": 112.5390625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85981 - }, - "eng_Latn-nak_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.59765625, - "max_sentence1_length": 227, - "min_sentence2_length": 53, - "average_sentence2_length": 254.71875, - "max_sentence2_length": 1236, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93521 - }, - "nak_Latn-eng_Latn": { - "min_sentence1_length": 53, - "average_sentence1_length": 254.71875, - "max_sentence1_length": 1236, - "min_sentence2_length": 24, - "average_sentence2_length": 110.59765625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93521 - }, - "eng_Latn-nas_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.05078125, - "max_sentence1_length": 227, - "min_sentence2_length": 28, - "average_sentence2_length": 175.453125, - "max_sentence2_length": 465, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73089 - }, - "nas_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 175.453125, - "max_sentence1_length": 465, - "min_sentence2_length": 24, - "average_sentence2_length": 110.05078125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73089 - }, - "eng_Latn-nbq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.86328125, - "max_sentence1_length": 243, - "min_sentence2_length": 33, - "average_sentence2_length": 255.109375, - "max_sentence2_length": 973, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 94201 - }, - "nbq_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 255.109375, - "max_sentence1_length": 973, - "min_sentence2_length": 24, - "average_sentence2_length": 112.86328125, - "max_sentence2_length": 243, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 94201 - }, - "eng_Latn-nca_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.53125, - "max_sentence1_length": 376, - "min_sentence2_length": 26, - "average_sentence2_length": 157.6171875, - "max_sentence2_length": 395, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68902 - }, - "nca_Latn-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 157.6171875, - "max_sentence1_length": 395, - "min_sentence2_length": 24, - "average_sentence2_length": 111.53125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68902 - }, - "eng_Latn-nch_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.34375, - "max_sentence1_length": 827, - "min_sentence2_length": 46, - "average_sentence2_length": 207.09375, - "max_sentence2_length": 519, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82032 - }, - "nch_Latn-eng_Latn": { - "min_sentence1_length": 46, - "average_sentence1_length": 207.09375, - "max_sentence1_length": 519, - "min_sentence2_length": 24, - "average_sentence2_length": 113.34375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82032 - }, - "eng_Latn-ncj_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.16015625, - "max_sentence1_length": 227, - "min_sentence2_length": 38, - "average_sentence2_length": 173.796875, - "max_sentence2_length": 460, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72693 - }, - "ncj_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 173.796875, - "max_sentence1_length": 460, - "min_sentence2_length": 24, - "average_sentence2_length": 110.16015625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72693 - }, - "eng_Latn-ncl_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 115.09375, - "max_sentence1_length": 827, - "min_sentence2_length": 28, - "average_sentence2_length": 290.17578125, - "max_sentence2_length": 794, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 103749 - }, - "ncl_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 290.17578125, - "max_sentence1_length": 794, - "min_sentence2_length": 31, - "average_sentence2_length": 115.09375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 103749 - }, - "eng_Latn-ncu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.92578125, - "max_sentence1_length": 227, - "min_sentence2_length": 57, - "average_sentence2_length": 269.1171875, - "max_sentence2_length": 1008, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97291 - }, - "ncu_Latn-eng_Latn": { - "min_sentence1_length": 57, - "average_sentence1_length": 269.1171875, - "max_sentence1_length": 1008, - "min_sentence2_length": 24, - "average_sentence2_length": 110.92578125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97291 - }, - "eng_Latn-ndg_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.41796875, - "max_sentence1_length": 227, - "min_sentence2_length": 21, - "average_sentence2_length": 116.5703125, - "max_sentence2_length": 277, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58109 - }, - "ndg_Latn-eng_Latn": { - "min_sentence1_length": 21, - "average_sentence1_length": 116.5703125, - "max_sentence1_length": 277, - "min_sentence2_length": 24, - "average_sentence2_length": 110.41796875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58109 - }, - "eng_Latn-ndj_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 32, - "average_sentence2_length": 131.58984375, - "max_sentence2_length": 321, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62460 - }, - "ndj_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 131.58984375, - "max_sentence1_length": 321, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62460 - }, - "eng_Latn-nfa_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.23046875, - "max_sentence1_length": 227, - "min_sentence2_length": 31, - "average_sentence2_length": 268.37890625, - "max_sentence2_length": 1222, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97436 - }, - "nfa_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 268.37890625, - "max_sentence1_length": 1222, - "min_sentence2_length": 24, - "average_sentence2_length": 112.23046875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97436 - }, - "eng_Latn-ngp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 27, - "average_sentence2_length": 128.859375, - "max_sentence2_length": 319, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61761 - }, - "ngp_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 128.859375, - "max_sentence1_length": 319, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61761 - }, - "eng_Latn-ngu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.703125, - "max_sentence1_length": 227, - "min_sentence2_length": 37, - "average_sentence2_length": 193.3515625, - "max_sentence2_length": 601, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77838 - }, - "ngu_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 193.3515625, - "max_sentence1_length": 601, - "min_sentence2_length": 24, - "average_sentence2_length": 110.703125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77838 - }, - "eng_Latn-nhe_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.55859375, - "max_sentence1_length": 341, - "min_sentence2_length": 80, - "average_sentence2_length": 253.1953125, - "max_sentence2_length": 615, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 102593 - }, - "nhe_Latn-eng_Latn": { - "min_sentence1_length": 80, - "average_sentence1_length": 253.1953125, - "max_sentence1_length": 615, - "min_sentence2_length": 56, - "average_sentence2_length": 147.55859375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 102593 - }, - "eng_Latn-nhg_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 114.359375, - "max_sentence1_length": 827, - "min_sentence2_length": 35, - "average_sentence2_length": 177.26171875, - "max_sentence2_length": 664, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74655 - }, - "nhg_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 177.26171875, - "max_sentence1_length": 664, - "min_sentence2_length": 31, - "average_sentence2_length": 114.359375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74655 - }, - "eng_Latn-nhi_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 39, - "average_sentence2_length": 171.62109375, - "max_sentence2_length": 418, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72708 - }, - "nhi_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 171.62109375, - "max_sentence1_length": 418, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72708 - }, - "eng_Latn-nho_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 113.66796875, - "max_sentence1_length": 827, - "min_sentence2_length": 32, - "average_sentence2_length": 188.51953125, - "max_sentence2_length": 474, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77360 - }, - "nho_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 188.51953125, - "max_sentence1_length": 474, - "min_sentence2_length": 31, - "average_sentence2_length": 113.66796875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77360 - }, - "eng_Latn-nhr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.19140625, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 156.34765625, - "max_sentence2_length": 465, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69514 - }, - "nhr_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 156.34765625, - "max_sentence1_length": 465, - "min_sentence2_length": 24, - "average_sentence2_length": 115.19140625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69514 - }, - "eng_Latn-nhu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 32, - "average_sentence2_length": 157.3671875, - "max_sentence2_length": 507, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69059 - }, - "nhu_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 157.3671875, - "max_sentence1_length": 507, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69059 - }, - "eng_Latn-nhw_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.34375, - "max_sentence1_length": 827, - "min_sentence2_length": 50, - "average_sentence2_length": 212.4296875, - "max_sentence2_length": 531, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83398 - }, - "nhw_Latn-eng_Latn": { - "min_sentence1_length": 50, - "average_sentence1_length": 212.4296875, - "max_sentence1_length": 531, - "min_sentence2_length": 24, - "average_sentence2_length": 113.34375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83398 - }, - "eng_Latn-nhy_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.60546875, - "max_sentence1_length": 827, - "min_sentence2_length": 47, - "average_sentence2_length": 190.59375, - "max_sentence2_length": 450, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78387 - }, - "nhy_Latn-eng_Latn": { - "min_sentence1_length": 47, - "average_sentence1_length": 190.59375, - "max_sentence1_length": 450, - "min_sentence2_length": 24, - "average_sentence2_length": 115.60546875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78387 - }, - "eng_Latn-nif_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 106.3046875, - "max_sentence1_length": 245, - "min_sentence2_length": 48, - "average_sentence2_length": 165.8671875, - "max_sentence2_length": 641, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69676 - }, - "nif_Latn-eng_Latn": { - "min_sentence1_length": 48, - "average_sentence1_length": 165.8671875, - "max_sentence1_length": 641, - "min_sentence2_length": 37, - "average_sentence2_length": 106.3046875, - "max_sentence2_length": 245, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69676 - }, - "eng_Latn-nii_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.26953125, - "max_sentence1_length": 273, - "min_sentence2_length": 59, - "average_sentence2_length": 255.140625, - "max_sentence2_length": 984, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93801 - }, - "nii_Latn-eng_Latn": { - "min_sentence1_length": 59, - "average_sentence1_length": 255.140625, - "max_sentence1_length": 984, - "min_sentence2_length": 24, - "average_sentence2_length": 111.26953125, - "max_sentence2_length": 273, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93801 - }, - "eng_Latn-nin_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 41, - "average_sentence2_length": 157.5703125, - "max_sentence2_length": 411, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69111 - }, - "nin_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 157.5703125, - "max_sentence1_length": 411, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69111 - }, - "eng_Latn-nko_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.01171875, - "max_sentence1_length": 227, - "min_sentence2_length": 23, - "average_sentence2_length": 130.33203125, - "max_sentence2_length": 394, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62040 - }, - "nko_Latn-eng_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 130.33203125, - "max_sentence1_length": 394, - "min_sentence2_length": 24, - "average_sentence2_length": 112.01171875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62040 - }, - "eng_Latn-nld_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.88671875, - "max_sentence1_length": 827, - "min_sentence2_length": 25, - "average_sentence2_length": 119.83203125, - "max_sentence2_length": 226, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60088 - }, - "nld_Latn-eng_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 119.83203125, - "max_sentence1_length": 226, - "min_sentence2_length": 24, - "average_sentence2_length": 114.88671875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60088 - }, - "eng_Latn-nlg_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.23046875, - "max_sentence1_length": 376, - "min_sentence2_length": 40, - "average_sentence2_length": 182.6015625, - "max_sentence2_length": 525, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75477 - }, - "nlg_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 182.6015625, - "max_sentence1_length": 525, - "min_sentence2_length": 24, - "average_sentence2_length": 112.23046875, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75477 - }, - "eng_Latn-nna_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 117.91796875, - "max_sentence1_length": 257, - "min_sentence2_length": 34, - "average_sentence2_length": 318.4765625, - "max_sentence2_length": 2306, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 111717 - }, - "nna_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 318.4765625, - "max_sentence1_length": 2306, - "min_sentence2_length": 23, - "average_sentence2_length": 117.91796875, - "max_sentence2_length": 257, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 111717 - }, - "eng_Latn-nnq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 25, - "average_sentence2_length": 125.62109375, - "max_sentence2_length": 323, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60932 - }, - "nnq_Latn-eng_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 125.62109375, - "max_sentence1_length": 323, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60932 - }, - "eng_Latn-noa_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 147.40625, - "max_sentence1_length": 341, - "min_sentence2_length": 42, - "average_sentence2_length": 267.203125, - "max_sentence2_length": 709, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 106140 - }, - "noa_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 267.203125, - "max_sentence1_length": 709, - "min_sentence2_length": 35, - "average_sentence2_length": 147.40625, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 106140 - }, - "eng_Latn-nop_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.6640625, - "max_sentence1_length": 827, - "min_sentence2_length": 42, - "average_sentence2_length": 176.57421875, - "max_sentence2_length": 511, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74301 - }, - "nop_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 176.57421875, - "max_sentence1_length": 511, - "min_sentence2_length": 24, - "average_sentence2_length": 113.6640625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74301 - }, - "eng_Latn-not_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.58984375, - "max_sentence1_length": 227, - "min_sentence2_length": 34, - "average_sentence2_length": 228.4375, - "max_sentence2_length": 807, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86791 - }, - "not_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 228.4375, - "max_sentence1_length": 807, - "min_sentence2_length": 24, - "average_sentence2_length": 110.58984375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86791 - }, - "eng_Latn-nou_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.13671875, - "max_sentence1_length": 228, - "min_sentence2_length": 3, - "average_sentence2_length": 214.30859375, - "max_sentence2_length": 811, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83058 - }, - "nou_Latn-eng_Latn": { - "min_sentence1_length": 3, - "average_sentence1_length": 214.30859375, - "max_sentence1_length": 811, - "min_sentence2_length": 24, - "average_sentence2_length": 110.13671875, - "max_sentence2_length": 228, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83058 - }, - "eng_Latn-npi_Deva": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 24, - "average_sentence2_length": 120.40234375, - "max_sentence2_length": 313, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59596 - }, - "npi_Deva-eng_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 120.40234375, - "max_sentence1_length": 313, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59596 - }, - "eng_Latn-npl_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 31, - "average_sentence2_length": 146.73828125, - "max_sentence2_length": 337, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66338 - }, - "npl_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 146.73828125, - "max_sentence1_length": 337, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66338 - }, - "eng_Latn-nsn_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.8046875, - "max_sentence1_length": 227, - "min_sentence2_length": 36, - "average_sentence2_length": 192.5078125, - "max_sentence2_length": 683, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77648 - }, - "nsn_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 192.5078125, - "max_sentence1_length": 683, - "min_sentence2_length": 24, - "average_sentence2_length": 110.8046875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77648 - }, - "eng_Latn-nss_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 112.18359375, - "max_sentence1_length": 245, - "min_sentence2_length": 44, - "average_sentence2_length": 167.26171875, - "max_sentence2_length": 449, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71538 - }, - "nss_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 167.26171875, - "max_sentence1_length": 449, - "min_sentence2_length": 37, - "average_sentence2_length": 112.18359375, - "max_sentence2_length": 245, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71538 - }, - "eng_Latn-ntj_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.59765625, - "max_sentence1_length": 227, - "min_sentence2_length": 56, - "average_sentence2_length": 258.41796875, - "max_sentence2_length": 857, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 94468 - }, - "ntj_Latn-eng_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 258.41796875, - "max_sentence1_length": 857, - "min_sentence2_length": 24, - "average_sentence2_length": 110.59765625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 94468 - }, - "eng_Latn-ntp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.97265625, - "max_sentence1_length": 230, - "min_sentence2_length": 31, - "average_sentence2_length": 239.52734375, - "max_sentence2_length": 1474, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89472 - }, - "ntp_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 239.52734375, - "max_sentence1_length": 1474, - "min_sentence2_length": 24, - "average_sentence2_length": 109.97265625, - "max_sentence2_length": 230, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89472 - }, - "eng_Latn-ntu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.15625, - "max_sentence1_length": 273, - "min_sentence2_length": 26, - "average_sentence2_length": 241.875, - "max_sentence2_length": 1550, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91144 - }, - "ntu_Latn-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 241.875, - "max_sentence1_length": 1550, - "min_sentence2_length": 24, - "average_sentence2_length": 114.15625, - "max_sentence2_length": 273, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91144 - }, - "eng_Latn-nuy_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 112.17578125, - "max_sentence1_length": 227, - "min_sentence2_length": 86, - "average_sentence2_length": 313.828125, - "max_sentence2_length": 1006, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 109057 - }, - "nuy_Latn-eng_Latn": { - "min_sentence1_length": 86, - "average_sentence1_length": 313.828125, - "max_sentence1_length": 1006, - "min_sentence2_length": 32, - "average_sentence2_length": 112.17578125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 109057 - }, - "eng_Latn-nvm_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.984375, - "max_sentence1_length": 232, - "min_sentence2_length": 43, - "average_sentence2_length": 257.30859375, - "max_sentence2_length": 979, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 94027 - }, - "nvm_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 257.30859375, - "max_sentence1_length": 979, - "min_sentence2_length": 24, - "average_sentence2_length": 109.984375, - "max_sentence2_length": 232, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 94027 - }, - "eng_Latn-nwi_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.6796875, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 219.140625, - "max_sentence2_length": 738, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84946 - }, - "nwi_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 219.140625, - "max_sentence1_length": 738, - "min_sentence2_length": 24, - "average_sentence2_length": 112.6796875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84946 - }, - "eng_Latn-nya_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 25, - "average_sentence2_length": 127.3203125, - "max_sentence2_length": 328, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61367 - }, - "nya_Latn-eng_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 127.3203125, - "max_sentence1_length": 328, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61367 - }, - "eng_Latn-nys_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 114.35245901639344, - "max_sentence1_length": 268, - "min_sentence2_length": 50, - "average_sentence2_length": 230.327868852459, - "max_sentence2_length": 1366, - "num_samples": 122, - "num_samples_sentence2": 122, - "number_of_characters": 42051 - }, - "nys_Latn-eng_Latn": { - "min_sentence1_length": 50, - "average_sentence1_length": 230.327868852459, - "max_sentence1_length": 1366, - "min_sentence2_length": 37, - "average_sentence2_length": 114.35245901639344, - "max_sentence2_length": 268, - "num_samples": 122, - "num_samples_sentence2": 122, - "number_of_characters": 42051 - }, - "eng_Latn-nyu_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 110.2890625, - "max_sentence1_length": 257, - "min_sentence2_length": 34, - "average_sentence2_length": 126.015625, - "max_sentence2_length": 288, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60494 - }, - "nyu_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 126.015625, - "max_sentence1_length": 288, - "min_sentence2_length": 37, - "average_sentence2_length": 110.2890625, - "max_sentence2_length": 257, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60494 - }, - "eng_Latn-obo_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 115.52734375, - "max_sentence1_length": 827, - "min_sentence2_length": 42, - "average_sentence2_length": 196.0859375, - "max_sentence2_length": 668, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79773 - }, - "obo_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 196.0859375, - "max_sentence1_length": 668, - "min_sentence2_length": 31, - "average_sentence2_length": 115.52734375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79773 - }, - "eng_Latn-okv_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 146.515625, - "max_sentence1_length": 341, - "min_sentence2_length": 50, - "average_sentence2_length": 174.48046875, - "max_sentence2_length": 807, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82175 - }, - "okv_Latn-eng_Latn": { - "min_sentence1_length": 50, - "average_sentence1_length": 174.48046875, - "max_sentence1_length": 807, - "min_sentence2_length": 35, - "average_sentence2_length": 146.515625, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82175 - }, - "eng_Latn-omw_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 122.4375, - "max_sentence1_length": 290, - "min_sentence2_length": 58, - "average_sentence2_length": 202.26171875, - "max_sentence2_length": 460, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83123 - }, - "omw_Latn-eng_Latn": { - "min_sentence1_length": 58, - "average_sentence1_length": 202.26171875, - "max_sentence1_length": 460, - "min_sentence2_length": 37, - "average_sentence2_length": 122.4375, - "max_sentence2_length": 290, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83123 - }, - "eng_Latn-ong_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 114.62890625, - "max_sentence1_length": 827, - "min_sentence2_length": 27, - "average_sentence2_length": 222.56640625, - "max_sentence2_length": 864, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86322 - }, - "ong_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 222.56640625, - "max_sentence1_length": 864, - "min_sentence2_length": 31, - "average_sentence2_length": 114.62890625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86322 - }, - "eng_Latn-ons_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.984375, - "max_sentence1_length": 827, - "min_sentence2_length": 38, - "average_sentence2_length": 188.4609375, - "max_sentence2_length": 580, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77426 - }, - "ons_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 188.4609375, - "max_sentence1_length": 580, - "min_sentence2_length": 24, - "average_sentence2_length": 113.984375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77426 - }, - "eng_Latn-ood_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.00390625, - "max_sentence1_length": 239, - "min_sentence2_length": 41, - "average_sentence2_length": 199.0078125, - "max_sentence2_length": 631, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79363 - }, - "ood_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 199.0078125, - "max_sentence1_length": 631, - "min_sentence2_length": 24, - "average_sentence2_length": 111.00390625, - "max_sentence2_length": 239, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79363 - }, - "eng_Latn-opm_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.70703125, - "max_sentence1_length": 227, - "min_sentence2_length": 59, - "average_sentence2_length": 298.8671875, - "max_sentence2_length": 1026, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 104595 - }, - "opm_Latn-eng_Latn": { - "min_sentence1_length": 59, - "average_sentence1_length": 298.8671875, - "max_sentence1_length": 1026, - "min_sentence2_length": 24, - "average_sentence2_length": 109.70703125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 104595 - }, - "eng_Latn-ory_Orya": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.76171875, - "max_sentence1_length": 827, - "min_sentence2_length": 23, - "average_sentence2_length": 120.41015625, - "max_sentence2_length": 279, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60460 - }, - "ory_Orya-eng_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 120.41015625, - "max_sentence1_length": 279, - "min_sentence2_length": 24, - "average_sentence2_length": 115.76171875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60460 - }, - "eng_Latn-ote_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.55078125, - "max_sentence1_length": 827, - "min_sentence2_length": 30, - "average_sentence2_length": 167.05078125, - "max_sentence2_length": 424, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72346 - }, - "ote_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 167.05078125, - "max_sentence1_length": 424, - "min_sentence2_length": 24, - "average_sentence2_length": 115.55078125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72346 - }, - "eng_Latn-otm_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.12890625, - "max_sentence1_length": 827, - "min_sentence2_length": 34, - "average_sentence2_length": 164.359375, - "max_sentence2_length": 500, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71293 - }, - "otm_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 164.359375, - "max_sentence1_length": 500, - "min_sentence2_length": 24, - "average_sentence2_length": 114.12890625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71293 - }, - "eng_Latn-otn_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.01953125, - "max_sentence1_length": 827, - "min_sentence2_length": 41, - "average_sentence2_length": 199.9765625, - "max_sentence2_length": 741, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80639 - }, - "otn_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 199.9765625, - "max_sentence1_length": 741, - "min_sentence2_length": 24, - "average_sentence2_length": 115.01953125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80639 - }, - "eng_Latn-otq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 32, - "average_sentence2_length": 130.1953125, - "max_sentence2_length": 328, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62865 - }, - "otq_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 130.1953125, - "max_sentence1_length": 328, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62865 - }, - "eng_Latn-ots_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.46875, - "max_sentence1_length": 827, - "min_sentence2_length": 38, - "average_sentence2_length": 265.98828125, - "max_sentence2_length": 818, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97141 - }, - "ots_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 265.98828125, - "max_sentence1_length": 818, - "min_sentence2_length": 24, - "average_sentence2_length": 113.46875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97141 - }, - "eng_Latn-pab_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 40, - "average_sentence2_length": 164.875, - "max_sentence2_length": 422, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70981 - }, - "pab_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 164.875, - "max_sentence1_length": 422, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70981 - }, - "eng_Latn-pad_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 110.1796875, - "max_sentence1_length": 230, - "min_sentence2_length": 35, - "average_sentence2_length": 313.1875, - "max_sentence2_length": 1173, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 108382 - }, - "pad_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 313.1875, - "max_sentence1_length": 1173, - "min_sentence2_length": 31, - "average_sentence2_length": 110.1796875, - "max_sentence2_length": 230, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 108382 - }, - "eng_Latn-pah_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.34375, - "max_sentence1_length": 216, - "min_sentence2_length": 40, - "average_sentence2_length": 293.7109375, - "max_sentence2_length": 1340, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 103694 - }, - "pah_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 293.7109375, - "max_sentence1_length": 1340, - "min_sentence2_length": 24, - "average_sentence2_length": 111.34375, - "max_sentence2_length": 216, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 103694 - }, - "eng_Latn-pan_Guru": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 18, - "average_sentence2_length": 109.73046875, - "max_sentence2_length": 287, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 56864 - }, - "pan_Guru-eng_Latn": { - "min_sentence1_length": 18, - "average_sentence1_length": 109.73046875, - "max_sentence1_length": 287, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 56864 - }, - "eng_Latn-pao_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 116.69140625, - "max_sentence1_length": 254, - "min_sentence2_length": 27, - "average_sentence2_length": 266.83984375, - "max_sentence2_length": 1493, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 98184 - }, - "pao_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 266.83984375, - "max_sentence1_length": 1493, - "min_sentence2_length": 24, - "average_sentence2_length": 116.69140625, - "max_sentence2_length": 254, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 98184 - }, - "eng_Latn-pes_Arab": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.51953125, - "max_sentence1_length": 827, - "min_sentence2_length": 17, - "average_sentence2_length": 87.43359375, - "max_sentence2_length": 190, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 51444 - }, - "pes_Arab-eng_Latn": { - "min_sentence1_length": 17, - "average_sentence1_length": 87.43359375, - "max_sentence1_length": 190, - "min_sentence2_length": 24, - "average_sentence2_length": 113.51953125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 51444 - }, - "eng_Latn-pib_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.3046875, - "max_sentence1_length": 827, - "min_sentence2_length": 27, - "average_sentence2_length": 130.75390625, - "max_sentence2_length": 409, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62991 - }, - "pib_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 130.75390625, - "max_sentence1_length": 409, - "min_sentence2_length": 24, - "average_sentence2_length": 115.3046875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62991 - }, - "eng_Latn-pio_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.78515625, - "max_sentence1_length": 227, - "min_sentence2_length": 44, - "average_sentence2_length": 292.265625, - "max_sentence2_length": 997, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 102925 - }, - "pio_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 292.265625, - "max_sentence1_length": 997, - "min_sentence2_length": 24, - "average_sentence2_length": 109.78515625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 102925 - }, - "eng_Latn-pir_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 111.71484375, - "max_sentence1_length": 227, - "min_sentence2_length": 31, - "average_sentence2_length": 273.71484375, - "max_sentence2_length": 1050, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 98670 - }, - "pir_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 273.71484375, - "max_sentence1_length": 1050, - "min_sentence2_length": 31, - "average_sentence2_length": 111.71484375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 98670 - }, - "eng_Latn-piu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.66015625, - "max_sentence1_length": 233, - "min_sentence2_length": 62, - "average_sentence2_length": 426.9921875, - "max_sentence2_length": 1444, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 138919 - }, - "piu_Latn-eng_Latn": { - "min_sentence1_length": 62, - "average_sentence1_length": 426.9921875, - "max_sentence1_length": 1444, - "min_sentence2_length": 24, - "average_sentence2_length": 115.66015625, - "max_sentence2_length": 233, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 138919 - }, - "eng_Latn-pjt_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.1953125, - "max_sentence1_length": 235, - "min_sentence2_length": 50, - "average_sentence2_length": 404.37109375, - "max_sentence2_length": 1358, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 131729 - }, - "pjt_Latn-eng_Latn": { - "min_sentence1_length": 50, - "average_sentence1_length": 404.37109375, - "max_sentence1_length": 1358, - "min_sentence2_length": 24, - "average_sentence2_length": 110.1953125, - "max_sentence2_length": 235, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 131729 - }, - "eng_Latn-pls_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.71484375, - "max_sentence1_length": 827, - "min_sentence2_length": 40, - "average_sentence2_length": 190.99609375, - "max_sentence2_length": 497, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78006 - }, - "pls_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 190.99609375, - "max_sentence1_length": 497, - "min_sentence2_length": 24, - "average_sentence2_length": 113.71484375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78006 - }, - "eng_Latn-plu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.98828125, - "max_sentence1_length": 376, - "min_sentence2_length": 44, - "average_sentence2_length": 212.9375, - "max_sentence2_length": 1292, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82925 - }, - "plu_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 212.9375, - "max_sentence1_length": 1292, - "min_sentence2_length": 24, - "average_sentence2_length": 110.98828125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82925 - }, - "eng_Latn-pma_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.41015625, - "max_sentence1_length": 827, - "min_sentence2_length": 28, - "average_sentence2_length": 188.00390625, - "max_sentence2_length": 549, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77418 - }, - "pma_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 188.00390625, - "max_sentence1_length": 549, - "min_sentence2_length": 24, - "average_sentence2_length": 114.41015625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77418 - }, - "eng_Latn-poe_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.53515625, - "max_sentence1_length": 827, - "min_sentence2_length": 39, - "average_sentence2_length": 171.234375, - "max_sentence2_length": 435, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72901 - }, - "poe_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 171.234375, - "max_sentence1_length": 435, - "min_sentence2_length": 24, - "average_sentence2_length": 113.53515625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72901 - }, - "eng_Latn-poh_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 113, - "average_sentence2_length": 352.9453125, - "max_sentence2_length": 889, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 119889 - }, - "poh_Latn-eng_Latn": { - "min_sentence1_length": 113, - "average_sentence1_length": 352.9453125, - "max_sentence1_length": 889, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 119889 - }, - "eng_Latn-poi_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.55078125, - "max_sentence1_length": 827, - "min_sentence2_length": 40, - "average_sentence2_length": 169.44921875, - "max_sentence2_length": 390, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72960 - }, - "poi_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 169.44921875, - "max_sentence1_length": 390, - "min_sentence2_length": 24, - "average_sentence2_length": 115.55078125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72960 - }, - "eng_Latn-pol_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 146.67578125, - "max_sentence1_length": 341, - "min_sentence2_length": 33, - "average_sentence2_length": 133.08203125, - "max_sentence2_length": 307, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71618 - }, - "pol_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 133.08203125, - "max_sentence1_length": 307, - "min_sentence2_length": 35, - "average_sentence2_length": 146.67578125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71618 - }, - "eng_Latn-pon_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.55859375, - "max_sentence1_length": 341, - "min_sentence2_length": 37, - "average_sentence2_length": 148.0234375, - "max_sentence2_length": 401, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75669 - }, - "pon_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 148.0234375, - "max_sentence1_length": 401, - "min_sentence2_length": 56, - "average_sentence2_length": 147.55859375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75669 - }, - "eng_Latn-por_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.55859375, - "max_sentence1_length": 341, - "min_sentence2_length": 53, - "average_sentence2_length": 161.03125, - "max_sentence2_length": 398, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78999 - }, - "por_Latn-eng_Latn": { - "min_sentence1_length": 53, - "average_sentence1_length": 161.03125, - "max_sentence1_length": 398, - "min_sentence2_length": 56, - "average_sentence2_length": 147.55859375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78999 - }, - "eng_Latn-poy_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 25, - "average_sentence2_length": 142.625, - "max_sentence2_length": 360, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65285 - }, - "poy_Latn-eng_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 142.625, - "max_sentence1_length": 360, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65285 - }, - "eng_Latn-ppo_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 108.71875, - "max_sentence1_length": 227, - "min_sentence2_length": 59, - "average_sentence2_length": 275.375, - "max_sentence2_length": 1155, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 98328 - }, - "ppo_Latn-eng_Latn": { - "min_sentence1_length": 59, - "average_sentence1_length": 275.375, - "max_sentence1_length": 1155, - "min_sentence2_length": 25, - "average_sentence2_length": 108.71875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 98328 - }, - "eng_Latn-prf_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.51953125, - "max_sentence1_length": 827, - "min_sentence2_length": 33, - "average_sentence2_length": 141.7890625, - "max_sentence2_length": 318, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65871 - }, - "prf_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 141.7890625, - "max_sentence1_length": 318, - "min_sentence2_length": 24, - "average_sentence2_length": 115.51953125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65871 - }, - "eng_Latn-pri_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.42578125, - "max_sentence1_length": 232, - "min_sentence2_length": 23, - "average_sentence2_length": 234.76171875, - "max_sentence2_length": 691, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88880 - }, - "pri_Latn-eng_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 234.76171875, - "max_sentence1_length": 691, - "min_sentence2_length": 24, - "average_sentence2_length": 112.42578125, - "max_sentence2_length": 232, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88880 - }, - "eng_Latn-ptp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.2421875, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 184.25390625, - "max_sentence2_length": 592, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76415 - }, - "ptp_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 184.25390625, - "max_sentence1_length": 592, - "min_sentence2_length": 24, - "average_sentence2_length": 114.2421875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76415 - }, - "eng_Latn-ptu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.14453125, - "max_sentence1_length": 227, - "min_sentence2_length": 31, - "average_sentence2_length": 186.8203125, - "max_sentence2_length": 566, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76023 - }, - "ptu_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 186.8203125, - "max_sentence1_length": 566, - "min_sentence2_length": 24, - "average_sentence2_length": 110.14453125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76023 - }, - "eng_Latn-pwg_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.4921875, - "max_sentence1_length": 376, - "min_sentence2_length": 32, - "average_sentence2_length": 168.41015625, - "max_sentence2_length": 546, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71655 - }, - "pwg_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 168.41015625, - "max_sentence1_length": 546, - "min_sentence2_length": 24, - "average_sentence2_length": 111.4921875, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71655 - }, - "eng_Latn-qub_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 116.26953125, - "max_sentence1_length": 827, - "min_sentence2_length": 34, - "average_sentence2_length": 162.5703125, - "max_sentence2_length": 540, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71383 - }, - "qub_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 162.5703125, - "max_sentence1_length": 540, - "min_sentence2_length": 24, - "average_sentence2_length": 116.26953125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71383 - }, - "eng_Latn-quc_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.15234375, - "max_sentence1_length": 341, - "min_sentence2_length": 61, - "average_sentence2_length": 217.57421875, - "max_sentence2_length": 504, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93370 - }, - "quc_Latn-eng_Latn": { - "min_sentence1_length": 61, - "average_sentence1_length": 217.57421875, - "max_sentence1_length": 504, - "min_sentence2_length": 56, - "average_sentence2_length": 147.15234375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93370 - }, - "eng_Latn-quf_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 110.26953125, - "max_sentence1_length": 227, - "min_sentence2_length": 61, - "average_sentence2_length": 236.96875, - "max_sentence2_length": 657, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88893 - }, - "quf_Latn-eng_Latn": { - "min_sentence1_length": 61, - "average_sentence1_length": 236.96875, - "max_sentence1_length": 657, - "min_sentence2_length": 31, - "average_sentence2_length": 110.26953125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88893 - }, - "eng_Latn-quh_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.81640625, - "max_sentence1_length": 827, - "min_sentence2_length": 38, - "average_sentence2_length": 163.36328125, - "max_sentence2_length": 600, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71214 - }, - "quh_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 163.36328125, - "max_sentence1_length": 600, - "min_sentence2_length": 24, - "average_sentence2_length": 114.81640625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71214 - }, - "eng_Latn-qul_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.6328125, - "max_sentence1_length": 827, - "min_sentence2_length": 27, - "average_sentence2_length": 130.6796875, - "max_sentence2_length": 286, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63056 - }, - "qul_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 130.6796875, - "max_sentence1_length": 286, - "min_sentence2_length": 24, - "average_sentence2_length": 115.6328125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63056 - }, - "eng_Latn-qup_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.0, - "max_sentence1_length": 227, - "min_sentence2_length": 44, - "average_sentence2_length": 286.640625, - "max_sentence2_length": 955, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101796 - }, - "qup_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 286.640625, - "max_sentence1_length": 955, - "min_sentence2_length": 24, - "average_sentence2_length": 111.0, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101796 - }, - "eng_Latn-qvc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.984375, - "max_sentence1_length": 230, - "min_sentence2_length": 54, - "average_sentence2_length": 229.4765625, - "max_sentence2_length": 807, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87158 - }, - "qvc_Latn-eng_Latn": { - "min_sentence1_length": 54, - "average_sentence1_length": 229.4765625, - "max_sentence1_length": 807, - "min_sentence2_length": 24, - "average_sentence2_length": 110.984375, - "max_sentence2_length": 230, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87158 - }, - "eng_Latn-qve_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 63, - "average_sentence2_length": 217.6484375, - "max_sentence2_length": 630, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84491 - }, - "qve_Latn-eng_Latn": { - "min_sentence1_length": 63, - "average_sentence1_length": 217.6484375, - "max_sentence1_length": 630, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84491 - }, - "eng_Latn-qvh_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.80078125, - "max_sentence1_length": 271, - "min_sentence2_length": 32, - "average_sentence2_length": 230.640625, - "max_sentence2_length": 759, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88177 - }, - "qvh_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 230.640625, - "max_sentence1_length": 759, - "min_sentence2_length": 24, - "average_sentence2_length": 113.80078125, - "max_sentence2_length": 271, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88177 - }, - "eng_Latn-qvm_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.80078125, - "max_sentence1_length": 271, - "min_sentence2_length": 36, - "average_sentence2_length": 238.71484375, - "max_sentence2_length": 762, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90244 - }, - "qvm_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 238.71484375, - "max_sentence1_length": 762, - "min_sentence2_length": 24, - "average_sentence2_length": 113.80078125, - "max_sentence2_length": 271, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90244 - }, - "eng_Latn-qvn_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.42578125, - "max_sentence1_length": 827, - "min_sentence2_length": 38, - "average_sentence2_length": 175.89453125, - "max_sentence2_length": 523, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74578 - }, - "qvn_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 175.89453125, - "max_sentence1_length": 523, - "min_sentence2_length": 24, - "average_sentence2_length": 115.42578125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74578 - }, - "eng_Latn-qvs_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.2265625, - "max_sentence1_length": 827, - "min_sentence2_length": 47, - "average_sentence2_length": 201.8828125, - "max_sentence2_length": 637, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80924 - }, - "qvs_Latn-eng_Latn": { - "min_sentence1_length": 47, - "average_sentence1_length": 201.8828125, - "max_sentence1_length": 637, - "min_sentence2_length": 24, - "average_sentence2_length": 114.2265625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80924 - }, - "eng_Latn-qvw_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.88671875, - "max_sentence1_length": 227, - "min_sentence2_length": 41, - "average_sentence2_length": 174.16015625, - "max_sentence2_length": 539, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73228 - }, - "qvw_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 174.16015625, - "max_sentence1_length": 539, - "min_sentence2_length": 24, - "average_sentence2_length": 111.88671875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73228 - }, - "eng_Latn-qvz_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.54296875, - "max_sentence1_length": 827, - "min_sentence2_length": 35, - "average_sentence2_length": 202.98046875, - "max_sentence2_length": 625, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81286 - }, - "qvz_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 202.98046875, - "max_sentence1_length": 625, - "min_sentence2_length": 24, - "average_sentence2_length": 114.54296875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81286 - }, - "eng_Latn-qwh_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.5078125, - "max_sentence1_length": 227, - "min_sentence2_length": 38, - "average_sentence2_length": 173.35546875, - "max_sentence2_length": 490, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72925 - }, - "qwh_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 173.35546875, - "max_sentence1_length": 490, - "min_sentence2_length": 24, - "average_sentence2_length": 111.5078125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72925 - }, - "eng_Latn-qxh_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 108.95703125, - "max_sentence1_length": 227, - "min_sentence2_length": 31, - "average_sentence2_length": 188.90625, - "max_sentence2_length": 569, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76253 - }, - "qxh_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 188.90625, - "max_sentence1_length": 569, - "min_sentence2_length": 24, - "average_sentence2_length": 108.95703125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76253 - }, - "eng_Latn-qxn_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.30078125, - "max_sentence1_length": 827, - "min_sentence2_length": 48, - "average_sentence2_length": 196.8671875, - "max_sentence2_length": 528, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79403 - }, - "qxn_Latn-eng_Latn": { - "min_sentence1_length": 48, - "average_sentence1_length": 196.8671875, - "max_sentence1_length": 528, - "min_sentence2_length": 24, - "average_sentence2_length": 113.30078125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79403 - }, - "eng_Latn-qxo_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.046875, - "max_sentence1_length": 227, - "min_sentence2_length": 43, - "average_sentence2_length": 246.4609375, - "max_sentence2_length": 910, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91522 - }, - "qxo_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 246.4609375, - "max_sentence1_length": 910, - "min_sentence2_length": 24, - "average_sentence2_length": 111.046875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91522 - }, - "eng_Latn-rai_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.71875, - "max_sentence1_length": 827, - "min_sentence2_length": 35, - "average_sentence2_length": 177.04296875, - "max_sentence2_length": 445, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74435 - }, - "rai_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 177.04296875, - "max_sentence1_length": 445, - "min_sentence2_length": 24, - "average_sentence2_length": 113.71875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74435 - }, - "eng_Latn-reg_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.75390625, - "max_sentence1_length": 227, - "min_sentence2_length": 24, - "average_sentence2_length": 134.6328125, - "max_sentence2_length": 342, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62819 - }, - "reg_Latn-eng_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 134.6328125, - "max_sentence1_length": 342, - "min_sentence2_length": 24, - "average_sentence2_length": 110.75390625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62819 - }, - "eng_Latn-rgu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.55859375, - "max_sentence1_length": 251, - "min_sentence2_length": 38, - "average_sentence2_length": 265.78125, - "max_sentence2_length": 1233, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97367 - }, - "rgu_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 265.78125, - "max_sentence1_length": 1233, - "min_sentence2_length": 24, - "average_sentence2_length": 114.55859375, - "max_sentence2_length": 251, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97367 - }, - "eng_Latn-rkb_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 109.2421875, - "max_sentence1_length": 251, - "min_sentence2_length": 35, - "average_sentence2_length": 305.28125, - "max_sentence2_length": 1129, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 106118 - }, - "rkb_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 305.28125, - "max_sentence1_length": 1129, - "min_sentence2_length": 25, - "average_sentence2_length": 109.2421875, - "max_sentence2_length": 251, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 106118 - }, - "eng_Latn-rmc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.87109375, - "max_sentence1_length": 827, - "min_sentence2_length": 27, - "average_sentence2_length": 126.6015625, - "max_sentence2_length": 288, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61817 - }, - "rmc_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 126.6015625, - "max_sentence1_length": 288, - "min_sentence2_length": 24, - "average_sentence2_length": 114.87109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61817 - }, - "eng_Latn-rmy_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 100.94140625, - "max_sentence1_length": 218, - "min_sentence2_length": 26, - "average_sentence2_length": 106.82421875, - "max_sentence2_length": 247, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 53188 - }, - "rmy_Latn-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 106.82421875, - "max_sentence1_length": 247, - "min_sentence2_length": 26, - "average_sentence2_length": 100.94140625, - "max_sentence2_length": 218, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 53188 - }, - "eng_Latn-ron_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.55859375, - "max_sentence1_length": 341, - "min_sentence2_length": 54, - "average_sentence2_length": 143.33203125, - "max_sentence2_length": 339, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74468 - }, - "ron_Latn-eng_Latn": { - "min_sentence1_length": 54, - "average_sentence1_length": 143.33203125, - "max_sentence1_length": 339, - "min_sentence2_length": 56, - "average_sentence2_length": 147.55859375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74468 - }, - "eng_Latn-roo_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.71484375, - "max_sentence1_length": 227, - "min_sentence2_length": 37, - "average_sentence2_length": 285.2890625, - "max_sentence2_length": 1183, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101889 - }, - "roo_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 285.2890625, - "max_sentence1_length": 1183, - "min_sentence2_length": 24, - "average_sentence2_length": 112.71484375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101889 - }, - "eng_Latn-rop_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.8125, - "max_sentence1_length": 231, - "min_sentence2_length": 43, - "average_sentence2_length": 269.47265625, - "max_sentence2_length": 844, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97353 - }, - "rop_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 269.47265625, - "max_sentence1_length": 844, - "min_sentence2_length": 24, - "average_sentence2_length": 110.8125, - "max_sentence2_length": 231, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97353 - }, - "eng_Latn-row_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.76953125, - "max_sentence1_length": 251, - "min_sentence2_length": 33, - "average_sentence2_length": 257.19140625, - "max_sentence2_length": 1241, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 95222 - }, - "row_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 257.19140625, - "max_sentence1_length": 1241, - "min_sentence2_length": 24, - "average_sentence2_length": 114.76953125, - "max_sentence2_length": 251, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 95222 - }, - "eng_Latn-rro_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 35, - "average_sentence2_length": 150.671875, - "max_sentence2_length": 352, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68107 - }, - "rro_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 150.671875, - "max_sentence1_length": 352, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68107 - }, - "eng_Latn-ruf_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 30, - "average_sentence2_length": 130.6015625, - "max_sentence2_length": 316, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62207 - }, - "ruf_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 130.6015625, - "max_sentence1_length": 316, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62207 - }, - "eng_Latn-rug_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.87109375, - "max_sentence1_length": 827, - "min_sentence2_length": 35, - "average_sentence2_length": 174.76171875, - "max_sentence2_length": 480, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74146 - }, - "rug_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 174.76171875, - "max_sentence1_length": 480, - "min_sentence2_length": 24, - "average_sentence2_length": 114.87109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74146 - }, - "eng_Latn-rus_Cyrl": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.4375, - "max_sentence1_length": 827, - "min_sentence2_length": 17, - "average_sentence2_length": 92.38671875, - "max_sentence2_length": 233, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 52947 - }, - "rus_Cyrl-eng_Latn": { - "min_sentence1_length": 17, - "average_sentence1_length": 92.38671875, - "max_sentence1_length": 233, - "min_sentence2_length": 24, - "average_sentence2_length": 114.4375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 52947 - }, - "eng_Latn-rwo_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 145.0625, - "max_sentence1_length": 341, - "min_sentence2_length": 85, - "average_sentence2_length": 306.8515625, - "max_sentence2_length": 871, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 115690 - }, - "rwo_Latn-eng_Latn": { - "min_sentence1_length": 85, - "average_sentence1_length": 306.8515625, - "max_sentence1_length": 871, - "min_sentence2_length": 35, - "average_sentence2_length": 145.0625, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 115690 - }, - "eng_Latn-sab_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.7109375, - "max_sentence1_length": 246, - "min_sentence2_length": 29, - "average_sentence2_length": 446.453125, - "max_sentence2_length": 1781, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 143658 - }, - "sab_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 446.453125, - "max_sentence1_length": 1781, - "min_sentence2_length": 24, - "average_sentence2_length": 114.7109375, - "max_sentence2_length": 246, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 143658 - }, - "eng_Latn-san_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 145.33203125, - "max_sentence1_length": 341, - "min_sentence2_length": 2, - "average_sentence2_length": 159.125, - "max_sentence2_length": 429, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77941 - }, - "san_Latn-eng_Latn": { - "min_sentence1_length": 2, - "average_sentence1_length": 159.125, - "max_sentence1_length": 429, - "min_sentence2_length": 1, - "average_sentence2_length": 145.33203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77941 - }, - "eng_Latn-sbe_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.0546875, - "max_sentence1_length": 239, - "min_sentence2_length": 31, - "average_sentence2_length": 179.20703125, - "max_sentence2_length": 666, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74051 - }, - "sbe_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 179.20703125, - "max_sentence1_length": 666, - "min_sentence2_length": 24, - "average_sentence2_length": 110.0546875, - "max_sentence2_length": 239, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74051 - }, - "eng_Latn-sbk_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.32421875, - "max_sentence1_length": 827, - "min_sentence2_length": 17, - "average_sentence2_length": 112.703125, - "max_sentence2_length": 250, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58375 - }, - "sbk_Latn-eng_Latn": { - "min_sentence1_length": 17, - "average_sentence1_length": 112.703125, - "max_sentence1_length": 250, - "min_sentence2_length": 24, - "average_sentence2_length": 115.32421875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58375 - }, - "eng_Latn-sbs_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.984375, - "max_sentence1_length": 237, - "min_sentence2_length": 23, - "average_sentence2_length": 117.47265625, - "max_sentence2_length": 434, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58485 - }, - "sbs_Latn-eng_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 117.47265625, - "max_sentence1_length": 434, - "min_sentence2_length": 24, - "average_sentence2_length": 110.984375, - "max_sentence2_length": 237, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58485 - }, - "eng_Latn-seh_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 109.953125, - "max_sentence1_length": 257, - "min_sentence2_length": 28, - "average_sentence2_length": 128.59765625, - "max_sentence2_length": 298, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61069 - }, - "seh_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 128.59765625, - "max_sentence1_length": 298, - "min_sentence2_length": 37, - "average_sentence2_length": 109.953125, - "max_sentence2_length": 257, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61069 - }, - "eng_Latn-sey_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.765625, - "max_sentence1_length": 827, - "min_sentence2_length": 34, - "average_sentence2_length": 168.8046875, - "max_sentence2_length": 483, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72594 - }, - "sey_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 168.8046875, - "max_sentence1_length": 483, - "min_sentence2_length": 24, - "average_sentence2_length": 114.765625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72594 - }, - "eng_Latn-sgb_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.53515625, - "max_sentence1_length": 227, - "min_sentence2_length": 38, - "average_sentence2_length": 173.15234375, - "max_sentence2_length": 574, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72880 - }, - "sgb_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 173.15234375, - "max_sentence1_length": 574, - "min_sentence2_length": 24, - "average_sentence2_length": 111.53515625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72880 - }, - "eng_Latn-sgz_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.1171875, - "max_sentence1_length": 227, - "min_sentence2_length": 36, - "average_sentence2_length": 245.6015625, - "max_sentence2_length": 964, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90808 - }, - "sgz_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 245.6015625, - "max_sentence1_length": 964, - "min_sentence2_length": 24, - "average_sentence2_length": 109.1171875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90808 - }, - "eng_Latn-shj_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 108.34065934065934, - "max_sentence1_length": 245, - "min_sentence2_length": 25, - "average_sentence2_length": 114.28571428571429, - "max_sentence2_length": 254, - "num_samples": 91, - "num_samples_sentence2": 91, - "number_of_characters": 20259 - }, - "shj_Latn-eng_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 114.28571428571429, - "max_sentence1_length": 254, - "min_sentence2_length": 37, - "average_sentence2_length": 108.34065934065934, - "max_sentence2_length": 245, - "num_samples": 91, - "num_samples_sentence2": 91, - "number_of_characters": 20259 - }, - "eng_Latn-shp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 35, - "average_sentence2_length": 150.4765625, - "max_sentence2_length": 322, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67295 - }, - "shp_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 150.4765625, - "max_sentence1_length": 322, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67295 - }, - "eng_Latn-sim_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 114.4453125, - "max_sentence1_length": 238, - "min_sentence2_length": 47, - "average_sentence2_length": 246.4921875, - "max_sentence2_length": 975, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92400 - }, - "sim_Latn-eng_Latn": { - "min_sentence1_length": 47, - "average_sentence1_length": 246.4921875, - "max_sentence1_length": 975, - "min_sentence2_length": 31, - "average_sentence2_length": 114.4453125, - "max_sentence2_length": 238, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92400 - }, - "eng_Latn-sja_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.34765625, - "max_sentence1_length": 827, - "min_sentence2_length": 30, - "average_sentence2_length": 213.08984375, - "max_sentence2_length": 513, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84080 - }, - "sja_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 213.08984375, - "max_sentence1_length": 513, - "min_sentence2_length": 24, - "average_sentence2_length": 115.34765625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84080 - }, - "eng_Latn-sll_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.078125, - "max_sentence1_length": 227, - "min_sentence2_length": 33, - "average_sentence2_length": 220.609375, - "max_sentence2_length": 1080, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84656 - }, - "sll_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 220.609375, - "max_sentence1_length": 1080, - "min_sentence2_length": 24, - "average_sentence2_length": 110.078125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84656 - }, - "eng_Latn-smk_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.5078125, - "max_sentence1_length": 827, - "min_sentence2_length": 42, - "average_sentence2_length": 203.859375, - "max_sentence2_length": 567, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81502 - }, - "smk_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 203.859375, - "max_sentence1_length": 567, - "min_sentence2_length": 24, - "average_sentence2_length": 114.5078125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81502 - }, - "eng_Latn-snc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.4921875, - "max_sentence1_length": 827, - "min_sentence2_length": 30, - "average_sentence2_length": 179.6328125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75296 - }, - "snc_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 179.6328125, - "max_sentence1_length": 827, - "min_sentence2_length": 24, - "average_sentence2_length": 114.4921875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75296 - }, - "eng_Latn-snn_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.234375, - "max_sentence1_length": 233, - "min_sentence2_length": 36, - "average_sentence2_length": 254.91796875, - "max_sentence2_length": 841, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93479 - }, - "snn_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 254.91796875, - "max_sentence1_length": 841, - "min_sentence2_length": 24, - "average_sentence2_length": 110.234375, - "max_sentence2_length": 233, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93479 - }, - "eng_Latn-snp_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 145.33203125, - "max_sentence1_length": 341, - "min_sentence2_length": 49, - "average_sentence2_length": 219.875, - "max_sentence2_length": 619, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93493 - }, - "snp_Latn-eng_Latn": { - "min_sentence1_length": 49, - "average_sentence1_length": 219.875, - "max_sentence1_length": 619, - "min_sentence2_length": 1, - "average_sentence2_length": 145.33203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93493 - }, - "eng_Latn-snx_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 108.87857142857143, - "max_sentence1_length": 257, - "min_sentence2_length": 42, - "average_sentence2_length": 132.65, - "max_sentence2_length": 313, - "num_samples": 140, - "num_samples_sentence2": 140, - "number_of_characters": 33814 - }, - "snx_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 132.65, - "max_sentence1_length": 313, - "min_sentence2_length": 39, - "average_sentence2_length": 108.87857142857143, - "max_sentence2_length": 257, - "num_samples": 140, - "num_samples_sentence2": 140, - "number_of_characters": 33814 - }, - "eng_Latn-sny_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.15625, - "max_sentence1_length": 227, - "min_sentence2_length": 44, - "average_sentence2_length": 317.09765625, - "max_sentence2_length": 1158, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 109633 - }, - "sny_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 317.09765625, - "max_sentence1_length": 1158, - "min_sentence2_length": 24, - "average_sentence2_length": 111.15625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 109633 - }, - "eng_Latn-som_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 18, - "average_sentence2_length": 125.13671875, - "max_sentence2_length": 311, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61570 - }, - "som_Latn-eng_Latn": { - "min_sentence1_length": 18, - "average_sentence1_length": 125.13671875, - "max_sentence1_length": 311, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61570 - }, - "eng_Latn-soq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.28515625, - "max_sentence1_length": 227, - "min_sentence2_length": 29, - "average_sentence2_length": 215.83203125, - "max_sentence2_length": 727, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83230 - }, - "soq_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 215.83203125, - "max_sentence1_length": 727, - "min_sentence2_length": 24, - "average_sentence2_length": 109.28515625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83230 - }, - "eng_Latn-soy_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.36328125, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 136.3359375, - "max_sentence2_length": 317, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64435 - }, - "soy_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 136.3359375, - "max_sentence1_length": 317, - "min_sentence2_length": 24, - "average_sentence2_length": 115.36328125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64435 - }, - "eng_Latn-spa_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.55859375, - "max_sentence1_length": 341, - "min_sentence2_length": 50, - "average_sentence2_length": 146.875, - "max_sentence2_length": 371, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75375 - }, - "spa_Latn-eng_Latn": { - "min_sentence1_length": 50, - "average_sentence1_length": 146.875, - "max_sentence1_length": 371, - "min_sentence2_length": 56, - "average_sentence2_length": 147.55859375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75375 - }, - "eng_Latn-spl_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 112.36328125, - "max_sentence1_length": 265, - "min_sentence2_length": 47, - "average_sentence2_length": 401.6015625, - "max_sentence2_length": 1260, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 131575 - }, - "spl_Latn-eng_Latn": { - "min_sentence1_length": 47, - "average_sentence1_length": 401.6015625, - "max_sentence1_length": 1260, - "min_sentence2_length": 25, - "average_sentence2_length": 112.36328125, - "max_sentence2_length": 265, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 131575 - }, - "eng_Latn-spm_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 114.765625, - "max_sentence1_length": 257, - "min_sentence2_length": 41, - "average_sentence2_length": 219.171875, - "max_sentence2_length": 596, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85488 - }, - "spm_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 219.171875, - "max_sentence1_length": 596, - "min_sentence2_length": 38, - "average_sentence2_length": 114.765625, - "max_sentence2_length": 257, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85488 - }, - "eng_Latn-spp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.95703125, - "max_sentence1_length": 227, - "min_sentence2_length": 31, - "average_sentence2_length": 167.06640625, - "max_sentence2_length": 601, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71174 - }, - "spp_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 167.06640625, - "max_sentence1_length": 601, - "min_sentence2_length": 24, - "average_sentence2_length": 110.95703125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71174 - }, - "eng_Latn-sps_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.609375, - "max_sentence1_length": 827, - "min_sentence2_length": 38, - "average_sentence2_length": 242.5859375, - "max_sentence2_length": 789, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91698 - }, - "sps_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 242.5859375, - "max_sentence1_length": 789, - "min_sentence2_length": 24, - "average_sentence2_length": 115.609375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91698 - }, - "eng_Latn-spy_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.4140625, - "max_sentence1_length": 827, - "min_sentence2_length": 32, - "average_sentence2_length": 145.1640625, - "max_sentence2_length": 398, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66196 - }, - "spy_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 145.1640625, - "max_sentence1_length": 398, - "min_sentence2_length": 24, - "average_sentence2_length": 113.4140625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66196 - }, - "eng_Latn-sri_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.62109375, - "max_sentence1_length": 227, - "min_sentence2_length": 29, - "average_sentence2_length": 208.41015625, - "max_sentence2_length": 543, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81672 - }, - "sri_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 208.41015625, - "max_sentence1_length": 543, - "min_sentence2_length": 24, - "average_sentence2_length": 110.62109375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81672 - }, - "eng_Latn-srm_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.73046875, - "max_sentence1_length": 227, - "min_sentence2_length": 39, - "average_sentence2_length": 250.09765625, - "max_sentence2_length": 997, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92372 - }, - "srm_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 250.09765625, - "max_sentence1_length": 997, - "min_sentence2_length": 24, - "average_sentence2_length": 110.73046875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92372 - }, - "eng_Latn-srn_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.71484375, - "max_sentence1_length": 827, - "min_sentence2_length": 34, - "average_sentence2_length": 131.99609375, - "max_sentence2_length": 315, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63158 - }, - "srn_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 131.99609375, - "max_sentence1_length": 315, - "min_sentence2_length": 24, - "average_sentence2_length": 114.71484375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63158 - }, - "eng_Latn-srp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.67578125, - "max_sentence1_length": 827, - "min_sentence2_length": 27, - "average_sentence2_length": 89.53515625, - "max_sentence2_length": 220, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 52022 - }, - "srp_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 89.53515625, - "max_sentence1_length": 220, - "min_sentence2_length": 24, - "average_sentence2_length": 113.67578125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 52022 - }, - "eng_Latn-srq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.078125, - "max_sentence1_length": 827, - "min_sentence2_length": 35, - "average_sentence2_length": 165.5703125, - "max_sentence2_length": 532, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71590 - }, - "srq_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 165.5703125, - "max_sentence1_length": 532, - "min_sentence2_length": 24, - "average_sentence2_length": 114.078125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71590 - }, - "eng_Latn-ssd_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 108.75, - "max_sentence1_length": 227, - "min_sentence2_length": 28, - "average_sentence2_length": 174.31640625, - "max_sentence2_length": 502, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72465 - }, - "ssd_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 174.31640625, - "max_sentence1_length": 502, - "min_sentence2_length": 24, - "average_sentence2_length": 108.75, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72465 - }, - "eng_Latn-ssg_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.9296875, - "max_sentence1_length": 827, - "min_sentence2_length": 32, - "average_sentence2_length": 164.51171875, - "max_sentence2_length": 461, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71281 - }, - "ssg_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 164.51171875, - "max_sentence1_length": 461, - "min_sentence2_length": 24, - "average_sentence2_length": 113.9296875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71281 - }, - "eng_Latn-ssx_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 116.21875, - "max_sentence1_length": 827, - "min_sentence2_length": 36, - "average_sentence2_length": 213.1640625, - "max_sentence2_length": 493, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84322 - }, - "ssx_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 213.1640625, - "max_sentence1_length": 493, - "min_sentence2_length": 24, - "average_sentence2_length": 116.21875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84322 - }, - "eng_Latn-stp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.640625, - "max_sentence1_length": 251, - "min_sentence2_length": 46, - "average_sentence2_length": 279.2734375, - "max_sentence2_length": 743, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99562 - }, - "stp_Latn-eng_Latn": { - "min_sentence1_length": 46, - "average_sentence1_length": 279.2734375, - "max_sentence1_length": 743, - "min_sentence2_length": 24, - "average_sentence2_length": 109.640625, - "max_sentence2_length": 251, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99562 - }, - "eng_Latn-sua_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 109.33984375, - "max_sentence1_length": 227, - "min_sentence2_length": 53, - "average_sentence2_length": 425.78515625, - "max_sentence2_length": 1832, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 136992 - }, - "sua_Latn-eng_Latn": { - "min_sentence1_length": 53, - "average_sentence1_length": 425.78515625, - "max_sentence1_length": 1832, - "min_sentence2_length": 25, - "average_sentence2_length": 109.33984375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 136992 - }, - "eng_Latn-sue_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.70703125, - "max_sentence1_length": 827, - "min_sentence2_length": 25, - "average_sentence2_length": 170.77734375, - "max_sentence2_length": 777, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73084 - }, - "sue_Latn-eng_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 170.77734375, - "max_sentence1_length": 777, - "min_sentence2_length": 24, - "average_sentence2_length": 114.70703125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73084 - }, - "eng_Latn-sus_Arab": { - "min_sentence1_length": 1, - "average_sentence1_length": 145.33203125, - "max_sentence1_length": 341, - "min_sentence2_length": 27, - "average_sentence2_length": 147.64453125, - "max_sentence2_length": 435, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75002 - }, - "sus_Arab-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 147.64453125, - "max_sentence1_length": 435, - "min_sentence2_length": 1, - "average_sentence2_length": 145.33203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75002 - }, - "eng_Latn-suz_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 26, - "average_sentence2_length": 150.5703125, - "max_sentence2_length": 456, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67319 - }, - "suz_Latn-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 150.5703125, - "max_sentence1_length": 456, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67319 - }, - "eng_Latn-swe_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 33, - "average_sentence2_length": 274.54296875, - "max_sentence2_length": 1575, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99056 - }, - "swe_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 274.54296875, - "max_sentence1_length": 1575, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99056 - }, - "eng_Latn-swh_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.55859375, - "max_sentence1_length": 341, - "min_sentence2_length": 43, - "average_sentence2_length": 152.96484375, - "max_sentence2_length": 378, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76934 - }, - "swh_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 152.96484375, - "max_sentence1_length": 378, - "min_sentence2_length": 56, - "average_sentence2_length": 147.55859375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76934 - }, - "eng_Latn-swp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.3125, - "max_sentence1_length": 227, - "min_sentence2_length": 24, - "average_sentence2_length": 129.51953125, - "max_sentence2_length": 378, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61397 - }, - "swp_Latn-eng_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 129.51953125, - "max_sentence1_length": 378, - "min_sentence2_length": 24, - "average_sentence2_length": 110.3125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61397 - }, - "eng_Latn-sxb_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.4453125, - "max_sentence1_length": 227, - "min_sentence2_length": 27, - "average_sentence2_length": 154.4921875, - "max_sentence2_length": 393, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67824 - }, - "sxb_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 154.4921875, - "max_sentence1_length": 393, - "min_sentence2_length": 24, - "average_sentence2_length": 110.4453125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67824 - }, - "eng_Latn-tac_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.203125, - "max_sentence1_length": 227, - "min_sentence2_length": 43, - "average_sentence2_length": 224.921875, - "max_sentence2_length": 673, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85792 - }, - "tac_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 224.921875, - "max_sentence1_length": 673, - "min_sentence2_length": 24, - "average_sentence2_length": 110.203125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85792 - }, - "eng_Latn-taj_Deva": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.34375, - "max_sentence1_length": 827, - "min_sentence2_length": 35, - "average_sentence2_length": 151.29296875, - "max_sentence2_length": 392, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67747 - }, - "taj_Deva-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 151.29296875, - "max_sentence1_length": 392, - "min_sentence2_length": 24, - "average_sentence2_length": 113.34375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67747 - }, - "eng_Latn-tam_Taml": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.296875, - "max_sentence1_length": 827, - "min_sentence2_length": 30, - "average_sentence2_length": 135.61328125, - "max_sentence2_length": 304, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64233 - }, - "tam_Taml-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 135.61328125, - "max_sentence1_length": 304, - "min_sentence2_length": 24, - "average_sentence2_length": 115.296875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64233 - }, - "eng_Latn-tav_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.55859375, - "max_sentence1_length": 230, - "min_sentence2_length": 37, - "average_sentence2_length": 326.8125, - "max_sentence2_length": 1637, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 112479 - }, - "tav_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 326.8125, - "max_sentence1_length": 1637, - "min_sentence2_length": 24, - "average_sentence2_length": 112.55859375, - "max_sentence2_length": 230, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 112479 - }, - "eng_Latn-taw_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 111.49609375, - "max_sentence1_length": 227, - "min_sentence2_length": 41, - "average_sentence2_length": 227.09375, - "max_sentence2_length": 647, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86679 - }, - "taw_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 227.09375, - "max_sentence1_length": 647, - "min_sentence2_length": 31, - "average_sentence2_length": 111.49609375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86679 - }, - "eng_Latn-tbc_Latn": { - "min_sentence1_length": 21, - "average_sentence1_length": 113.28515625, - "max_sentence1_length": 827, - "min_sentence2_length": 36, - "average_sentence2_length": 195.09765625, - "max_sentence2_length": 552, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78946 - }, - "tbc_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 195.09765625, - "max_sentence1_length": 552, - "min_sentence2_length": 21, - "average_sentence2_length": 113.28515625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78946 - }, - "eng_Latn-tbf_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.50390625, - "max_sentence1_length": 227, - "min_sentence2_length": 35, - "average_sentence2_length": 170.6328125, - "max_sentence2_length": 527, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71971 - }, - "tbf_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 170.6328125, - "max_sentence1_length": 527, - "min_sentence2_length": 24, - "average_sentence2_length": 110.50390625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71971 - }, - "eng_Latn-tbg_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 105.53515625, - "max_sentence1_length": 217, - "min_sentence2_length": 18, - "average_sentence2_length": 176.765625, - "max_sentence2_length": 557, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72269 - }, - "tbg_Latn-eng_Latn": { - "min_sentence1_length": 18, - "average_sentence1_length": 176.765625, - "max_sentence1_length": 557, - "min_sentence2_length": 29, - "average_sentence2_length": 105.53515625, - "max_sentence2_length": 217, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72269 - }, - "eng_Latn-tbo_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 114.03125, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 147.125, - "max_sentence2_length": 495, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66856 - }, - "tbo_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 147.125, - "max_sentence1_length": 495, - "min_sentence2_length": 31, - "average_sentence2_length": 114.03125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66856 - }, - "eng_Latn-tbz_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.125, - "max_sentence1_length": 227, - "min_sentence2_length": 30, - "average_sentence2_length": 107.83203125, - "max_sentence2_length": 275, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 55797 - }, - "tbz_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 107.83203125, - "max_sentence1_length": 275, - "min_sentence2_length": 24, - "average_sentence2_length": 110.125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 55797 - }, - "eng_Latn-tca_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.3046875, - "max_sentence1_length": 227, - "min_sentence2_length": 40, - "average_sentence2_length": 274.484375, - "max_sentence2_length": 1003, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99018 - }, - "tca_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 274.484375, - "max_sentence1_length": 1003, - "min_sentence2_length": 24, - "average_sentence2_length": 112.3046875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99018 - }, - "eng_Latn-tcs_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.69921875, - "max_sentence1_length": 227, - "min_sentence2_length": 36, - "average_sentence2_length": 175.640625, - "max_sentence2_length": 559, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73303 - }, - "tcs_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 175.640625, - "max_sentence1_length": 559, - "min_sentence2_length": 24, - "average_sentence2_length": 110.69921875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73303 - }, - "eng_Latn-tcz_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 30, - "average_sentence2_length": 141.09375, - "max_sentence2_length": 363, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64893 - }, - "tcz_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 141.09375, - "max_sentence1_length": 363, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64893 - }, - "eng_Latn-tdt_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 115.015625, - "max_sentence1_length": 827, - "min_sentence2_length": 33, - "average_sentence2_length": 168.8203125, - "max_sentence2_length": 617, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72662 - }, - "tdt_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 168.8203125, - "max_sentence1_length": 617, - "min_sentence2_length": 31, - "average_sentence2_length": 115.015625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72662 - }, - "eng_Latn-tee_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.37890625, - "max_sentence1_length": 827, - "min_sentence2_length": 46, - "average_sentence2_length": 231.21484375, - "max_sentence2_length": 533, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88472 - }, - "tee_Latn-eng_Latn": { - "min_sentence1_length": 46, - "average_sentence1_length": 231.21484375, - "max_sentence1_length": 533, - "min_sentence2_length": 24, - "average_sentence2_length": 114.37890625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88472 - }, - "eng_Latn-tel_Telu": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.12109375, - "max_sentence1_length": 227, - "min_sentence2_length": 24, - "average_sentence2_length": 120.203125, - "max_sentence2_length": 369, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58963 - }, - "tel_Telu-eng_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 120.203125, - "max_sentence1_length": 369, - "min_sentence2_length": 24, - "average_sentence2_length": 110.12109375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58963 - }, - "eng_Latn-ter_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 54, - "average_sentence2_length": 202.0625, - "max_sentence2_length": 599, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80501 - }, - "ter_Latn-eng_Latn": { - "min_sentence1_length": 54, - "average_sentence1_length": 202.0625, - "max_sentence1_length": 599, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80501 - }, - "eng_Latn-tet_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 113.78125, - "max_sentence1_length": 251, - "min_sentence2_length": 36, - "average_sentence2_length": 232.72265625, - "max_sentence2_length": 1185, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88705 - }, - "tet_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 232.72265625, - "max_sentence1_length": 1185, - "min_sentence2_length": 31, - "average_sentence2_length": 113.78125, - "max_sentence2_length": 251, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 88705 - }, - "eng_Latn-tew_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.140625, - "max_sentence1_length": 239, - "min_sentence2_length": 39, - "average_sentence2_length": 210.421875, - "max_sentence2_length": 585, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82320 - }, - "tew_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 210.421875, - "max_sentence1_length": 585, - "min_sentence2_length": 24, - "average_sentence2_length": 111.140625, - "max_sentence2_length": 239, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82320 - }, - "eng_Latn-tfr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.41796875, - "max_sentence1_length": 235, - "min_sentence2_length": 32, - "average_sentence2_length": 224.50390625, - "max_sentence2_length": 717, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85996 - }, - "tfr_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 224.50390625, - "max_sentence1_length": 717, - "min_sentence2_length": 24, - "average_sentence2_length": 111.41796875, - "max_sentence2_length": 235, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85996 - }, - "eng_Latn-tgk_Cyrl": { - "min_sentence1_length": 32, - "average_sentence1_length": 104.37109375, - "max_sentence1_length": 245, - "min_sentence2_length": 31, - "average_sentence2_length": 121.40234375, - "max_sentence2_length": 360, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57798 - }, - "tgk_Cyrl-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 121.40234375, - "max_sentence1_length": 360, - "min_sentence2_length": 32, - "average_sentence2_length": 104.37109375, - "max_sentence2_length": 245, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57798 - }, - "eng_Latn-tgl_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 29, - "average_sentence2_length": 145.71875, - "max_sentence2_length": 357, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66077 - }, - "tgl_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 145.71875, - "max_sentence1_length": 357, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66077 - }, - "eng_Latn-tgo_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.2109375, - "max_sentence1_length": 227, - "min_sentence2_length": 36, - "average_sentence2_length": 177.0, - "max_sentence2_length": 472, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73526 - }, - "tgo_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 177.0, - "max_sentence1_length": 472, - "min_sentence2_length": 24, - "average_sentence2_length": 110.2109375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73526 - }, - "eng_Latn-tgp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.50390625, - "max_sentence1_length": 227, - "min_sentence2_length": 33, - "average_sentence2_length": 157.234375, - "max_sentence2_length": 609, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68797 - }, - "tgp_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 157.234375, - "max_sentence1_length": 609, - "min_sentence2_length": 24, - "average_sentence2_length": 111.50390625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 68797 - }, - "eng_Latn-tha_Thai": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.4375, - "max_sentence1_length": 827, - "min_sentence2_length": 16, - "average_sentence2_length": 123.1015625, - "max_sentence2_length": 312, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60554 - }, - "tha_Thai-eng_Latn": { - "min_sentence1_length": 16, - "average_sentence1_length": 123.1015625, - "max_sentence1_length": 312, - "min_sentence2_length": 24, - "average_sentence2_length": 113.4375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60554 - }, - "eng_Latn-tif_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.62890625, - "max_sentence1_length": 230, - "min_sentence2_length": 42, - "average_sentence2_length": 404.69921875, - "max_sentence2_length": 1636, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 132436 - }, - "tif_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 404.69921875, - "max_sentence1_length": 1636, - "min_sentence2_length": 24, - "average_sentence2_length": 112.62890625, - "max_sentence2_length": 230, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 132436 - }, - "eng_Latn-tim_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 108.66796875, - "max_sentence1_length": 227, - "min_sentence2_length": 42, - "average_sentence2_length": 202.26953125, - "max_sentence2_length": 868, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79600 - }, - "tim_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 202.26953125, - "max_sentence1_length": 868, - "min_sentence2_length": 24, - "average_sentence2_length": 108.66796875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79600 - }, - "eng_Latn-tiw_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 103.73023255813953, - "max_sentence1_length": 257, - "min_sentence2_length": 95, - "average_sentence2_length": 703.9720930232559, - "max_sentence2_length": 2787, - "num_samples": 215, - "num_samples_sentence2": 215, - "number_of_characters": 173656 - }, - "tiw_Latn-eng_Latn": { - "min_sentence1_length": 95, - "average_sentence1_length": 703.9720930232559, - "max_sentence1_length": 2787, - "min_sentence2_length": 40, - "average_sentence2_length": 103.73023255813953, - "max_sentence2_length": 257, - "num_samples": 215, - "num_samples_sentence2": 215, - "number_of_characters": 173656 - }, - "eng_Latn-tiy_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.0859375, - "max_sentence1_length": 827, - "min_sentence2_length": 36, - "average_sentence2_length": 173.453125, - "max_sentence2_length": 679, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73610 - }, - "tiy_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 173.453125, - "max_sentence1_length": 679, - "min_sentence2_length": 24, - "average_sentence2_length": 114.0859375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73610 - }, - "eng_Latn-tke_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 107.67391304347827, - "max_sentence1_length": 245, - "min_sentence2_length": 31, - "average_sentence2_length": 110.21739130434783, - "max_sentence2_length": 212, - "num_samples": 92, - "num_samples_sentence2": 92, - "number_of_characters": 20046 - }, - "tke_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 110.21739130434783, - "max_sentence1_length": 212, - "min_sentence2_length": 37, - "average_sentence2_length": 107.67391304347827, - "max_sentence2_length": 245, - "num_samples": 92, - "num_samples_sentence2": 92, - "number_of_characters": 20046 - }, - "eng_Latn-tku_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.55078125, - "max_sentence1_length": 827, - "min_sentence2_length": 28, - "average_sentence2_length": 174.171875, - "max_sentence2_length": 415, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74169 - }, - "tku_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 174.171875, - "max_sentence1_length": 415, - "min_sentence2_length": 24, - "average_sentence2_length": 115.55078125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74169 - }, - "eng_Latn-tlf_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.07421875, - "max_sentence1_length": 230, - "min_sentence2_length": 42, - "average_sentence2_length": 383.2734375, - "max_sentence2_length": 1527, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 126553 - }, - "tlf_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 383.2734375, - "max_sentence1_length": 1527, - "min_sentence2_length": 24, - "average_sentence2_length": 111.07421875, - "max_sentence2_length": 230, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 126553 - }, - "eng_Latn-tmd_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.16015625, - "max_sentence1_length": 227, - "min_sentence2_length": 27, - "average_sentence2_length": 218.640625, - "max_sentence2_length": 702, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83917 - }, - "tmd_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 218.640625, - "max_sentence1_length": 702, - "min_sentence2_length": 24, - "average_sentence2_length": 109.16015625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 83917 - }, - "eng_Latn-tna_Latn": { - "min_sentence1_length": 21, - "average_sentence1_length": 109.80859375, - "max_sentence1_length": 227, - "min_sentence2_length": 37, - "average_sentence2_length": 208.0234375, - "max_sentence2_length": 741, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81365 - }, - "tna_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 208.0234375, - "max_sentence1_length": 741, - "min_sentence2_length": 21, - "average_sentence2_length": 109.80859375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81365 - }, - "eng_Latn-tnc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.6015625, - "max_sentence1_length": 263, - "min_sentence2_length": 46, - "average_sentence2_length": 302.96484375, - "max_sentence2_length": 899, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 106641 - }, - "tnc_Latn-eng_Latn": { - "min_sentence1_length": 46, - "average_sentence1_length": 302.96484375, - "max_sentence1_length": 899, - "min_sentence2_length": 24, - "average_sentence2_length": 113.6015625, - "max_sentence2_length": 263, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 106641 - }, - "eng_Latn-tnk_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.58203125, - "max_sentence1_length": 227, - "min_sentence2_length": 35, - "average_sentence2_length": 195.19140625, - "max_sentence2_length": 547, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78022 - }, - "tnk_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 195.19140625, - "max_sentence1_length": 547, - "min_sentence2_length": 24, - "average_sentence2_length": 109.58203125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78022 - }, - "eng_Latn-tnn_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.359375, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 203.25390625, - "max_sentence2_length": 1039, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81309 - }, - "tnn_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 203.25390625, - "max_sentence1_length": 1039, - "min_sentence2_length": 24, - "average_sentence2_length": 114.359375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81309 - }, - "eng_Latn-tnp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.2734375, - "max_sentence1_length": 827, - "min_sentence2_length": 33, - "average_sentence2_length": 214.32421875, - "max_sentence2_length": 588, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84121 - }, - "tnp_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 214.32421875, - "max_sentence1_length": 588, - "min_sentence2_length": 24, - "average_sentence2_length": 114.2734375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84121 - }, - "eng_Latn-toc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.421875, - "max_sentence1_length": 227, - "min_sentence2_length": 54, - "average_sentence2_length": 287.953125, - "max_sentence2_length": 808, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101728 - }, - "toc_Latn-eng_Latn": { - "min_sentence1_length": 54, - "average_sentence1_length": 287.953125, - "max_sentence1_length": 808, - "min_sentence2_length": 24, - "average_sentence2_length": 109.421875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101728 - }, - "eng_Latn-tod_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 123.76171875, - "max_sentence1_length": 306, - "min_sentence2_length": 17, - "average_sentence2_length": 124.19140625, - "max_sentence2_length": 293, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63476 - }, - "tod_Latn-eng_Latn": { - "min_sentence1_length": 17, - "average_sentence1_length": 124.19140625, - "max_sentence1_length": 293, - "min_sentence2_length": 24, - "average_sentence2_length": 123.76171875, - "max_sentence2_length": 306, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63476 - }, - "eng_Latn-tof_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.203125, - "max_sentence1_length": 827, - "min_sentence2_length": 46, - "average_sentence2_length": 199.18359375, - "max_sentence2_length": 546, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80483 - }, - "tof_Latn-eng_Latn": { - "min_sentence1_length": 46, - "average_sentence1_length": 199.18359375, - "max_sentence1_length": 546, - "min_sentence2_length": 24, - "average_sentence2_length": 115.203125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80483 - }, - "eng_Latn-toj_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 108.55078125, - "max_sentence1_length": 227, - "min_sentence2_length": 32, - "average_sentence2_length": 245.6171875, - "max_sentence2_length": 816, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90667 - }, - "toj_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 245.6171875, - "max_sentence1_length": 816, - "min_sentence2_length": 31, - "average_sentence2_length": 108.55078125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90667 - }, - "eng_Latn-ton_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.4375, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 134.453125, - "max_sentence2_length": 302, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63460 - }, - "ton_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 134.453125, - "max_sentence1_length": 302, - "min_sentence2_length": 24, - "average_sentence2_length": 113.4375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63460 - }, - "eng_Latn-too_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.55078125, - "max_sentence1_length": 827, - "min_sentence2_length": 33, - "average_sentence2_length": 196.1015625, - "max_sentence2_length": 458, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79783 - }, - "too_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 196.1015625, - "max_sentence1_length": 458, - "min_sentence2_length": 24, - "average_sentence2_length": 115.55078125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79783 - }, - "eng_Latn-top_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.54296875, - "max_sentence1_length": 827, - "min_sentence2_length": 41, - "average_sentence2_length": 182.8671875, - "max_sentence2_length": 628, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75881 - }, - "top_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 182.8671875, - "max_sentence1_length": 628, - "min_sentence2_length": 24, - "average_sentence2_length": 113.54296875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75881 - }, - "eng_Latn-tos_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.44140625, - "max_sentence1_length": 827, - "min_sentence2_length": 60, - "average_sentence2_length": 288.21484375, - "max_sentence2_length": 845, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 102824 - }, - "tos_Latn-eng_Latn": { - "min_sentence1_length": 60, - "average_sentence1_length": 288.21484375, - "max_sentence1_length": 845, - "min_sentence2_length": 24, - "average_sentence2_length": 113.44140625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 102824 - }, - "eng_Latn-tpa_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 110.51063829787235, - "max_sentence1_length": 257, - "min_sentence2_length": 47, - "average_sentence2_length": 150.54609929078015, - "max_sentence2_length": 540, - "num_samples": 141, - "num_samples_sentence2": 141, - "number_of_characters": 36809 - }, - "tpa_Latn-eng_Latn": { - "min_sentence1_length": 47, - "average_sentence1_length": 150.54609929078015, - "max_sentence1_length": 540, - "min_sentence2_length": 39, - "average_sentence2_length": 110.51063829787235, - "max_sentence2_length": 257, - "num_samples": 141, - "num_samples_sentence2": 141, - "number_of_characters": 36809 - }, - "eng_Latn-tpi_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 138.0859375, - "max_sentence1_length": 341, - "min_sentence2_length": 46, - "average_sentence2_length": 244.546875, - "max_sentence2_length": 2947, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97954 - }, - "tpi_Latn-eng_Latn": { - "min_sentence1_length": 46, - "average_sentence1_length": 244.546875, - "max_sentence1_length": 2947, - "min_sentence2_length": 1, - "average_sentence2_length": 138.0859375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97954 - }, - "eng_Latn-tpt_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 46, - "average_sentence2_length": 163.7890625, - "max_sentence2_length": 441, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71465 - }, - "tpt_Latn-eng_Latn": { - "min_sentence1_length": 46, - "average_sentence1_length": 163.7890625, - "max_sentence1_length": 441, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71465 - }, - "eng_Latn-tpz_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.09765625, - "max_sentence1_length": 227, - "min_sentence2_length": 36, - "average_sentence2_length": 222.984375, - "max_sentence2_length": 780, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85269 - }, - "tpz_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 222.984375, - "max_sentence1_length": 780, - "min_sentence2_length": 24, - "average_sentence2_length": 110.09765625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85269 - }, - "eng_Latn-trc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.23046875, - "max_sentence1_length": 376, - "min_sentence2_length": 46, - "average_sentence2_length": 270.44921875, - "max_sentence2_length": 859, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97710 - }, - "trc_Latn-eng_Latn": { - "min_sentence1_length": 46, - "average_sentence1_length": 270.44921875, - "max_sentence1_length": 859, - "min_sentence2_length": 24, - "average_sentence2_length": 111.23046875, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 97710 - }, - "eng_Latn-tsw_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.4765625, - "max_sentence1_length": 227, - "min_sentence2_length": 27, - "average_sentence2_length": 184.984375, - "max_sentence2_length": 1078, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75638 - }, - "tsw_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 184.984375, - "max_sentence1_length": 1078, - "min_sentence2_length": 24, - "average_sentence2_length": 110.4765625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75638 - }, - "eng_Latn-ttc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 30, - "average_sentence2_length": 172.53515625, - "max_sentence2_length": 410, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72942 - }, - "ttc_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 172.53515625, - "max_sentence1_length": 410, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72942 - }, - "eng_Latn-tte_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 108.7734375, - "max_sentence1_length": 239, - "min_sentence2_length": 25, - "average_sentence2_length": 233.83203125, - "max_sentence2_length": 973, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87707 - }, - "tte_Latn-eng_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 233.83203125, - "max_sentence1_length": 973, - "min_sentence2_length": 24, - "average_sentence2_length": 108.7734375, - "max_sentence2_length": 239, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87707 - }, - "eng_Latn-tuc_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 146.65625, - "max_sentence1_length": 341, - "min_sentence2_length": 32, - "average_sentence2_length": 221.24609375, - "max_sentence2_length": 533, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 94183 - }, - "tuc_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 221.24609375, - "max_sentence1_length": 533, - "min_sentence2_length": 1, - "average_sentence2_length": 146.65625, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 94183 - }, - "eng_Latn-tue_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.07421875, - "max_sentence1_length": 827, - "min_sentence2_length": 27, - "average_sentence2_length": 179.30859375, - "max_sentence2_length": 980, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74850 - }, - "tue_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 179.30859375, - "max_sentence1_length": 980, - "min_sentence2_length": 24, - "average_sentence2_length": 113.07421875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74850 - }, - "eng_Latn-tuf_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 114.47265625, - "max_sentence1_length": 827, - "min_sentence2_length": 23, - "average_sentence2_length": 208.703125, - "max_sentence2_length": 628, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82733 - }, - "tuf_Latn-eng_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 208.703125, - "max_sentence1_length": 628, - "min_sentence2_length": 31, - "average_sentence2_length": 114.47265625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82733 - }, - "eng_Latn-tuo_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.75390625, - "max_sentence1_length": 230, - "min_sentence2_length": 29, - "average_sentence2_length": 221.515625, - "max_sentence2_length": 945, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85317 - }, - "tuo_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 221.515625, - "max_sentence1_length": 945, - "min_sentence2_length": 24, - "average_sentence2_length": 111.75390625, - "max_sentence2_length": 230, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85317 - }, - "eng_Latn-tur_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 108.5703125, - "max_sentence1_length": 225, - "min_sentence2_length": 18, - "average_sentence2_length": 156.30859375, - "max_sentence2_length": 448, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67809 - }, - "tur_Latn-eng_Latn": { - "min_sentence1_length": 18, - "average_sentence1_length": 156.30859375, - "max_sentence1_length": 448, - "min_sentence2_length": 24, - "average_sentence2_length": 108.5703125, - "max_sentence2_length": 225, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67809 - }, - "eng_Latn-tvk_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.1484375, - "max_sentence1_length": 827, - "min_sentence2_length": 25, - "average_sentence2_length": 193.80859375, - "max_sentence2_length": 500, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79093 - }, - "tvk_Latn-eng_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 193.80859375, - "max_sentence1_length": 500, - "min_sentence2_length": 24, - "average_sentence2_length": 115.1484375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79093 - }, - "eng_Latn-twi_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 21, - "average_sentence2_length": 108.4921875, - "max_sentence2_length": 240, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 56547 - }, - "twi_Latn-eng_Latn": { - "min_sentence1_length": 21, - "average_sentence1_length": 108.4921875, - "max_sentence1_length": 240, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 56547 - }, - "eng_Latn-txq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.76953125, - "max_sentence1_length": 251, - "min_sentence2_length": 37, - "average_sentence2_length": 293.578125, - "max_sentence2_length": 1375, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 104537 - }, - "txq_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 293.578125, - "max_sentence1_length": 1375, - "min_sentence2_length": 24, - "average_sentence2_length": 114.76953125, - "max_sentence2_length": 251, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 104537 - }, - "eng_Latn-txu_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 111.9375, - "max_sentence1_length": 256, - "min_sentence2_length": 33, - "average_sentence2_length": 362.140625, - "max_sentence2_length": 1373, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 121364 - }, - "txu_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 362.140625, - "max_sentence1_length": 1373, - "min_sentence2_length": 31, - "average_sentence2_length": 111.9375, - "max_sentence2_length": 256, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 121364 - }, - "eng_Latn-tzj_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 146.40625, - "max_sentence1_length": 341, - "min_sentence2_length": 58, - "average_sentence2_length": 246.171875, - "max_sentence2_length": 687, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 100500 - }, - "tzj_Latn-eng_Latn": { - "min_sentence1_length": 58, - "average_sentence1_length": 246.171875, - "max_sentence1_length": 687, - "min_sentence2_length": 35, - "average_sentence2_length": 146.40625, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 100500 - }, - "eng_Latn-tzo_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 148.25390625, - "max_sentence1_length": 341, - "min_sentence2_length": 54, - "average_sentence2_length": 223.5625, - "max_sentence2_length": 965, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 95185 - }, - "tzo_Latn-eng_Latn": { - "min_sentence1_length": 54, - "average_sentence1_length": 223.5625, - "max_sentence1_length": 965, - "min_sentence2_length": 35, - "average_sentence2_length": 148.25390625, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 95185 - }, - "eng_Latn-ubr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.6875, - "max_sentence1_length": 227, - "min_sentence2_length": 27, - "average_sentence2_length": 179.2109375, - "max_sentence2_length": 462, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73958 - }, - "ubr_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 179.2109375, - "max_sentence1_length": 462, - "min_sentence2_length": 24, - "average_sentence2_length": 109.6875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73958 - }, - "eng_Latn-ubu_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 146.6953125, - "max_sentence1_length": 341, - "min_sentence2_length": 47, - "average_sentence2_length": 266.8984375, - "max_sentence2_length": 858, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 105880 - }, - "ubu_Latn-eng_Latn": { - "min_sentence1_length": 47, - "average_sentence1_length": 266.8984375, - "max_sentence1_length": 858, - "min_sentence2_length": 1, - "average_sentence2_length": 146.6953125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 105880 - }, - "eng_Latn-udu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 24, - "average_sentence2_length": 174.4296875, - "max_sentence2_length": 525, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74189 - }, - "udu_Latn-eng_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 174.4296875, - "max_sentence1_length": 525, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74189 - }, - "eng_Latn-uig_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.046875, - "max_sentence1_length": 341, - "min_sentence2_length": 49, - "average_sentence2_length": 158.07421875, - "max_sentence2_length": 374, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78111 - }, - "uig_Latn-eng_Latn": { - "min_sentence1_length": 49, - "average_sentence1_length": 158.07421875, - "max_sentence1_length": 374, - "min_sentence2_length": 56, - "average_sentence2_length": 147.046875, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78111 - }, - "eng_Latn-ukr_Cyrl": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.78125, - "max_sentence1_length": 227, - "min_sentence2_length": 17, - "average_sentence2_length": 87.7109375, - "max_sentence2_length": 234, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 50558 - }, - "ukr_Cyrl-eng_Latn": { - "min_sentence1_length": 17, - "average_sentence1_length": 87.7109375, - "max_sentence1_length": 234, - "min_sentence2_length": 24, - "average_sentence2_length": 109.78125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 50558 - }, - "eng_Latn-uli_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 113.765625, - "max_sentence1_length": 827, - "min_sentence2_length": 36, - "average_sentence2_length": 156.32421875, - "max_sentence2_length": 429, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69143 - }, - "uli_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 156.32421875, - "max_sentence1_length": 429, - "min_sentence2_length": 25, - "average_sentence2_length": 113.765625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 69143 - }, - "eng_Latn-ulk_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 110.41361256544502, - "max_sentence1_length": 244, - "min_sentence2_length": 41, - "average_sentence2_length": 147.36649214659687, - "max_sentence2_length": 858, - "num_samples": 191, - "num_samples_sentence2": 191, - "number_of_characters": 49236 - }, - "ulk_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 147.36649214659687, - "max_sentence1_length": 858, - "min_sentence2_length": 39, - "average_sentence2_length": 110.41361256544502, - "max_sentence2_length": 244, - "num_samples": 191, - "num_samples_sentence2": 191, - "number_of_characters": 49236 - }, - "eng_Latn-upv_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.0546875, - "max_sentence1_length": 376, - "min_sentence2_length": 36, - "average_sentence2_length": 181.41796875, - "max_sentence2_length": 584, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75129 - }, - "upv_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 181.41796875, - "max_sentence1_length": 584, - "min_sentence2_length": 24, - "average_sentence2_length": 112.0546875, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75129 - }, - "eng_Latn-ura_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.66015625, - "max_sentence1_length": 227, - "min_sentence2_length": 31, - "average_sentence2_length": 231.44140625, - "max_sentence2_length": 990, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87834 - }, - "ura_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 231.44140625, - "max_sentence1_length": 990, - "min_sentence2_length": 24, - "average_sentence2_length": 111.66015625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 87834 - }, - "eng_Latn-urb_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.109375, - "max_sentence1_length": 227, - "min_sentence2_length": 31, - "average_sentence2_length": 299.421875, - "max_sentence2_length": 1080, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 104840 - }, - "urb_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 299.421875, - "max_sentence1_length": 1080, - "min_sentence2_length": 24, - "average_sentence2_length": 110.109375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 104840 - }, - "eng_Latn-urd_Arab": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.55859375, - "max_sentence1_length": 341, - "min_sentence2_length": 35, - "average_sentence2_length": 156.125, - "max_sentence2_length": 420, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77743 - }, - "urd_Arab-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 156.125, - "max_sentence1_length": 420, - "min_sentence2_length": 56, - "average_sentence2_length": 147.55859375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77743 - }, - "eng_Latn-uri_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 112.38888888888889, - "max_sentence1_length": 245, - "min_sentence2_length": 60, - "average_sentence2_length": 212.01190476190476, - "max_sentence2_length": 618, - "num_samples": 252, - "num_samples_sentence2": 252, - "number_of_characters": 81749 - }, - "uri_Latn-eng_Latn": { - "min_sentence1_length": 60, - "average_sentence1_length": 212.01190476190476, - "max_sentence1_length": 618, - "min_sentence2_length": 38, - "average_sentence2_length": 112.38888888888889, - "max_sentence2_length": 245, - "num_samples": 252, - "num_samples_sentence2": 252, - "number_of_characters": 81749 - }, - "eng_Latn-urt_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.5703125, - "max_sentence1_length": 827, - "min_sentence2_length": 40, - "average_sentence2_length": 203.98046875, - "max_sentence2_length": 882, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81805 - }, - "urt_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 203.98046875, - "max_sentence1_length": 882, - "min_sentence2_length": 24, - "average_sentence2_length": 115.5703125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81805 - }, - "eng_Latn-urw_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 112.38888888888889, - "max_sentence1_length": 245, - "min_sentence2_length": 47, - "average_sentence2_length": 180.75555555555556, - "max_sentence2_length": 597, - "num_samples": 90, - "num_samples_sentence2": 90, - "number_of_characters": 26383 - }, - "urw_Latn-eng_Latn": { - "min_sentence1_length": 47, - "average_sentence1_length": 180.75555555555556, - "max_sentence1_length": 597, - "min_sentence2_length": 37, - "average_sentence2_length": 112.38888888888889, - "max_sentence2_length": 245, - "num_samples": 90, - "num_samples_sentence2": 90, - "number_of_characters": 26383 - }, - "eng_Latn-usa_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.59375, - "max_sentence1_length": 251, - "min_sentence2_length": 8, - "average_sentence2_length": 200.51171875, - "max_sentence2_length": 896, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79899 - }, - "usa_Latn-eng_Latn": { - "min_sentence1_length": 8, - "average_sentence1_length": 200.51171875, - "max_sentence1_length": 896, - "min_sentence2_length": 24, - "average_sentence2_length": 111.59375, - "max_sentence2_length": 251, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79899 - }, - "eng_Latn-usp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.296875, - "max_sentence1_length": 827, - "min_sentence2_length": 26, - "average_sentence2_length": 190.515625, - "max_sentence2_length": 480, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78288 - }, - "usp_Latn-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 190.515625, - "max_sentence1_length": 480, - "min_sentence2_length": 24, - "average_sentence2_length": 115.296875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78288 - }, - "eng_Latn-uvh_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 109.5390625, - "max_sentence1_length": 271, - "min_sentence2_length": 54, - "average_sentence2_length": 372.28515625, - "max_sentence2_length": 1232, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 123347 - }, - "uvh_Latn-eng_Latn": { - "min_sentence1_length": 54, - "average_sentence1_length": 372.28515625, - "max_sentence1_length": 1232, - "min_sentence2_length": 31, - "average_sentence2_length": 109.5390625, - "max_sentence2_length": 271, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 123347 - }, - "eng_Latn-uvl_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.4609375, - "max_sentence1_length": 227, - "min_sentence2_length": 45, - "average_sentence2_length": 192.77734375, - "max_sentence2_length": 702, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77885 - }, - "uvl_Latn-eng_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 192.77734375, - "max_sentence1_length": 702, - "min_sentence2_length": 24, - "average_sentence2_length": 111.4609375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77885 - }, - "eng_Latn-vid_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 38, - "average_sentence2_length": 138.671875, - "max_sentence2_length": 305, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64273 - }, - "vid_Latn-eng_Latn": { - "min_sentence1_length": 38, - "average_sentence1_length": 138.671875, - "max_sentence1_length": 305, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64273 - }, - "eng_Latn-vie_Latn": { - "min_sentence1_length": 56, - "average_sentence1_length": 147.55859375, - "max_sentence1_length": 341, - "min_sentence2_length": 32, - "average_sentence2_length": 127.79296875, - "max_sentence2_length": 355, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70490 - }, - "vie_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 127.79296875, - "max_sentence1_length": 355, - "min_sentence2_length": 56, - "average_sentence2_length": 147.55859375, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70490 - }, - "eng_Latn-viv_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.82421875, - "max_sentence1_length": 227, - "min_sentence2_length": 50, - "average_sentence2_length": 319.16015625, - "max_sentence2_length": 1077, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 110076 - }, - "viv_Latn-eng_Latn": { - "min_sentence1_length": 50, - "average_sentence1_length": 319.16015625, - "max_sentence1_length": 1077, - "min_sentence2_length": 24, - "average_sentence2_length": 110.82421875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 110076 - }, - "eng_Latn-vmy_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 42, - "average_sentence2_length": 149.4921875, - "max_sentence2_length": 337, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67043 - }, - "vmy_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 149.4921875, - "max_sentence1_length": 337, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67043 - }, - "eng_Latn-waj_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.875, - "max_sentence1_length": 827, - "min_sentence2_length": 43, - "average_sentence2_length": 248.9609375, - "max_sentence2_length": 1148, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92630 - }, - "waj_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 248.9609375, - "max_sentence1_length": 1148, - "min_sentence2_length": 24, - "average_sentence2_length": 112.875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92630 - }, - "eng_Latn-wal_Ethi": { - "min_sentence1_length": 24, - "average_sentence1_length": 106.609375, - "max_sentence1_length": 207, - "min_sentence2_length": 28, - "average_sentence2_length": 146.234375, - "max_sentence2_length": 451, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64728 - }, - "wal_Ethi-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 146.234375, - "max_sentence1_length": 451, - "min_sentence2_length": 24, - "average_sentence2_length": 106.609375, - "max_sentence2_length": 207, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64728 - }, - "eng_Latn-wap_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.37890625, - "max_sentence1_length": 827, - "min_sentence2_length": 41, - "average_sentence2_length": 218.2421875, - "max_sentence2_length": 566, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84895 - }, - "wap_Latn-eng_Latn": { - "min_sentence1_length": 41, - "average_sentence1_length": 218.2421875, - "max_sentence1_length": 566, - "min_sentence2_length": 24, - "average_sentence2_length": 113.37890625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84895 - }, - "eng_Latn-wat_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 114.1875, - "max_sentence1_length": 257, - "min_sentence2_length": 50, - "average_sentence2_length": 172.70703125, - "max_sentence2_length": 748, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73445 - }, - "wat_Latn-eng_Latn": { - "min_sentence1_length": 50, - "average_sentence1_length": 172.70703125, - "max_sentence1_length": 748, - "min_sentence2_length": 39, - "average_sentence2_length": 114.1875, - "max_sentence2_length": 257, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73445 - }, - "eng_Latn-wbi_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.48046875, - "max_sentence1_length": 227, - "min_sentence2_length": 4, - "average_sentence2_length": 132.11328125, - "max_sentence2_length": 359, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62104 - }, - "wbi_Latn-eng_Latn": { - "min_sentence1_length": 4, - "average_sentence1_length": 132.11328125, - "max_sentence1_length": 359, - "min_sentence2_length": 24, - "average_sentence2_length": 110.48046875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62104 - }, - "eng_Latn-wbp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.17578125, - "max_sentence1_length": 230, - "min_sentence2_length": 99, - "average_sentence2_length": 460.8671875, - "max_sentence2_length": 1904, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 146443 - }, - "wbp_Latn-eng_Latn": { - "min_sentence1_length": 99, - "average_sentence1_length": 460.8671875, - "max_sentence1_length": 1904, - "min_sentence2_length": 24, - "average_sentence2_length": 111.17578125, - "max_sentence2_length": 230, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 146443 - }, - "eng_Latn-wed_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 111.1953125, - "max_sentence1_length": 215, - "min_sentence2_length": 30, - "average_sentence2_length": 113.93359375, - "max_sentence2_length": 298, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57633 - }, - "wed_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 113.93359375, - "max_sentence1_length": 298, - "min_sentence2_length": 28, - "average_sentence2_length": 111.1953125, - "max_sentence2_length": 215, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57633 - }, - "eng_Latn-wer_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.5859375, - "max_sentence1_length": 827, - "min_sentence2_length": 19, - "average_sentence2_length": 165.30859375, - "max_sentence2_length": 590, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71653 - }, - "wer_Latn-eng_Latn": { - "min_sentence1_length": 19, - "average_sentence1_length": 165.30859375, - "max_sentence1_length": 590, - "min_sentence2_length": 24, - "average_sentence2_length": 114.5859375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71653 - }, - "eng_Latn-wim_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.02734375, - "max_sentence1_length": 232, - "min_sentence2_length": 64, - "average_sentence2_length": 350.8046875, - "max_sentence2_length": 1563, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 117973 - }, - "wim_Latn-eng_Latn": { - "min_sentence1_length": 64, - "average_sentence1_length": 350.8046875, - "max_sentence1_length": 1563, - "min_sentence2_length": 24, - "average_sentence2_length": 110.02734375, - "max_sentence2_length": 232, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 117973 - }, - "eng_Latn-wiu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.5078125, - "max_sentence1_length": 827, - "min_sentence2_length": 55, - "average_sentence2_length": 200.34765625, - "max_sentence2_length": 590, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80347 - }, - "wiu_Latn-eng_Latn": { - "min_sentence1_length": 55, - "average_sentence1_length": 200.34765625, - "max_sentence1_length": 590, - "min_sentence2_length": 24, - "average_sentence2_length": 113.5078125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80347 - }, - "eng_Latn-wiv_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.328125, - "max_sentence1_length": 827, - "min_sentence2_length": 44, - "average_sentence2_length": 192.15625, - "max_sentence2_length": 518, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78716 - }, - "wiv_Latn-eng_Latn": { - "min_sentence1_length": 44, - "average_sentence1_length": 192.15625, - "max_sentence1_length": 518, - "min_sentence2_length": 24, - "average_sentence2_length": 115.328125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 78716 - }, - "eng_Latn-wmt_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 116.83203125, - "max_sentence1_length": 288, - "min_sentence2_length": 60, - "average_sentence2_length": 793.13671875, - "max_sentence2_length": 4949, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 232952 - }, - "wmt_Latn-eng_Latn": { - "min_sentence1_length": 60, - "average_sentence1_length": 793.13671875, - "max_sentence1_length": 4949, - "min_sentence2_length": 33, - "average_sentence2_length": 116.83203125, - "max_sentence2_length": 288, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 232952 - }, - "eng_Latn-wmw_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 33, - "average_sentence2_length": 125.3671875, - "max_sentence2_length": 276, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60867 - }, - "wmw_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 125.3671875, - "max_sentence1_length": 276, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60867 - }, - "eng_Latn-wnc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.4375, - "max_sentence1_length": 251, - "min_sentence2_length": 58, - "average_sentence2_length": 314.421875, - "max_sentence2_length": 1344, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 108764 - }, - "wnc_Latn-eng_Latn": { - "min_sentence1_length": 58, - "average_sentence1_length": 314.421875, - "max_sentence1_length": 1344, - "min_sentence2_length": 24, - "average_sentence2_length": 110.4375, - "max_sentence2_length": 251, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 108764 - }, - "eng_Latn-wnu_Latn": { - "min_sentence1_length": 21, - "average_sentence1_length": 110.328125, - "max_sentence1_length": 271, - "min_sentence2_length": 32, - "average_sentence2_length": 180.5078125, - "max_sentence2_length": 904, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74454 - }, - "wnu_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 180.5078125, - "max_sentence1_length": 904, - "min_sentence2_length": 21, - "average_sentence2_length": 110.328125, - "max_sentence2_length": 271, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74454 - }, - "eng_Latn-wol_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 145.33203125, - "max_sentence1_length": 341, - "min_sentence2_length": 31, - "average_sentence2_length": 131.58984375, - "max_sentence2_length": 322, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70892 - }, - "wol_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 131.58984375, - "max_sentence1_length": 322, - "min_sentence2_length": 1, - "average_sentence2_length": 145.33203125, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70892 - }, - "eng_Latn-wos_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.16796875, - "max_sentence1_length": 227, - "min_sentence2_length": 23, - "average_sentence2_length": 256.5078125, - "max_sentence2_length": 878, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93613 - }, - "wos_Latn-eng_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 256.5078125, - "max_sentence1_length": 878, - "min_sentence2_length": 24, - "average_sentence2_length": 109.16796875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93613 - }, - "eng_Latn-wrk_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 109.18359375, - "max_sentence1_length": 256, - "min_sentence2_length": 85, - "average_sentence2_length": 712.43359375, - "max_sentence2_length": 2886, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 210334 - }, - "wrk_Latn-eng_Latn": { - "min_sentence1_length": 85, - "average_sentence1_length": 712.43359375, - "max_sentence1_length": 2886, - "min_sentence2_length": 31, - "average_sentence2_length": 109.18359375, - "max_sentence2_length": 256, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 210334 - }, - "eng_Latn-wro_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 110.109375, - "max_sentence1_length": 257, - "min_sentence2_length": 28, - "average_sentence2_length": 113.57421875, - "max_sentence2_length": 323, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57263 - }, - "wro_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 113.57421875, - "max_sentence1_length": 323, - "min_sentence2_length": 37, - "average_sentence2_length": 110.109375, - "max_sentence2_length": 257, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 57263 - }, - "eng_Latn-wrs_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.2578125, - "max_sentence1_length": 827, - "min_sentence2_length": 43, - "average_sentence2_length": 216.0, - "max_sentence2_length": 761, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84290 - }, - "wrs_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 216.0, - "max_sentence1_length": 761, - "min_sentence2_length": 24, - "average_sentence2_length": 113.2578125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84290 - }, - "eng_Latn-wsk_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.75, - "max_sentence1_length": 827, - "min_sentence2_length": 45, - "average_sentence2_length": 172.875, - "max_sentence2_length": 362, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73376 - }, - "wsk_Latn-eng_Latn": { - "min_sentence1_length": 45, - "average_sentence1_length": 172.875, - "max_sentence1_length": 362, - "min_sentence2_length": 24, - "average_sentence2_length": 113.75, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73376 - }, - "eng_Latn-wuv_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.4140625, - "max_sentence1_length": 827, - "min_sentence2_length": 30, - "average_sentence2_length": 131.328125, - "max_sentence2_length": 284, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62654 - }, - "wuv_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 131.328125, - "max_sentence1_length": 284, - "min_sentence2_length": 24, - "average_sentence2_length": 113.4140625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 62654 - }, - "eng_Latn-xav_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.640625, - "max_sentence1_length": 227, - "min_sentence2_length": 65, - "average_sentence2_length": 432.13671875, - "max_sentence2_length": 1467, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 138695 - }, - "xav_Latn-eng_Latn": { - "min_sentence1_length": 65, - "average_sentence1_length": 432.13671875, - "max_sentence1_length": 1467, - "min_sentence2_length": 24, - "average_sentence2_length": 109.640625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 138695 - }, - "eng_Latn-xbi_Latn": { - "min_sentence1_length": 22, - "average_sentence1_length": 108.2265625, - "max_sentence1_length": 232, - "min_sentence2_length": 32, - "average_sentence2_length": 215.0703125, - "max_sentence2_length": 2517, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82764 - }, - "xbi_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 215.0703125, - "max_sentence1_length": 2517, - "min_sentence2_length": 22, - "average_sentence2_length": 108.2265625, - "max_sentence2_length": 232, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 82764 - }, - "eng_Latn-xed_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.484375, - "max_sentence1_length": 376, - "min_sentence2_length": 30, - "average_sentence2_length": 152.98046875, - "max_sentence2_length": 361, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67959 - }, - "xed_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 152.98046875, - "max_sentence1_length": 361, - "min_sentence2_length": 24, - "average_sentence2_length": 112.484375, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67959 - }, - "eng_Latn-xla_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.15234375, - "max_sentence1_length": 259, - "min_sentence2_length": 29, - "average_sentence2_length": 242.9375, - "max_sentence2_length": 775, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90647 - }, - "xla_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 242.9375, - "max_sentence1_length": 775, - "min_sentence2_length": 24, - "average_sentence2_length": 111.15234375, - "max_sentence2_length": 259, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90647 - }, - "eng_Latn-xnn_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.05078125, - "max_sentence1_length": 239, - "min_sentence2_length": 29, - "average_sentence2_length": 239.5703125, - "max_sentence2_length": 944, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89247 - }, - "xnn_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 239.5703125, - "max_sentence1_length": 944, - "min_sentence2_length": 24, - "average_sentence2_length": 109.05078125, - "max_sentence2_length": 239, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89247 - }, - "eng_Latn-xon_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 112.515625, - "max_sentence1_length": 376, - "min_sentence2_length": 35, - "average_sentence2_length": 141.6640625, - "max_sentence2_length": 776, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65070 - }, - "xon_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 141.6640625, - "max_sentence1_length": 776, - "min_sentence2_length": 31, - "average_sentence2_length": 112.515625, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65070 - }, - "eng_Latn-xsi_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.8046875, - "max_sentence1_length": 827, - "min_sentence2_length": 39, - "average_sentence2_length": 245.1796875, - "max_sentence2_length": 735, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91644 - }, - "xsi_Latn-eng_Latn": { - "min_sentence1_length": 39, - "average_sentence1_length": 245.1796875, - "max_sentence1_length": 735, - "min_sentence2_length": 24, - "average_sentence2_length": 112.8046875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91644 - }, - "eng_Latn-xtd_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.34375, - "max_sentence1_length": 227, - "min_sentence2_length": 48, - "average_sentence2_length": 201.03125, - "max_sentence2_length": 707, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79968 - }, - "xtd_Latn-eng_Latn": { - "min_sentence1_length": 48, - "average_sentence1_length": 201.03125, - "max_sentence1_length": 707, - "min_sentence2_length": 24, - "average_sentence2_length": 111.34375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79968 - }, - "eng_Latn-xtm_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.54296875, - "max_sentence1_length": 827, - "min_sentence2_length": 32, - "average_sentence2_length": 197.6171875, - "max_sentence2_length": 544, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79657 - }, - "xtm_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 197.6171875, - "max_sentence1_length": 544, - "min_sentence2_length": 24, - "average_sentence2_length": 113.54296875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79657 - }, - "eng_Latn-yaa_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 114.7109375, - "max_sentence1_length": 227, - "min_sentence2_length": 27, - "average_sentence2_length": 283.4921875, - "max_sentence2_length": 1230, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101940 - }, - "yaa_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 283.4921875, - "max_sentence1_length": 1230, - "min_sentence2_length": 31, - "average_sentence2_length": 114.7109375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 101940 - }, - "eng_Latn-yad_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.2421875, - "max_sentence1_length": 233, - "min_sentence2_length": 31, - "average_sentence2_length": 249.9453125, - "max_sentence2_length": 650, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93232 - }, - "yad_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 249.9453125, - "max_sentence1_length": 650, - "min_sentence2_length": 24, - "average_sentence2_length": 114.2421875, - "max_sentence2_length": 233, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 93232 - }, - "eng_Latn-yal_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 28, - "average_sentence2_length": 122.8515625, - "max_sentence2_length": 308, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60223 - }, - "yal_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 122.8515625, - "max_sentence1_length": 308, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60223 - }, - "eng_Latn-yap_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.4921875, - "max_sentence1_length": 827, - "min_sentence2_length": 28, - "average_sentence2_length": 176.61328125, - "max_sentence2_length": 491, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74267 - }, - "yap_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 176.61328125, - "max_sentence1_length": 491, - "min_sentence2_length": 24, - "average_sentence2_length": 113.4921875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74267 - }, - "eng_Latn-yaq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 109.8359375, - "max_sentence1_length": 227, - "min_sentence2_length": 60, - "average_sentence2_length": 220.11328125, - "max_sentence2_length": 476, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84467 - }, - "yaq_Latn-eng_Latn": { - "min_sentence1_length": 60, - "average_sentence1_length": 220.11328125, - "max_sentence1_length": 476, - "min_sentence2_length": 24, - "average_sentence2_length": 109.8359375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 84467 - }, - "eng_Latn-yby_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.140625, - "max_sentence1_length": 227, - "min_sentence2_length": 49, - "average_sentence2_length": 201.703125, - "max_sentence2_length": 611, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79832 - }, - "yby_Latn-eng_Latn": { - "min_sentence1_length": 49, - "average_sentence1_length": 201.703125, - "max_sentence1_length": 611, - "min_sentence2_length": 24, - "average_sentence2_length": 110.140625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79832 - }, - "eng_Latn-ycn_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.47265625, - "max_sentence1_length": 827, - "min_sentence2_length": 67, - "average_sentence2_length": 272.796875, - "max_sentence2_length": 793, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99397 - }, - "ycn_Latn-eng_Latn": { - "min_sentence1_length": 67, - "average_sentence1_length": 272.796875, - "max_sentence1_length": 793, - "min_sentence2_length": 24, - "average_sentence2_length": 115.47265625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 99397 - }, - "eng_Latn-yka_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 112.5, - "max_sentence1_length": 827, - "min_sentence2_length": 33, - "average_sentence2_length": 183.38671875, - "max_sentence2_length": 494, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75747 - }, - "yka_Latn-eng_Latn": { - "min_sentence1_length": 33, - "average_sentence1_length": 183.38671875, - "max_sentence1_length": 494, - "min_sentence2_length": 31, - "average_sentence2_length": 112.5, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75747 - }, - "eng_Latn-yle_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 111.36328125, - "max_sentence1_length": 233, - "min_sentence2_length": 29, - "average_sentence2_length": 205.69140625, - "max_sentence2_length": 587, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81166 - }, - "yle_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 205.69140625, - "max_sentence1_length": 587, - "min_sentence2_length": 31, - "average_sentence2_length": 111.36328125, - "max_sentence2_length": 233, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 81166 - }, - "eng_Latn-yml_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 112.3203125, - "max_sentence1_length": 227, - "min_sentence2_length": 42, - "average_sentence2_length": 296.03515625, - "max_sentence2_length": 1111, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 104539 - }, - "yml_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 296.03515625, - "max_sentence1_length": 1111, - "min_sentence2_length": 25, - "average_sentence2_length": 112.3203125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 104539 - }, - "eng_Latn-yon_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.66015625, - "max_sentence1_length": 827, - "min_sentence2_length": 32, - "average_sentence2_length": 183.55859375, - "max_sentence2_length": 597, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76088 - }, - "yon_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 183.55859375, - "max_sentence1_length": 597, - "min_sentence2_length": 24, - "average_sentence2_length": 113.66015625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76088 - }, - "eng_Latn-yor_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.421875, - "max_sentence1_length": 827, - "min_sentence2_length": 26, - "average_sentence2_length": 114.8984375, - "max_sentence2_length": 287, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58962 - }, - "yor_Latn-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 114.8984375, - "max_sentence1_length": 287, - "min_sentence2_length": 24, - "average_sentence2_length": 115.421875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 58962 - }, - "eng_Latn-yrb_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.19921875, - "max_sentence1_length": 232, - "min_sentence2_length": 40, - "average_sentence2_length": 234.0625, - "max_sentence2_length": 765, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89411 - }, - "yrb_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 234.0625, - "max_sentence1_length": 765, - "min_sentence2_length": 24, - "average_sentence2_length": 115.19921875, - "max_sentence2_length": 232, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89411 - }, - "eng_Latn-yre_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.09375, - "max_sentence1_length": 827, - "min_sentence2_length": 43, - "average_sentence2_length": 172.7109375, - "max_sentence2_length": 461, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73678 - }, - "yre_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 172.7109375, - "max_sentence1_length": 461, - "min_sentence2_length": 24, - "average_sentence2_length": 115.09375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73678 - }, - "eng_Latn-yss_Latn": { - "min_sentence1_length": 1, - "average_sentence1_length": 145.40625, - "max_sentence1_length": 341, - "min_sentence2_length": 23, - "average_sentence2_length": 214.68359375, - "max_sentence2_length": 524, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92183 - }, - "yss_Latn-eng_Latn": { - "min_sentence1_length": 23, - "average_sentence1_length": 214.68359375, - "max_sentence1_length": 524, - "min_sentence2_length": 1, - "average_sentence2_length": 145.40625, - "max_sentence2_length": 341, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 92183 - }, - "eng_Latn-yuj_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.8125, - "max_sentence1_length": 827, - "min_sentence2_length": 48, - "average_sentence2_length": 221.23046875, - "max_sentence2_length": 747, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86283 - }, - "yuj_Latn-eng_Latn": { - "min_sentence1_length": 48, - "average_sentence1_length": 221.23046875, - "max_sentence1_length": 747, - "min_sentence2_length": 24, - "average_sentence2_length": 115.8125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 86283 - }, - "eng_Latn-yut_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.35546875, - "max_sentence1_length": 827, - "min_sentence2_length": 27, - "average_sentence2_length": 172.515625, - "max_sentence2_length": 549, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73439 - }, - "yut_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 172.515625, - "max_sentence1_length": 549, - "min_sentence2_length": 24, - "average_sentence2_length": 114.35546875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73439 - }, - "eng_Latn-yuw_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.0703125, - "max_sentence1_length": 227, - "min_sentence2_length": 43, - "average_sentence2_length": 221.84765625, - "max_sentence2_length": 809, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85227 - }, - "yuw_Latn-eng_Latn": { - "min_sentence1_length": 43, - "average_sentence1_length": 221.84765625, - "max_sentence1_length": 809, - "min_sentence2_length": 24, - "average_sentence2_length": 111.0703125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 85227 - }, - "eng_Latn-yva_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.35546875, - "max_sentence1_length": 230, - "min_sentence2_length": 37, - "average_sentence2_length": 241.4609375, - "max_sentence2_length": 1386, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90577 - }, - "yva_Latn-eng_Latn": { - "min_sentence1_length": 37, - "average_sentence1_length": 241.4609375, - "max_sentence1_length": 1386, - "min_sentence2_length": 24, - "average_sentence2_length": 112.35546875, - "max_sentence2_length": 230, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 90577 - }, - "eng_Latn-zaa_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.95703125, - "max_sentence1_length": 827, - "min_sentence2_length": 58, - "average_sentence2_length": 243.12890625, - "max_sentence2_length": 823, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91414 - }, - "zaa_Latn-eng_Latn": { - "min_sentence1_length": 58, - "average_sentence1_length": 243.12890625, - "max_sentence1_length": 823, - "min_sentence2_length": 24, - "average_sentence2_length": 113.95703125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 91414 - }, - "eng_Latn-zab_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.93359375, - "max_sentence1_length": 827, - "min_sentence2_length": 26, - "average_sentence2_length": 140.60546875, - "max_sentence2_length": 323, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65162 - }, - "zab_Latn-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 140.60546875, - "max_sentence1_length": 323, - "min_sentence2_length": 24, - "average_sentence2_length": 113.93359375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 65162 - }, - "eng_Latn-zac_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.5078125, - "max_sentence1_length": 827, - "min_sentence2_length": 26, - "average_sentence2_length": 173.734375, - "max_sentence2_length": 379, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73790 - }, - "zac_Latn-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 173.734375, - "max_sentence1_length": 379, - "min_sentence2_length": 24, - "average_sentence2_length": 114.5078125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 73790 - }, - "eng_Latn-zad_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.41796875, - "max_sentence1_length": 227, - "min_sentence2_length": 29, - "average_sentence2_length": 148.8125, - "max_sentence2_length": 371, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66619 - }, - "zad_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 148.8125, - "max_sentence1_length": 371, - "min_sentence2_length": 24, - "average_sentence2_length": 111.41796875, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66619 - }, - "eng_Latn-zai_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.6328125, - "max_sentence1_length": 227, - "min_sentence2_length": 35, - "average_sentence2_length": 136.72265625, - "max_sentence2_length": 399, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63323 - }, - "zai_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 136.72265625, - "max_sentence1_length": 399, - "min_sentence2_length": 24, - "average_sentence2_length": 110.6328125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63323 - }, - "eng_Latn-zaj_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 111.21484375, - "max_sentence1_length": 227, - "min_sentence2_length": 28, - "average_sentence2_length": 134.9921875, - "max_sentence2_length": 340, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63029 - }, - "zaj_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 134.9921875, - "max_sentence1_length": 340, - "min_sentence2_length": 24, - "average_sentence2_length": 111.21484375, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63029 - }, - "eng_Latn-zam_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.8046875, - "max_sentence1_length": 827, - "min_sentence2_length": 32, - "average_sentence2_length": 285.17578125, - "max_sentence2_length": 829, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 102139 - }, - "zam_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 285.17578125, - "max_sentence1_length": 829, - "min_sentence2_length": 24, - "average_sentence2_length": 113.8046875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 102139 - }, - "eng_Latn-zao_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.84375, - "max_sentence1_length": 827, - "min_sentence2_length": 36, - "average_sentence2_length": 149.453125, - "max_sentence2_length": 447, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67404 - }, - "zao_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 149.453125, - "max_sentence1_length": 447, - "min_sentence2_length": 24, - "average_sentence2_length": 113.84375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67404 - }, - "eng_Latn-zap_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.22265625, - "max_sentence1_length": 827, - "min_sentence2_length": 27, - "average_sentence2_length": 139.109375, - "max_sentence2_length": 358, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64597 - }, - "zap_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 139.109375, - "max_sentence1_length": 358, - "min_sentence2_length": 24, - "average_sentence2_length": 113.22265625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 64597 - }, - "eng_Latn-zar_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.078125, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 175.34375, - "max_sentence2_length": 531, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74092 - }, - "zar_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 175.34375, - "max_sentence1_length": 531, - "min_sentence2_length": 24, - "average_sentence2_length": 114.078125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74092 - }, - "eng_Latn-zas_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.93359375, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 146.23828125, - "max_sentence2_length": 329, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66604 - }, - "zas_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 146.23828125, - "max_sentence1_length": 329, - "min_sentence2_length": 24, - "average_sentence2_length": 113.93359375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 66604 - }, - "eng_Latn-zat_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.67578125, - "max_sentence1_length": 827, - "min_sentence2_length": 35, - "average_sentence2_length": 164.9765625, - "max_sentence2_length": 418, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71335 - }, - "zat_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 164.9765625, - "max_sentence1_length": 418, - "min_sentence2_length": 24, - "average_sentence2_length": 113.67578125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71335 - }, - "eng_Latn-zav_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.37890625, - "max_sentence1_length": 227, - "min_sentence2_length": 28, - "average_sentence2_length": 240.625, - "max_sentence2_length": 749, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89857 - }, - "zav_Latn-eng_Latn": { - "min_sentence1_length": 28, - "average_sentence1_length": 240.625, - "max_sentence1_length": 749, - "min_sentence2_length": 24, - "average_sentence2_length": 110.37890625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 89857 - }, - "eng_Latn-zaw_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.54296875, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 135.7890625, - "max_sentence2_length": 332, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63829 - }, - "zaw_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 135.7890625, - "max_sentence1_length": 332, - "min_sentence2_length": 24, - "average_sentence2_length": 113.54296875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63829 - }, - "eng_Latn-zca_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.73828125, - "max_sentence1_length": 827, - "min_sentence2_length": 30, - "average_sentence2_length": 167.70703125, - "max_sentence2_length": 440, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72050 - }, - "zca_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 167.70703125, - "max_sentence1_length": 440, - "min_sentence2_length": 24, - "average_sentence2_length": 113.73828125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72050 - }, - "eng_Latn-zga_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 116.0625, - "max_sentence1_length": 827, - "min_sentence2_length": 22, - "average_sentence2_length": 124.90234375, - "max_sentence2_length": 319, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61687 - }, - "zga_Latn-eng_Latn": { - "min_sentence1_length": 22, - "average_sentence1_length": 124.90234375, - "max_sentence1_length": 319, - "min_sentence2_length": 24, - "average_sentence2_length": 116.0625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 61687 - }, - "eng_Latn-zia_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.75390625, - "max_sentence1_length": 827, - "min_sentence2_length": 29, - "average_sentence2_length": 184.64453125, - "max_sentence2_length": 826, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76390 - }, - "zia_Latn-eng_Latn": { - "min_sentence1_length": 29, - "average_sentence1_length": 184.64453125, - "max_sentence1_length": 826, - "min_sentence2_length": 24, - "average_sentence2_length": 113.75390625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76390 - }, - "eng_Latn-ziw_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 112.39453125, - "max_sentence1_length": 376, - "min_sentence2_length": 19, - "average_sentence2_length": 124.8671875, - "max_sentence2_length": 349, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60739 - }, - "ziw_Latn-eng_Latn": { - "min_sentence1_length": 19, - "average_sentence1_length": 124.8671875, - "max_sentence1_length": 349, - "min_sentence2_length": 24, - "average_sentence2_length": 112.39453125, - "max_sentence2_length": 376, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 60739 - }, - "eng_Latn-zlm_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 25, - "average_sentence2_length": 131.09765625, - "max_sentence2_length": 302, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63096 - }, - "zlm_Latn-eng_Latn": { - "min_sentence1_length": 25, - "average_sentence1_length": 131.09765625, - "max_sentence1_length": 302, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 63096 - }, - "eng_Latn-zos_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.48828125, - "max_sentence1_length": 827, - "min_sentence2_length": 30, - "average_sentence2_length": 179.2265625, - "max_sentence2_length": 429, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75447 - }, - "zos_Latn-eng_Latn": { - "min_sentence1_length": 30, - "average_sentence1_length": 179.2265625, - "max_sentence1_length": 429, - "min_sentence2_length": 24, - "average_sentence2_length": 115.48828125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75447 - }, - "eng_Latn-zpc_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.0390625, - "max_sentence1_length": 827, - "min_sentence2_length": 32, - "average_sentence2_length": 182.3828125, - "max_sentence2_length": 515, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76140 - }, - "zpc_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 182.3828125, - "max_sentence1_length": 515, - "min_sentence2_length": 24, - "average_sentence2_length": 115.0390625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 76140 - }, - "eng_Latn-zpl_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.3046875, - "max_sentence1_length": 827, - "min_sentence2_length": 34, - "average_sentence2_length": 170.328125, - "max_sentence2_length": 409, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72610 - }, - "zpl_Latn-eng_Latn": { - "min_sentence1_length": 34, - "average_sentence1_length": 170.328125, - "max_sentence1_length": 409, - "min_sentence2_length": 24, - "average_sentence2_length": 113.3046875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 72610 - }, - "eng_Latn-zpm_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 108.8125, - "max_sentence1_length": 227, - "min_sentence2_length": 36, - "average_sentence2_length": 192.28125, - "max_sentence2_length": 615, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77080 - }, - "zpm_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 192.28125, - "max_sentence1_length": 615, - "min_sentence2_length": 24, - "average_sentence2_length": 108.8125, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 77080 - }, - "eng_Latn-zpo_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.3984375, - "max_sentence1_length": 827, - "min_sentence2_length": 27, - "average_sentence2_length": 149.56640625, - "max_sentence2_length": 398, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67831 - }, - "zpo_Latn-eng_Latn": { - "min_sentence1_length": 27, - "average_sentence1_length": 149.56640625, - "max_sentence1_length": 398, - "min_sentence2_length": 24, - "average_sentence2_length": 115.3984375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67831 - }, - "eng_Latn-zpq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 110.1640625, - "max_sentence1_length": 227, - "min_sentence2_length": 40, - "average_sentence2_length": 200.3203125, - "max_sentence2_length": 664, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79484 - }, - "zpq_Latn-eng_Latn": { - "min_sentence1_length": 40, - "average_sentence1_length": 200.3203125, - "max_sentence1_length": 664, - "min_sentence2_length": 24, - "average_sentence2_length": 110.1640625, - "max_sentence2_length": 227, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 79484 - }, - "eng_Latn-zpu_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.37109375, - "max_sentence1_length": 827, - "min_sentence2_length": 35, - "average_sentence2_length": 165.80859375, - "max_sentence2_length": 409, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71982 - }, - "zpu_Latn-eng_Latn": { - "min_sentence1_length": 35, - "average_sentence1_length": 165.80859375, - "max_sentence1_length": 409, - "min_sentence2_length": 24, - "average_sentence2_length": 115.37109375, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 71982 - }, - "eng_Latn-zpv_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.69140625, - "max_sentence1_length": 827, - "min_sentence2_length": 42, - "average_sentence2_length": 201.7734375, - "max_sentence2_length": 522, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80759 - }, - "zpv_Latn-eng_Latn": { - "min_sentence1_length": 42, - "average_sentence1_length": 201.7734375, - "max_sentence1_length": 522, - "min_sentence2_length": 24, - "average_sentence2_length": 113.69140625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 80759 - }, - "eng_Latn-zpz_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.1015625, - "max_sentence1_length": 827, - "min_sentence2_length": 36, - "average_sentence2_length": 180.28125, - "max_sentence2_length": 506, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75618 - }, - "zpz_Latn-eng_Latn": { - "min_sentence1_length": 36, - "average_sentence1_length": 180.28125, - "max_sentence1_length": 506, - "min_sentence2_length": 24, - "average_sentence2_length": 115.1015625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 75618 - }, - "eng_Latn-zsr_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 114.078125, - "max_sentence1_length": 827, - "min_sentence2_length": 31, - "average_sentence2_length": 161.50390625, - "max_sentence2_length": 488, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70549 - }, - "zsr_Latn-eng_Latn": { - "min_sentence1_length": 31, - "average_sentence1_length": 161.50390625, - "max_sentence1_length": 488, - "min_sentence2_length": 24, - "average_sentence2_length": 114.078125, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 70549 - }, - "eng_Latn-ztq_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 115.171875, - "max_sentence1_length": 827, - "min_sentence2_length": 19, - "average_sentence2_length": 117.6953125, - "max_sentence2_length": 505, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59614 - }, - "ztq_Latn-eng_Latn": { - "min_sentence1_length": 19, - "average_sentence1_length": 117.6953125, - "max_sentence1_length": 505, - "min_sentence2_length": 24, - "average_sentence2_length": 115.171875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 59614 - }, - "eng_Latn-zty_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.1171875, - "max_sentence1_length": 827, - "min_sentence2_length": 26, - "average_sentence2_length": 176.6484375, - "max_sentence2_length": 528, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74180 - }, - "zty_Latn-eng_Latn": { - "min_sentence1_length": 26, - "average_sentence1_length": 176.6484375, - "max_sentence1_length": 528, - "min_sentence2_length": 24, - "average_sentence2_length": 113.1171875, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 74180 - }, - "eng_Latn-zyp_Latn": { - "min_sentence1_length": 24, - "average_sentence1_length": 113.59765625, - "max_sentence1_length": 827, - "min_sentence2_length": 32, - "average_sentence2_length": 150.9375, - "max_sentence2_length": 342, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67721 - }, - "zyp_Latn-eng_Latn": { - "min_sentence1_length": 32, - "average_sentence1_length": 150.9375, - "max_sentence1_length": 342, - "min_sentence2_length": 24, - "average_sentence2_length": 113.59765625, - "max_sentence2_length": 827, - "num_samples": 256, - "num_samples_sentence2": 256, - "number_of_characters": 67721 - } - } - } -} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/IN22GenBitextMining.json b/mteb/descriptive_stats/BitextMining/IN22GenBitextMining.json index 41741d3da..c53818c9c 100644 --- a/mteb/descriptive_stats/BitextMining/IN22GenBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/IN22GenBitextMining.json @@ -1,5580 +1,6594 @@ { "test": { + "num_samples": 518144, + "number_of_characters": 162367876, + "unique_pairs": 518101, "min_sentence1_length": 9, "average_sentence1_length": 156.6821925951087, "max_sentence1_length": 692, + "unique_sentence1": 23550, "min_sentence2_length": 9, "average_sentence2_length": 156.6821925951087, "max_sentence2_length": 692, - "num_samples": 518144, - "num_samples_sentence2": 518144, - "number_of_characters": 162367876, + "unique_sentence2": 23550, "hf_subset_descriptive_stats": { "asm_Beng-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 310622, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 310622 + "unique_sentence2": 1024 }, "asm_Beng-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 323609, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 323609 + "unique_sentence2": 1024 }, "asm_Beng-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 319020, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319020 + "unique_sentence2": 1024 }, "asm_Beng-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 320098, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320098 + "unique_sentence2": 1024 }, "asm_Beng-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 312594, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312594 + "unique_sentence2": 1024 }, "asm_Beng-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 309440, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 309440 + "unique_sentence2": 1024 }, "asm_Beng-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 320106, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320106 + "unique_sentence2": 1024 }, "asm_Beng-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 332064, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 332064 + "unique_sentence2": 1024 }, "asm_Beng-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 322764, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 322764 + "unique_sentence2": 1024 }, "asm_Beng-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 308682, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 308682 + "unique_sentence2": 1024 }, "asm_Beng-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 343636, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 343636 + "unique_sentence2": 1024 }, "asm_Beng-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 321784, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 321784 + "unique_sentence2": 1024 }, "asm_Beng-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 313134, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 313134 + "unique_sentence2": 1024 }, "asm_Beng-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 313419, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 313419 + "unique_sentence2": 1024 }, "asm_Beng-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 334226, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 334226 + "unique_sentence2": 1024 }, "asm_Beng-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 306863, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 306863 + "unique_sentence2": 1024 }, "asm_Beng-san_Deva": { + "num_samples": 1024, + "number_of_characters": 318079, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318079 + "unique_sentence2": 1024 }, "asm_Beng-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 326732, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 326732 + "unique_sentence2": 1024 }, "asm_Beng-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 320421, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320421 + "unique_sentence2": 1024 }, "asm_Beng-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 348346, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 348346 + "unique_sentence2": 1023 }, "asm_Beng-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 319045, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319045 + "unique_sentence2": 1024 }, "asm_Beng-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 315134, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 156.6982421875, "max_sentence1_length": 582, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 315134 + "unique_sentence2": 1024 }, "ben_Beng-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 310622, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 310622 + "unique_sentence2": 1024 }, "ben_Beng-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 313313, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 313313 + "unique_sentence2": 1024 }, "ben_Beng-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 308724, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 308724 + "unique_sentence2": 1024 }, "ben_Beng-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 309802, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 309802 + "unique_sentence2": 1024 }, "ben_Beng-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 302298, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 302298 + "unique_sentence2": 1024 }, "ben_Beng-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 299144, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 299144 + "unique_sentence2": 1024 }, "ben_Beng-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 309810, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 309810 + "unique_sentence2": 1024 }, "ben_Beng-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 321768, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 321768 + "unique_sentence2": 1024 }, "ben_Beng-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 312468, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312468 + "unique_sentence2": 1024 }, "ben_Beng-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 298386, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 298386 + "unique_sentence2": 1024 }, "ben_Beng-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 333340, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 333340 + "unique_sentence2": 1024 }, "ben_Beng-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 311488, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 311488 + "unique_sentence2": 1024 }, "ben_Beng-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 302838, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 302838 + "unique_sentence2": 1024 }, "ben_Beng-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 303123, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 303123 + "unique_sentence2": 1024 }, "ben_Beng-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 323930, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 323930 + "unique_sentence2": 1024 }, "ben_Beng-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 296567, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 296567 + "unique_sentence2": 1024 }, "ben_Beng-san_Deva": { + "num_samples": 1024, + "number_of_characters": 307783, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 307783 + "unique_sentence2": 1024 }, "ben_Beng-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 316436, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 316436 + "unique_sentence2": 1024 }, "ben_Beng-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 310125, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 310125 + "unique_sentence2": 1024 }, "ben_Beng-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 338050, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 338050 + "unique_sentence2": 1023 }, "ben_Beng-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 308749, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 308749 + "unique_sentence2": 1024 }, "ben_Beng-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 304838, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 146.6435546875, "max_sentence1_length": 538, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 304838 + "unique_sentence2": 1024 }, "brx_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 323609, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 323609 + "unique_sentence2": 1024 }, "brx_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 313313, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 313313 + "unique_sentence2": 1024 }, "brx_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 321711, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 321711 + "unique_sentence2": 1024 }, "brx_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 322789, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 322789 + "unique_sentence2": 1024 }, "brx_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 315285, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 315285 + "unique_sentence2": 1024 }, "brx_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 312131, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312131 + "unique_sentence2": 1024 }, "brx_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 322797, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 322797 + "unique_sentence2": 1024 }, "brx_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 334755, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 334755 + "unique_sentence2": 1024 }, "brx_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 325455, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 325455 + "unique_sentence2": 1024 }, "brx_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 311373, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 311373 + "unique_sentence2": 1024 }, "brx_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 346327, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 346327 + "unique_sentence2": 1024 }, "brx_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 324475, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 324475 + "unique_sentence2": 1024 }, "brx_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 315825, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 315825 + "unique_sentence2": 1024 }, "brx_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 316110, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 316110 + "unique_sentence2": 1024 }, "brx_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 336917, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 336917 + "unique_sentence2": 1024 }, "brx_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 309554, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 309554 + "unique_sentence2": 1024 }, "brx_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 320770, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320770 + "unique_sentence2": 1024 }, "brx_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 329423, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 329423 + "unique_sentence2": 1024 }, "brx_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 323112, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 323112 + "unique_sentence2": 1024 }, "brx_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 351037, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 351037 + "unique_sentence2": 1023 }, "brx_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 321736, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 321736 + "unique_sentence2": 1024 }, "brx_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 317825, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 159.326171875, "max_sentence1_length": 631, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 317825 + "unique_sentence2": 1024 }, "doi_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 319020, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319020 + "unique_sentence2": 1024 }, "doi_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 308724, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 308724 + "unique_sentence2": 1024 }, "doi_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 321711, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 321711 + "unique_sentence2": 1024 }, "doi_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 318200, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318200 + "unique_sentence2": 1024 }, "doi_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 310696, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 310696 + "unique_sentence2": 1024 }, "doi_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 307542, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 307542 + "unique_sentence2": 1024 }, "doi_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 318208, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318208 + "unique_sentence2": 1024 }, "doi_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 330166, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 330166 + "unique_sentence2": 1024 }, "doi_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 320866, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320866 + "unique_sentence2": 1024 }, "doi_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 306784, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 306784 + "unique_sentence2": 1024 }, "doi_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 341738, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 341738 + "unique_sentence2": 1024 }, "doi_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 319886, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319886 + "unique_sentence2": 1024 }, "doi_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 311236, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 311236 + "unique_sentence2": 1024 }, "doi_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 311521, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 311521 + "unique_sentence2": 1024 }, "doi_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 332328, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 332328 + "unique_sentence2": 1024 }, "doi_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 304965, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 304965 + "unique_sentence2": 1024 }, "doi_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 316181, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 316181 + "unique_sentence2": 1024 }, "doi_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 324834, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 324834 + "unique_sentence2": 1024 }, "doi_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 318523, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318523 + "unique_sentence2": 1024 }, "doi_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 346448, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 346448 + "unique_sentence2": 1023 }, "doi_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 317147, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 317147 + "unique_sentence2": 1024 }, "doi_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 313236, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.8447265625, "max_sentence1_length": 500, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 313236 + "unique_sentence2": 1024 }, "eng_Latn-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 320098, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320098 + "unique_sentence2": 1024 }, "eng_Latn-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 309802, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 309802 + "unique_sentence2": 1024 }, "eng_Latn-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 322789, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 322789 + "unique_sentence2": 1024 }, "eng_Latn-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 318200, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318200 + "unique_sentence2": 1024 }, "eng_Latn-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 311774, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 311774 + "unique_sentence2": 1024 }, "eng_Latn-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 308620, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 308620 + "unique_sentence2": 1024 }, "eng_Latn-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 319286, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319286 + "unique_sentence2": 1024 }, "eng_Latn-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 331244, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 331244 + "unique_sentence2": 1024 }, "eng_Latn-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 321944, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 321944 + "unique_sentence2": 1024 }, "eng_Latn-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 307862, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 307862 + "unique_sentence2": 1024 }, "eng_Latn-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 342816, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 342816 + "unique_sentence2": 1024 }, "eng_Latn-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 320964, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320964 + "unique_sentence2": 1024 }, "eng_Latn-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 312314, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312314 + "unique_sentence2": 1024 }, "eng_Latn-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 312599, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312599 + "unique_sentence2": 1024 }, "eng_Latn-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 333406, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 333406 + "unique_sentence2": 1024 }, "eng_Latn-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 306043, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 306043 + "unique_sentence2": 1024 }, "eng_Latn-san_Deva": { + "num_samples": 1024, + "number_of_characters": 317259, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 317259 + "unique_sentence2": 1024 }, "eng_Latn-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 325912, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 325912 + "unique_sentence2": 1024 }, "eng_Latn-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 319601, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319601 + "unique_sentence2": 1024 }, "eng_Latn-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 347526, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 347526 + "unique_sentence2": 1023 }, "eng_Latn-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 318225, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318225 + "unique_sentence2": 1024 }, "eng_Latn-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 314314, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 155.8974609375, "max_sentence1_length": 532, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 314314 + "unique_sentence2": 1024 }, "gom_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 312594, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312594 + "unique_sentence2": 1024 }, "gom_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 302298, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 302298 + "unique_sentence2": 1024 }, "gom_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 315285, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 315285 + "unique_sentence2": 1024 }, "gom_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 310696, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 310696 + "unique_sentence2": 1024 }, "gom_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 311774, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 311774 + "unique_sentence2": 1024 }, "gom_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 301116, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 301116 + "unique_sentence2": 1024 }, "gom_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 311782, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 311782 + "unique_sentence2": 1024 }, "gom_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 323740, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 323740 + "unique_sentence2": 1024 }, "gom_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 314440, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 314440 + "unique_sentence2": 1024 }, "gom_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 300358, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 300358 + "unique_sentence2": 1024 }, "gom_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 335312, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 335312 + "unique_sentence2": 1024 }, "gom_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 313460, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 313460 + "unique_sentence2": 1024 }, "gom_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 304810, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 304810 + "unique_sentence2": 1024 }, "gom_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 305095, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 305095 + "unique_sentence2": 1024 }, "gom_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 325902, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 325902 + "unique_sentence2": 1024 }, "gom_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 298539, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 298539 + "unique_sentence2": 1024 }, "gom_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 309755, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 309755 + "unique_sentence2": 1024 }, "gom_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 318408, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318408 + "unique_sentence2": 1024 }, "gom_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 312097, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312097 + "unique_sentence2": 1024 }, "gom_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 340022, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 340022 + "unique_sentence2": 1023 }, "gom_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 310721, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 310721 + "unique_sentence2": 1024 }, "gom_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 306810, + "unique_pairs": 1024, "min_sentence1_length": 17, "average_sentence1_length": 148.5693359375, "max_sentence1_length": 537, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 306810 + "unique_sentence2": 1024 }, "guj_Gujr-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 309440, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 309440 + "unique_sentence2": 1024 }, "guj_Gujr-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 299144, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 299144 + "unique_sentence2": 1024 }, "guj_Gujr-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 312131, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312131 + "unique_sentence2": 1024 }, "guj_Gujr-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 307542, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 307542 + "unique_sentence2": 1024 }, "guj_Gujr-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 308620, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 308620 + "unique_sentence2": 1024 }, "guj_Gujr-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 301116, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 301116 + "unique_sentence2": 1024 }, "guj_Gujr-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 308628, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 308628 + "unique_sentence2": 1024 }, "guj_Gujr-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 320586, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320586 + "unique_sentence2": 1024 }, "guj_Gujr-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 311286, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 311286 + "unique_sentence2": 1024 }, "guj_Gujr-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 297204, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 297204 + "unique_sentence2": 1024 }, "guj_Gujr-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 332158, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 332158 + "unique_sentence2": 1024 }, "guj_Gujr-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 310306, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 310306 + "unique_sentence2": 1024 }, "guj_Gujr-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 301656, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 301656 + "unique_sentence2": 1024 }, "guj_Gujr-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 301941, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 301941 + "unique_sentence2": 1024 }, "guj_Gujr-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 322748, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 322748 + "unique_sentence2": 1024 }, "guj_Gujr-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 295385, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 295385 + "unique_sentence2": 1024 }, "guj_Gujr-san_Deva": { + "num_samples": 1024, + "number_of_characters": 306601, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 306601 + "unique_sentence2": 1024 }, "guj_Gujr-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 315254, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 315254 + "unique_sentence2": 1024 }, "guj_Gujr-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 308943, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 308943 + "unique_sentence2": 1024 }, "guj_Gujr-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 336868, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 336868 + "unique_sentence2": 1023 }, "guj_Gujr-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 307567, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 307567 + "unique_sentence2": 1024 }, "guj_Gujr-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 303656, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 145.4892578125, "max_sentence1_length": 488, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 303656 + "unique_sentence2": 1024 }, "hin_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 320106, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320106 + "unique_sentence2": 1024 }, "hin_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 309810, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 309810 + "unique_sentence2": 1024 }, "hin_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 322797, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 322797 + "unique_sentence2": 1024 }, "hin_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 318208, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318208 + "unique_sentence2": 1024 }, "hin_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 319286, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319286 + "unique_sentence2": 1024 }, "hin_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 311782, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 311782 + "unique_sentence2": 1024 }, "hin_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 308628, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 308628 + "unique_sentence2": 1024 }, "hin_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 331252, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 331252 + "unique_sentence2": 1024 }, "hin_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 321952, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 321952 + "unique_sentence2": 1024 }, "hin_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 307870, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 307870 + "unique_sentence2": 1024 }, "hin_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 342824, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 342824 + "unique_sentence2": 1024 }, "hin_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 320972, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320972 + "unique_sentence2": 1024 }, "hin_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 312322, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312322 + "unique_sentence2": 1024 }, "hin_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 312607, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312607 + "unique_sentence2": 1024 }, "hin_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 333414, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 333414 + "unique_sentence2": 1024 }, "hin_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 306051, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 306051 + "unique_sentence2": 1024 }, "hin_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 317267, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 317267 + "unique_sentence2": 1024 }, "hin_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 325920, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 325920 + "unique_sentence2": 1024 }, "hin_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 319609, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319609 + "unique_sentence2": 1024 }, "hin_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 347534, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 347534 + "unique_sentence2": 1023 }, "hin_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 318233, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318233 + "unique_sentence2": 1024 }, "hin_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 314322, + "unique_pairs": 1024, "min_sentence1_length": 21, "average_sentence1_length": 155.9052734375, "max_sentence1_length": 531, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 314322 + "unique_sentence2": 1024 }, "kan_Knda-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 332064, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 332064 + "unique_sentence2": 1024 }, "kan_Knda-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 321768, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 321768 + "unique_sentence2": 1024 }, "kan_Knda-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 334755, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 334755 + "unique_sentence2": 1024 }, "kan_Knda-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 330166, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 330166 + "unique_sentence2": 1024 }, "kan_Knda-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 331244, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 331244 + "unique_sentence2": 1024 }, "kan_Knda-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 323740, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 323740 + "unique_sentence2": 1024 }, "kan_Knda-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 320586, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320586 + "unique_sentence2": 1024 }, "kan_Knda-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 331252, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 331252 + "unique_sentence2": 1024 }, "kan_Knda-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 333910, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 333910 + "unique_sentence2": 1024 }, "kan_Knda-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 319828, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319828 + "unique_sentence2": 1024 }, "kan_Knda-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 354782, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 354782 + "unique_sentence2": 1024 }, "kan_Knda-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 332930, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 332930 + "unique_sentence2": 1024 }, "kan_Knda-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 324280, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 324280 + "unique_sentence2": 1024 }, "kan_Knda-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 324565, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 324565 + "unique_sentence2": 1024 }, "kan_Knda-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 345372, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 345372 + "unique_sentence2": 1024 }, "kan_Knda-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 318009, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318009 + "unique_sentence2": 1024 }, "kan_Knda-san_Deva": { + "num_samples": 1024, + "number_of_characters": 329225, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 329225 + "unique_sentence2": 1024 }, "kan_Knda-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 337878, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 337878 + "unique_sentence2": 1024 }, "kan_Knda-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 331567, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 331567 + "unique_sentence2": 1024 }, "kan_Knda-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 359492, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 359492 + "unique_sentence2": 1023 }, "kan_Knda-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 330191, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 330191 + "unique_sentence2": 1024 }, "kan_Knda-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 326280, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 167.5830078125, "max_sentence1_length": 668, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 326280 + "unique_sentence2": 1024 }, "kas_Arab-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 322764, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 322764 + "unique_sentence2": 1024 }, "kas_Arab-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 312468, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312468 + "unique_sentence2": 1024 }, "kas_Arab-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 325455, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 325455 + "unique_sentence2": 1024 }, "kas_Arab-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 320866, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320866 + "unique_sentence2": 1024 }, "kas_Arab-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 321944, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 321944 + "unique_sentence2": 1024 }, "kas_Arab-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 314440, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 314440 + "unique_sentence2": 1024 }, "kas_Arab-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 311286, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 311286 + "unique_sentence2": 1024 }, "kas_Arab-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 321952, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 321952 + "unique_sentence2": 1024 }, "kas_Arab-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 333910, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 333910 + "unique_sentence2": 1024 }, "kas_Arab-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 310528, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 310528 + "unique_sentence2": 1024 }, "kas_Arab-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 345482, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 345482 + "unique_sentence2": 1024 }, "kas_Arab-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 323630, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 323630 + "unique_sentence2": 1024 }, "kas_Arab-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 314980, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 314980 + "unique_sentence2": 1024 }, "kas_Arab-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 315265, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 315265 + "unique_sentence2": 1024 }, "kas_Arab-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 336072, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 336072 + "unique_sentence2": 1024 }, "kas_Arab-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 308709, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 308709 + "unique_sentence2": 1024 }, "kas_Arab-san_Deva": { + "num_samples": 1024, + "number_of_characters": 319925, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319925 + "unique_sentence2": 1024 }, "kas_Arab-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 328578, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 328578 + "unique_sentence2": 1024 }, "kas_Arab-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 322267, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 322267 + "unique_sentence2": 1024 }, "kas_Arab-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 350192, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 350192 + "unique_sentence2": 1023 }, "kas_Arab-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 320891, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320891 + "unique_sentence2": 1024 }, "kas_Arab-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 316980, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 158.5009765625, "max_sentence1_length": 520, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 316980 + "unique_sentence2": 1024 }, "mai_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 308682, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 308682 + "unique_sentence2": 1024 }, "mai_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 298386, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 298386 + "unique_sentence2": 1024 }, "mai_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 311373, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 311373 + "unique_sentence2": 1024 }, "mai_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 306784, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 306784 + "unique_sentence2": 1024 }, "mai_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 307862, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 307862 + "unique_sentence2": 1024 }, "mai_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 300358, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 300358 + "unique_sentence2": 1024 }, "mai_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 297204, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 297204 + "unique_sentence2": 1024 }, "mai_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 307870, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 307870 + "unique_sentence2": 1024 }, "mai_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 319828, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319828 + "unique_sentence2": 1024 }, "mai_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 310528, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 310528 + "unique_sentence2": 1024 }, "mai_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 331400, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 331400 + "unique_sentence2": 1024 }, "mai_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 309548, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 309548 + "unique_sentence2": 1024 }, "mai_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 300898, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 300898 + "unique_sentence2": 1024 }, "mai_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 301183, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 301183 + "unique_sentence2": 1024 }, "mai_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 321990, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 321990 + "unique_sentence2": 1024 }, "mai_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 294627, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 294627 + "unique_sentence2": 1024 }, "mai_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 305843, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 305843 + "unique_sentence2": 1024 }, "mai_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 314496, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 314496 + "unique_sentence2": 1024 }, "mai_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 308185, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 308185 + "unique_sentence2": 1024 }, "mai_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 336110, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 336110 + "unique_sentence2": 1023 }, "mai_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 306809, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 306809 + "unique_sentence2": 1024 }, "mai_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 302898, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 144.7490234375, "max_sentence1_length": 562, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 302898 + "unique_sentence2": 1024 }, "mal_Mlym-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 343636, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 343636 + "unique_sentence2": 1024 }, "mal_Mlym-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 333340, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 333340 + "unique_sentence2": 1024 }, "mal_Mlym-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 346327, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 346327 + "unique_sentence2": 1024 }, "mal_Mlym-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 341738, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 341738 + "unique_sentence2": 1024 }, "mal_Mlym-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 342816, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 342816 + "unique_sentence2": 1024 }, "mal_Mlym-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 335312, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 335312 + "unique_sentence2": 1024 }, "mal_Mlym-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 332158, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 332158 + "unique_sentence2": 1024 }, "mal_Mlym-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 342824, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 342824 + "unique_sentence2": 1024 }, "mal_Mlym-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 354782, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 354782 + "unique_sentence2": 1024 }, "mal_Mlym-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 345482, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 345482 + "unique_sentence2": 1024 }, "mal_Mlym-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 331400, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 331400 + "unique_sentence2": 1024 }, "mal_Mlym-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 344502, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 344502 + "unique_sentence2": 1024 }, "mal_Mlym-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 335852, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 335852 + "unique_sentence2": 1024 }, "mal_Mlym-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 336137, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 336137 + "unique_sentence2": 1024 }, "mal_Mlym-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 356944, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 356944 + "unique_sentence2": 1024 }, "mal_Mlym-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 329581, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 329581 + "unique_sentence2": 1024 }, "mal_Mlym-san_Deva": { + "num_samples": 1024, + "number_of_characters": 340797, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 340797 + "unique_sentence2": 1024 }, "mal_Mlym-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 349450, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 349450 + "unique_sentence2": 1024 }, "mal_Mlym-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 343139, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 343139 + "unique_sentence2": 1024 }, "mal_Mlym-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 371064, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 371064 + "unique_sentence2": 1023 }, "mal_Mlym-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 341763, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 341763 + "unique_sentence2": 1024 }, "mal_Mlym-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 337852, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 178.8837890625, "max_sentence1_length": 692, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 337852 + "unique_sentence2": 1024 }, "mar_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 321784, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 321784 + "unique_sentence2": 1024 }, "mar_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 311488, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 311488 + "unique_sentence2": 1024 }, "mar_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 324475, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 324475 + "unique_sentence2": 1024 }, "mar_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 319886, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319886 + "unique_sentence2": 1024 }, "mar_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 320964, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320964 + "unique_sentence2": 1024 }, "mar_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 313460, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 313460 + "unique_sentence2": 1024 }, "mar_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 310306, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 310306 + "unique_sentence2": 1024 }, "mar_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 320972, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320972 + "unique_sentence2": 1024 }, "mar_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 332930, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 332930 + "unique_sentence2": 1024 }, "mar_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 323630, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 323630 + "unique_sentence2": 1024 }, "mar_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 309548, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 309548 + "unique_sentence2": 1024 }, "mar_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 344502, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 344502 + "unique_sentence2": 1024 }, "mar_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 314000, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 314000 + "unique_sentence2": 1024 }, "mar_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 314285, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 314285 + "unique_sentence2": 1024 }, "mar_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 335092, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 335092 + "unique_sentence2": 1024 }, "mar_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 307729, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 307729 + "unique_sentence2": 1024 }, "mar_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 318945, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318945 + "unique_sentence2": 1024 }, "mar_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 327598, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 327598 + "unique_sentence2": 1024 }, "mar_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 321287, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 321287 + "unique_sentence2": 1024 }, "mar_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 349212, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 349212 + "unique_sentence2": 1023 }, "mar_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 319911, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319911 + "unique_sentence2": 1024 }, "mar_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 316000, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 157.5439453125, "max_sentence1_length": 555, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 316000 + "unique_sentence2": 1024 }, "mni_Mtei-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 313134, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 313134 + "unique_sentence2": 1024 }, "mni_Mtei-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 302838, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 302838 + "unique_sentence2": 1024 }, "mni_Mtei-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 315825, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 315825 + "unique_sentence2": 1024 }, "mni_Mtei-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 311236, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 311236 + "unique_sentence2": 1024 }, "mni_Mtei-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 312314, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312314 + "unique_sentence2": 1024 }, "mni_Mtei-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 304810, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 304810 + "unique_sentence2": 1024 }, "mni_Mtei-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 301656, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 301656 + "unique_sentence2": 1024 }, "mni_Mtei-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 312322, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312322 + "unique_sentence2": 1024 }, "mni_Mtei-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 324280, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 324280 + "unique_sentence2": 1024 }, "mni_Mtei-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 314980, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 314980 + "unique_sentence2": 1024 }, "mni_Mtei-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 300898, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 300898 + "unique_sentence2": 1024 }, "mni_Mtei-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 335852, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 335852 + "unique_sentence2": 1024 }, "mni_Mtei-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 314000, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 314000 + "unique_sentence2": 1024 }, "mni_Mtei-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 305635, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 305635 + "unique_sentence2": 1024 }, "mni_Mtei-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 326442, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 326442 + "unique_sentence2": 1024 }, "mni_Mtei-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 299079, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 299079 + "unique_sentence2": 1024 }, "mni_Mtei-san_Deva": { + "num_samples": 1024, + "number_of_characters": 310295, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 310295 + "unique_sentence2": 1024 }, "mni_Mtei-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 318948, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318948 + "unique_sentence2": 1024 }, "mni_Mtei-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 312637, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312637 + "unique_sentence2": 1024 }, "mni_Mtei-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 340562, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 340562 + "unique_sentence2": 1023 }, "mni_Mtei-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 311261, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 311261 + "unique_sentence2": 1024 }, "mni_Mtei-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 307350, + "unique_pairs": 1024, "min_sentence1_length": 16, "average_sentence1_length": 149.0966796875, "max_sentence1_length": 597, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 307350 + "unique_sentence2": 1024 }, "npi_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 313419, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 313419 + "unique_sentence2": 1024 }, "npi_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 303123, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 303123 + "unique_sentence2": 1024 }, "npi_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 316110, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 316110 + "unique_sentence2": 1024 }, "npi_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 311521, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 311521 + "unique_sentence2": 1024 }, "npi_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 312599, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312599 + "unique_sentence2": 1024 }, "npi_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 305095, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 305095 + "unique_sentence2": 1024 }, "npi_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 301941, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 301941 + "unique_sentence2": 1024 }, "npi_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 312607, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312607 + "unique_sentence2": 1024 }, "npi_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 324565, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 324565 + "unique_sentence2": 1024 }, "npi_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 315265, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 315265 + "unique_sentence2": 1024 }, "npi_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 301183, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 301183 + "unique_sentence2": 1024 }, "npi_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 336137, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 336137 + "unique_sentence2": 1024 }, "npi_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 314285, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 314285 + "unique_sentence2": 1024 }, "npi_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 305635, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 305635 + "unique_sentence2": 1024 }, "npi_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 326727, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 326727 + "unique_sentence2": 1024 }, "npi_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 299364, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 299364 + "unique_sentence2": 1024 }, "npi_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 310580, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 310580 + "unique_sentence2": 1024 }, "npi_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 319233, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319233 + "unique_sentence2": 1024 }, "npi_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 312922, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312922 + "unique_sentence2": 1024 }, "npi_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 340847, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 340847 + "unique_sentence2": 1023 }, "npi_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 311546, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 311546 + "unique_sentence2": 1024 }, "npi_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 307635, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 149.375, "max_sentence1_length": 525, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 307635 + "unique_sentence2": 1024 }, "ory_Orya-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 334226, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 334226 + "unique_sentence2": 1024 }, "ory_Orya-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 323930, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 323930 + "unique_sentence2": 1024 }, "ory_Orya-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 336917, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 336917 + "unique_sentence2": 1024 }, "ory_Orya-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 332328, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 332328 + "unique_sentence2": 1024 }, "ory_Orya-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 333406, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 333406 + "unique_sentence2": 1024 }, "ory_Orya-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 325902, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 325902 + "unique_sentence2": 1024 }, "ory_Orya-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 322748, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 322748 + "unique_sentence2": 1024 }, "ory_Orya-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 333414, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 333414 + "unique_sentence2": 1024 }, "ory_Orya-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 345372, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 345372 + "unique_sentence2": 1024 }, "ory_Orya-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 336072, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 336072 + "unique_sentence2": 1024 }, "ory_Orya-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 321990, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 321990 + "unique_sentence2": 1024 }, "ory_Orya-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 356944, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 356944 + "unique_sentence2": 1024 }, "ory_Orya-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 335092, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 335092 + "unique_sentence2": 1024 }, "ory_Orya-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 326442, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 326442 + "unique_sentence2": 1024 }, "ory_Orya-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 326727, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 326727 + "unique_sentence2": 1024 }, "ory_Orya-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 320171, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320171 + "unique_sentence2": 1024 }, "ory_Orya-san_Deva": { + "num_samples": 1024, + "number_of_characters": 331387, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 331387 + "unique_sentence2": 1024 }, "ory_Orya-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 340040, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 340040 + "unique_sentence2": 1024 }, "ory_Orya-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 333729, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 333729 + "unique_sentence2": 1024 }, "ory_Orya-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 361654, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 361654 + "unique_sentence2": 1023 }, "ory_Orya-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 332353, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 332353 + "unique_sentence2": 1024 }, "ory_Orya-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 328442, + "unique_pairs": 1024, "min_sentence1_length": 10, "average_sentence1_length": 169.6943359375, "max_sentence1_length": 578, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 328442 + "unique_sentence2": 1024 }, "pan_Guru-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 306863, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 306863 + "unique_sentence2": 1024 }, "pan_Guru-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 296567, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 296567 + "unique_sentence2": 1024 }, "pan_Guru-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 309554, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 309554 + "unique_sentence2": 1024 }, "pan_Guru-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 304965, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 304965 + "unique_sentence2": 1024 }, "pan_Guru-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 306043, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 306043 + "unique_sentence2": 1024 }, "pan_Guru-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 298539, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 298539 + "unique_sentence2": 1024 }, "pan_Guru-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 295385, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 295385 + "unique_sentence2": 1024 }, "pan_Guru-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 306051, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 306051 + "unique_sentence2": 1024 }, "pan_Guru-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 318009, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318009 + "unique_sentence2": 1024 }, "pan_Guru-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 308709, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 308709 + "unique_sentence2": 1024 }, "pan_Guru-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 294627, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 294627 + "unique_sentence2": 1024 }, "pan_Guru-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 329581, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 329581 + "unique_sentence2": 1024 }, "pan_Guru-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 307729, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 307729 + "unique_sentence2": 1024 }, "pan_Guru-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 299079, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 299079 + "unique_sentence2": 1024 }, "pan_Guru-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 299364, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 299364 + "unique_sentence2": 1024 }, "pan_Guru-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 320171, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320171 + "unique_sentence2": 1024 }, "pan_Guru-san_Deva": { + "num_samples": 1024, + "number_of_characters": 304024, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 304024 + "unique_sentence2": 1024 }, "pan_Guru-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 312677, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312677 + "unique_sentence2": 1024 }, "pan_Guru-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 306366, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 306366 + "unique_sentence2": 1024 }, "pan_Guru-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 334291, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 334291 + "unique_sentence2": 1023 }, "pan_Guru-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 304990, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 304990 + "unique_sentence2": 1024 }, "pan_Guru-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 301079, + "unique_pairs": 1024, "min_sentence1_length": 19, "average_sentence1_length": 142.97265625, "max_sentence1_length": 476, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 301079 + "unique_sentence2": 1024 }, "san_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 318079, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318079 + "unique_sentence2": 1024 }, "san_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 307783, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 307783 + "unique_sentence2": 1024 }, "san_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 320770, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320770 + "unique_sentence2": 1024 }, "san_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 316181, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 316181 + "unique_sentence2": 1024 }, "san_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 317259, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 317259 + "unique_sentence2": 1024 }, "san_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 309755, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 309755 + "unique_sentence2": 1024 }, "san_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 306601, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 306601 + "unique_sentence2": 1024 }, "san_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 317267, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 317267 + "unique_sentence2": 1024 }, "san_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 329225, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 329225 + "unique_sentence2": 1024 }, "san_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 319925, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319925 + "unique_sentence2": 1024 }, "san_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 305843, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 305843 + "unique_sentence2": 1024 }, "san_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 340797, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 340797 + "unique_sentence2": 1024 }, "san_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 318945, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318945 + "unique_sentence2": 1024 }, "san_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 310295, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 310295 + "unique_sentence2": 1024 }, "san_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 310580, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 310580 + "unique_sentence2": 1024 }, "san_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 331387, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 331387 + "unique_sentence2": 1024 }, "san_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 304024, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 304024 + "unique_sentence2": 1024 }, "san_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 323893, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 323893 + "unique_sentence2": 1024 }, "san_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 317582, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 317582 + "unique_sentence2": 1024 }, "san_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 345507, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 345507 + "unique_sentence2": 1023 }, "san_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 316206, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 316206 + "unique_sentence2": 1024 }, "san_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 312295, + "unique_pairs": 1024, "min_sentence1_length": 9, "average_sentence1_length": 153.92578125, "max_sentence1_length": 601, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312295 + "unique_sentence2": 1024 }, "sat_Olck-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 326732, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 326732 + "unique_sentence2": 1024 }, "sat_Olck-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 316436, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 316436 + "unique_sentence2": 1024 }, "sat_Olck-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 329423, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 329423 + "unique_sentence2": 1024 }, "sat_Olck-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 324834, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 324834 + "unique_sentence2": 1024 }, "sat_Olck-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 325912, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 325912 + "unique_sentence2": 1024 }, "sat_Olck-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 318408, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318408 + "unique_sentence2": 1024 }, "sat_Olck-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 315254, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 315254 + "unique_sentence2": 1024 }, "sat_Olck-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 325920, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 325920 + "unique_sentence2": 1024 }, "sat_Olck-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 337878, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 337878 + "unique_sentence2": 1024 }, "sat_Olck-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 328578, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 328578 + "unique_sentence2": 1024 }, "sat_Olck-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 314496, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 314496 + "unique_sentence2": 1024 }, "sat_Olck-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 349450, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 349450 + "unique_sentence2": 1024 }, "sat_Olck-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 327598, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 327598 + "unique_sentence2": 1024 }, "sat_Olck-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 318948, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318948 + "unique_sentence2": 1024 }, "sat_Olck-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 319233, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319233 + "unique_sentence2": 1024 }, "sat_Olck-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 340040, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 340040 + "unique_sentence2": 1024 }, "sat_Olck-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 312677, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312677 + "unique_sentence2": 1024 }, "sat_Olck-san_Deva": { + "num_samples": 1024, + "number_of_characters": 323893, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 323893 + "unique_sentence2": 1024 }, "sat_Olck-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 326235, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 326235 + "unique_sentence2": 1024 }, "sat_Olck-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 354160, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 354160 + "unique_sentence2": 1023 }, "sat_Olck-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 324859, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 324859 + "unique_sentence2": 1024 }, "sat_Olck-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 320948, + "unique_pairs": 1024, "min_sentence1_length": 11, "average_sentence1_length": 162.3759765625, "max_sentence1_length": 536, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320948 + "unique_sentence2": 1024 }, "snd_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 320421, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320421 + "unique_sentence2": 1024 }, "snd_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 310125, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 310125 + "unique_sentence2": 1024 }, "snd_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 323112, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 323112 + "unique_sentence2": 1024 }, "snd_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 318523, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318523 + "unique_sentence2": 1024 }, "snd_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 319601, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319601 + "unique_sentence2": 1024 }, "snd_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 312097, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312097 + "unique_sentence2": 1024 }, "snd_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 308943, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 308943 + "unique_sentence2": 1024 }, "snd_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 319609, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319609 + "unique_sentence2": 1024 }, "snd_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 331567, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 331567 + "unique_sentence2": 1024 }, "snd_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 322267, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 322267 + "unique_sentence2": 1024 }, "snd_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 308185, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 308185 + "unique_sentence2": 1024 }, "snd_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 343139, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 343139 + "unique_sentence2": 1024 }, "snd_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 321287, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 321287 + "unique_sentence2": 1024 }, "snd_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 312637, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312637 + "unique_sentence2": 1024 }, "snd_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 312922, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312922 + "unique_sentence2": 1024 }, "snd_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 333729, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 333729 + "unique_sentence2": 1024 }, "snd_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 306366, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 306366 + "unique_sentence2": 1024 }, "snd_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 317582, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 317582 + "unique_sentence2": 1024 }, "snd_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 326235, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 326235 + "unique_sentence2": 1024 }, "snd_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 347849, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 347849 + "unique_sentence2": 1023 }, "snd_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 318548, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318548 + "unique_sentence2": 1024 }, "snd_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 314637, + "unique_pairs": 1024, "min_sentence1_length": 18, "average_sentence1_length": 156.212890625, "max_sentence1_length": 545, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 314637 + "unique_sentence2": 1024 }, "tam_Taml-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 348346, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 348346 + "unique_sentence2": 1024 }, "tam_Taml-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 338050, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 338050 + "unique_sentence2": 1024 }, "tam_Taml-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 351037, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 351037 + "unique_sentence2": 1024 }, "tam_Taml-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 346448, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 346448 + "unique_sentence2": 1024 }, "tam_Taml-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 347526, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 347526 + "unique_sentence2": 1024 }, "tam_Taml-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 340022, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 340022 + "unique_sentence2": 1024 }, "tam_Taml-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 336868, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 336868 + "unique_sentence2": 1024 }, "tam_Taml-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 347534, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 347534 + "unique_sentence2": 1024 }, "tam_Taml-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 359492, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 359492 + "unique_sentence2": 1024 }, "tam_Taml-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 350192, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 350192 + "unique_sentence2": 1024 }, "tam_Taml-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 336110, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 336110 + "unique_sentence2": 1024 }, "tam_Taml-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 371064, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 371064 + "unique_sentence2": 1024 }, "tam_Taml-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 349212, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 349212 + "unique_sentence2": 1024 }, "tam_Taml-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 340562, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 340562 + "unique_sentence2": 1024 }, "tam_Taml-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 340847, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 340847 + "unique_sentence2": 1024 }, "tam_Taml-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 361654, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 361654 + "unique_sentence2": 1024 }, "tam_Taml-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 334291, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 334291 + "unique_sentence2": 1024 }, "tam_Taml-san_Deva": { + "num_samples": 1024, + "number_of_characters": 345507, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 345507 + "unique_sentence2": 1024 }, "tam_Taml-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 354160, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 354160 + "unique_sentence2": 1024 }, "tam_Taml-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 347849, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 347849 + "unique_sentence2": 1024 }, "tam_Taml-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 346473, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 346473 + "unique_sentence2": 1024 }, "tam_Taml-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 342562, + "unique_pairs": 1024, "min_sentence1_length": 32, "average_sentence1_length": 183.4833984375, "max_sentence1_length": 614, + "unique_sentence1": 1023, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 342562 + "unique_sentence2": 1024 }, "tel_Telu-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 319045, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319045 + "unique_sentence2": 1024 }, "tel_Telu-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 308749, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 308749 + "unique_sentence2": 1024 }, "tel_Telu-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 321736, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 321736 + "unique_sentence2": 1024 }, "tel_Telu-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 317147, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 317147 + "unique_sentence2": 1024 }, "tel_Telu-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 318225, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318225 + "unique_sentence2": 1024 }, "tel_Telu-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 310721, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 310721 + "unique_sentence2": 1024 }, "tel_Telu-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 307567, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 307567 + "unique_sentence2": 1024 }, "tel_Telu-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 318233, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318233 + "unique_sentence2": 1024 }, "tel_Telu-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 330191, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 330191 + "unique_sentence2": 1024 }, "tel_Telu-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 320891, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320891 + "unique_sentence2": 1024 }, "tel_Telu-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 306809, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 306809 + "unique_sentence2": 1024 }, "tel_Telu-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 341763, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 341763 + "unique_sentence2": 1024 }, "tel_Telu-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 319911, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 319911 + "unique_sentence2": 1024 }, "tel_Telu-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 311261, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 311261 + "unique_sentence2": 1024 }, "tel_Telu-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 311546, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 311546 + "unique_sentence2": 1024 }, "tel_Telu-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 332353, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 332353 + "unique_sentence2": 1024 }, "tel_Telu-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 304990, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 304990 + "unique_sentence2": 1024 }, "tel_Telu-san_Deva": { + "num_samples": 1024, + "number_of_characters": 316206, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 316206 + "unique_sentence2": 1024 }, "tel_Telu-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 324859, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 324859 + "unique_sentence2": 1024 }, "tel_Telu-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 318548, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 318548 + "unique_sentence2": 1024 }, "tel_Telu-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 346473, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 346473 + "unique_sentence2": 1023 }, "tel_Telu-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 313261, + "unique_pairs": 1024, "min_sentence1_length": 14, "average_sentence1_length": 154.869140625, "max_sentence1_length": 658, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 151.0498046875, "max_sentence2_length": 574, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 313261 + "unique_sentence2": 1024 }, "urd_Arab-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 315134, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 156.6982421875, "max_sentence2_length": 582, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 315134 + "unique_sentence2": 1024 }, "urd_Arab-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 304838, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 146.6435546875, "max_sentence2_length": 538, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 304838 + "unique_sentence2": 1024 }, "urd_Arab-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 317825, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 159.326171875, "max_sentence2_length": 631, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 317825 + "unique_sentence2": 1024 }, "urd_Arab-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 313236, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.8447265625, "max_sentence2_length": 500, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 313236 + "unique_sentence2": 1024 }, "urd_Arab-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 314314, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 155.8974609375, "max_sentence2_length": 532, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 314314 + "unique_sentence2": 1024 }, "urd_Arab-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 306810, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 17, "average_sentence2_length": 148.5693359375, "max_sentence2_length": 537, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 306810 + "unique_sentence2": 1024 }, "urd_Arab-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 303656, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 145.4892578125, "max_sentence2_length": 488, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 303656 + "unique_sentence2": 1024 }, "urd_Arab-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 314322, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 21, "average_sentence2_length": 155.9052734375, "max_sentence2_length": 531, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 314322 + "unique_sentence2": 1024 }, "urd_Arab-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 326280, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 167.5830078125, "max_sentence2_length": 668, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 326280 + "unique_sentence2": 1024 }, "urd_Arab-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 316980, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 158.5009765625, "max_sentence2_length": 520, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 316980 + "unique_sentence2": 1024 }, "urd_Arab-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 302898, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 144.7490234375, "max_sentence2_length": 562, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 302898 + "unique_sentence2": 1024 }, "urd_Arab-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 337852, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 13, "average_sentence2_length": 178.8837890625, "max_sentence2_length": 692, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 337852 + "unique_sentence2": 1024 }, "urd_Arab-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 316000, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 157.5439453125, "max_sentence2_length": 555, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 316000 + "unique_sentence2": 1024 }, "urd_Arab-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 307350, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 16, "average_sentence2_length": 149.0966796875, "max_sentence2_length": 597, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 307350 + "unique_sentence2": 1024 }, "urd_Arab-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 307635, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 149.375, "max_sentence2_length": 525, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 307635 + "unique_sentence2": 1024 }, "urd_Arab-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 328442, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 10, "average_sentence2_length": 169.6943359375, "max_sentence2_length": 578, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 328442 + "unique_sentence2": 1024 }, "urd_Arab-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 301079, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 19, "average_sentence2_length": 142.97265625, "max_sentence2_length": 476, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 301079 + "unique_sentence2": 1024 }, "urd_Arab-san_Deva": { + "num_samples": 1024, + "number_of_characters": 312295, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 9, "average_sentence2_length": 153.92578125, "max_sentence2_length": 601, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 312295 + "unique_sentence2": 1024 }, "urd_Arab-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 320948, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 11, "average_sentence2_length": 162.3759765625, "max_sentence2_length": 536, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 320948 + "unique_sentence2": 1024 }, "urd_Arab-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 314637, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 18, "average_sentence2_length": 156.212890625, "max_sentence2_length": 545, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 314637 + "unique_sentence2": 1024 }, "urd_Arab-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 342562, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 32, "average_sentence2_length": 183.4833984375, "max_sentence2_length": 614, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 342562 + "unique_sentence2": 1023 }, "urd_Arab-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 313261, + "unique_pairs": 1024, "min_sentence1_length": 13, "average_sentence1_length": 151.0498046875, "max_sentence1_length": 574, + "unique_sentence1": 1024, "min_sentence2_length": 14, "average_sentence2_length": 154.869140625, "max_sentence2_length": 658, - "num_samples": 1024, - "num_samples_sentence2": 1024, - "number_of_characters": 313261 + "unique_sentence2": 1024 } } } diff --git a/mteb/descriptive_stats/BitextMining/IWSLT2017BitextMining.json b/mteb/descriptive_stats/BitextMining/IWSLT2017BitextMining.json index 40451d3b5..504c3f190 100644 --- a/mteb/descriptive_stats/BitextMining/IWSLT2017BitextMining.json +++ b/mteb/descriptive_stats/BitextMining/IWSLT2017BitextMining.json @@ -1,278 +1,328 @@ { "validation": { + "num_samples": 21938, + "number_of_characters": 4256244, + "unique_pairs": 21840, "min_sentence1_length": 2, "average_sentence1_length": 97.0061992889051, "max_sentence1_length": 521, + "unique_sentence1": 11563, "min_sentence2_length": 2, "average_sentence2_length": 97.0061992889051, "max_sentence2_length": 521, - "num_samples": 21938, - "num_samples_sentence2": 21938, - "number_of_characters": 4256244, + "unique_sentence2": 11563, "hf_subset_descriptive_stats": { "ar-en": { + "num_samples": 888, + "number_of_characters": 172499, + "unique_pairs": 887, "min_sentence1_length": 4, "average_sentence1_length": 85.48873873873873, "max_sentence1_length": 369, + "unique_sentence1": 887, "min_sentence2_length": 10, "average_sentence2_length": 108.76689189189189, "max_sentence2_length": 462, - "num_samples": 888, - "num_samples_sentence2": 888, - "number_of_characters": 172499 + "unique_sentence2": 881 }, "de-en": { + "num_samples": 888, + "number_of_characters": 202336, + "unique_pairs": 883, "min_sentence1_length": 6, "average_sentence1_length": 119.02702702702703, "max_sentence1_length": 521, + "unique_sentence1": 881, "min_sentence2_length": 10, "average_sentence2_length": 108.82882882882883, "max_sentence2_length": 462, - "num_samples": 888, - "num_samples_sentence2": 888, - "number_of_characters": 202336 + "unique_sentence2": 881 }, "en-ar": { + "num_samples": 888, + "number_of_characters": 172499, + "unique_pairs": 887, "min_sentence1_length": 10, "average_sentence1_length": 108.76689189189189, "max_sentence1_length": 462, + "unique_sentence1": 881, "min_sentence2_length": 4, "average_sentence2_length": 85.48873873873873, "max_sentence2_length": 369, - "num_samples": 888, - "num_samples_sentence2": 888, - "number_of_characters": 172499 + "unique_sentence2": 887 }, "en-de": { + "num_samples": 888, + "number_of_characters": 202336, + "unique_pairs": 883, "min_sentence1_length": 10, "average_sentence1_length": 108.82882882882883, "max_sentence1_length": 462, + "unique_sentence1": 881, "min_sentence2_length": 6, "average_sentence2_length": 119.02702702702703, "max_sentence2_length": 521, - "num_samples": 888, - "num_samples_sentence2": 888, - "number_of_characters": 202336 + "unique_sentence2": 881 }, "en-fr": { + "num_samples": 890, + "number_of_characters": 197619, + "unique_pairs": 883, "min_sentence1_length": 10, "average_sentence1_length": 108.4123595505618, "max_sentence1_length": 462, + "unique_sentence1": 883, "min_sentence2_length": 6, "average_sentence2_length": 113.63146067415731, "max_sentence2_length": 493, - "num_samples": 890, - "num_samples_sentence2": 890, - "number_of_characters": 197619 + "unique_sentence2": 881 }, "en-it": { + "num_samples": 929, + "number_of_characters": 191803, + "unique_pairs": 924, "min_sentence1_length": 10, "average_sentence1_length": 103.0010764262648, "max_sentence1_length": 433, + "unique_sentence1": 922, "min_sentence2_length": 7, "average_sentence2_length": 103.46071044133477, "max_sentence2_length": 444, - "num_samples": 929, - "num_samples_sentence2": 929, - "number_of_characters": 191803 + "unique_sentence2": 918 }, "en-ja": { + "num_samples": 871, + "number_of_characters": 132742, + "unique_pairs": 867, "min_sentence1_length": 10, "average_sentence1_length": 109.80826636050517, "max_sentence1_length": 462, + "unique_sentence1": 864, "min_sentence2_length": 5, "average_sentence2_length": 42.59357060849598, "max_sentence2_length": 225, - "num_samples": 871, - "num_samples_sentence2": 871, - "number_of_characters": 132742 + "unique_sentence2": 866 }, "en-ko": { + "num_samples": 879, + "number_of_characters": 142659, + "unique_pairs": 874, "min_sentence1_length": 10, "average_sentence1_length": 107.74175199089875, "max_sentence1_length": 462, + "unique_sentence1": 872, "min_sentence2_length": 3, "average_sentence2_length": 54.5551763367463, "max_sentence2_length": 250, - "num_samples": 879, - "num_samples_sentence2": 879, - "number_of_characters": 142659 + "unique_sentence2": 872 }, "en-nl": { + "num_samples": 1003, + "number_of_characters": 189637, + "unique_pairs": 1000, "min_sentence1_length": 10, "average_sentence1_length": 95.26819541375872, "max_sentence1_length": 433, + "unique_sentence1": 996, "min_sentence2_length": 4, "average_sentence2_length": 93.80159521435692, "max_sentence2_length": 477, - "num_samples": 1003, - "num_samples_sentence2": 1003, - "number_of_characters": 189637 + "unique_sentence2": 1000 }, "en-ro": { + "num_samples": 914, + "number_of_characters": 194128, + "unique_pairs": 910, "min_sentence1_length": 10, "average_sentence1_length": 104.72100656455142, "max_sentence1_length": 433, + "unique_sentence1": 907, "min_sentence2_length": 9, "average_sentence2_length": 107.67286652078775, "max_sentence2_length": 448, - "num_samples": 914, - "num_samples_sentence2": 914, - "number_of_characters": 194128 + "unique_sentence2": 910 }, "en-zh": { + "num_samples": 879, + "number_of_characters": 131126, + "unique_pairs": 877, "min_sentence1_length": 10, "average_sentence1_length": 109.36518771331058, "max_sentence1_length": 462, + "unique_sentence1": 872, "min_sentence2_length": 2, "average_sentence2_length": 39.811149032992034, "max_sentence2_length": 230, - "num_samples": 879, - "num_samples_sentence2": 879, - "number_of_characters": 131126 + "unique_sentence2": 867 }, "fr-en": { + "num_samples": 890, + "number_of_characters": 197619, + "unique_pairs": 883, "min_sentence1_length": 6, "average_sentence1_length": 113.63146067415731, "max_sentence1_length": 493, + "unique_sentence1": 881, "min_sentence2_length": 10, "average_sentence2_length": 108.4123595505618, "max_sentence2_length": 462, - "num_samples": 890, - "num_samples_sentence2": 890, - "number_of_characters": 197619 + "unique_sentence2": 883 }, "it-en": { + "num_samples": 929, + "number_of_characters": 191803, + "unique_pairs": 924, "min_sentence1_length": 7, "average_sentence1_length": 103.46071044133477, "max_sentence1_length": 444, + "unique_sentence1": 918, "min_sentence2_length": 10, "average_sentence2_length": 103.0010764262648, "max_sentence2_length": 433, - "num_samples": 929, - "num_samples_sentence2": 929, - "number_of_characters": 191803 + "unique_sentence2": 922 }, "it-nl": { + "num_samples": 1001, + "number_of_characters": 188858, + "unique_pairs": 998, "min_sentence1_length": 7, "average_sentence1_length": 94.64235764235764, "max_sentence1_length": 459, + "unique_sentence1": 994, "min_sentence2_length": 7, "average_sentence2_length": 94.02697302697302, "max_sentence2_length": 505, - "num_samples": 1001, - "num_samples_sentence2": 1001, - "number_of_characters": 188858 + "unique_sentence2": 998 }, "it-ro": { + "num_samples": 914, + "number_of_characters": 193339, + "unique_pairs": 911, "min_sentence1_length": 7, "average_sentence1_length": 103.90809628008753, "max_sentence1_length": 435, + "unique_sentence1": 907, "min_sentence2_length": 9, "average_sentence2_length": 107.62253829321664, "max_sentence2_length": 448, - "num_samples": 914, - "num_samples_sentence2": 914, - "number_of_characters": 193339 + "unique_sentence2": 910 }, "ja-en": { + "num_samples": 871, + "number_of_characters": 132742, + "unique_pairs": 867, "min_sentence1_length": 5, "average_sentence1_length": 42.59357060849598, "max_sentence1_length": 225, + "unique_sentence1": 866, "min_sentence2_length": 10, "average_sentence2_length": 109.80826636050517, "max_sentence2_length": 462, - "num_samples": 871, - "num_samples_sentence2": 871, - "number_of_characters": 132742 + "unique_sentence2": 864 }, "ko-en": { + "num_samples": 879, + "number_of_characters": 142659, + "unique_pairs": 874, "min_sentence1_length": 3, "average_sentence1_length": 54.5551763367463, "max_sentence1_length": 250, + "unique_sentence1": 872, "min_sentence2_length": 10, "average_sentence2_length": 107.74175199089875, "max_sentence2_length": 462, - "num_samples": 879, - "num_samples_sentence2": 879, - "number_of_characters": 142659 + "unique_sentence2": 872 }, "nl-en": { + "num_samples": 1003, + "number_of_characters": 189637, + "unique_pairs": 1000, "min_sentence1_length": 4, "average_sentence1_length": 93.80159521435692, "max_sentence1_length": 477, + "unique_sentence1": 1000, "min_sentence2_length": 10, "average_sentence2_length": 95.26819541375872, "max_sentence2_length": 433, - "num_samples": 1003, - "num_samples_sentence2": 1003, - "number_of_characters": 189637 + "unique_sentence2": 996 }, "nl-it": { + "num_samples": 1001, + "number_of_characters": 188858, + "unique_pairs": 998, "min_sentence1_length": 7, "average_sentence1_length": 94.02697302697302, "max_sentence1_length": 505, + "unique_sentence1": 998, "min_sentence2_length": 7, "average_sentence2_length": 94.64235764235764, "max_sentence2_length": 459, - "num_samples": 1001, - "num_samples_sentence2": 1001, - "number_of_characters": 188858 + "unique_sentence2": 994 }, "nl-ro": { + "num_samples": 913, + "number_of_characters": 191376, + "unique_pairs": 911, "min_sentence1_length": 7, "average_sentence1_length": 102.01971522453451, "max_sentence1_length": 478, + "unique_sentence1": 909, "min_sentence2_length": 9, "average_sentence2_length": 107.59255202628697, "max_sentence2_length": 515, - "num_samples": 913, - "num_samples_sentence2": 913, - "number_of_characters": 191376 + "unique_sentence2": 909 }, "ro-en": { + "num_samples": 914, + "number_of_characters": 194128, + "unique_pairs": 910, "min_sentence1_length": 9, "average_sentence1_length": 107.67286652078775, "max_sentence1_length": 448, + "unique_sentence1": 910, "min_sentence2_length": 10, "average_sentence2_length": 104.72100656455142, "max_sentence2_length": 433, - "num_samples": 914, - "num_samples_sentence2": 914, - "number_of_characters": 194128 + "unique_sentence2": 907 }, "ro-it": { + "num_samples": 914, + "number_of_characters": 193339, + "unique_pairs": 911, "min_sentence1_length": 9, "average_sentence1_length": 107.62253829321664, "max_sentence1_length": 448, + "unique_sentence1": 910, "min_sentence2_length": 7, "average_sentence2_length": 103.90809628008753, "max_sentence2_length": 435, - "num_samples": 914, - "num_samples_sentence2": 914, - "number_of_characters": 193339 + "unique_sentence2": 907 }, "ro-nl": { + "num_samples": 913, + "number_of_characters": 191376, + "unique_pairs": 911, "min_sentence1_length": 9, "average_sentence1_length": 107.59255202628697, "max_sentence1_length": 515, + "unique_sentence1": 909, "min_sentence2_length": 7, "average_sentence2_length": 102.01971522453451, "max_sentence2_length": 478, - "num_samples": 913, - "num_samples_sentence2": 913, - "number_of_characters": 191376 + "unique_sentence2": 909 }, "zh-en": { + "num_samples": 879, + "number_of_characters": 131126, + "unique_pairs": 877, "min_sentence1_length": 2, "average_sentence1_length": 39.811149032992034, "max_sentence1_length": 230, + "unique_sentence1": 867, "min_sentence2_length": 10, "average_sentence2_length": 109.36518771331058, "max_sentence2_length": 462, - "num_samples": 879, - "num_samples_sentence2": 879, - "number_of_characters": 131126 + "unique_sentence2": 872 } } } diff --git a/mteb/descriptive_stats/BitextMining/IndicGenBenchFloresBitextMining.json b/mteb/descriptive_stats/BitextMining/IndicGenBenchFloresBitextMining.json index ac4678376..1aaed3945 100644 --- a/mteb/descriptive_stats/BitextMining/IndicGenBenchFloresBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/IndicGenBenchFloresBitextMining.json @@ -1,1303 +1,1539 @@ { "validation": { + "num_samples": 57826, + "number_of_characters": 14600950, + "unique_pairs": 57826, "min_sentence1_length": 24, "average_sentence1_length": 126.2541071490333, "max_sentence1_length": 368, + "unique_sentence1": 29903, "min_sentence2_length": 24, "average_sentence2_length": 126.24390412617161, "max_sentence2_length": 368, - "num_samples": 57826, - "num_samples_sentence2": 57826, - "number_of_characters": 14600950, + "unique_sentence2": 29903, "hf_subset_descriptive_stats": { "ben-eng": { + "num_samples": 997, + "number_of_characters": 248469, + "unique_pairs": 997, "min_sentence1_length": 30, "average_sentence1_length": 123.64593781344033, "max_sentence1_length": 320, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 248469 + "unique_sentence2": 997 }, "eng-ben": { + "num_samples": 997, + "number_of_characters": 248469, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 30, "average_sentence2_length": 123.64593781344033, "max_sentence2_length": 320, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 248469 + "unique_sentence2": 997 }, "guj-eng": { + "num_samples": 997, + "number_of_characters": 245477, + "unique_pairs": 997, "min_sentence1_length": 30, "average_sentence1_length": 120.64493480441324, "max_sentence1_length": 368, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 245477 + "unique_sentence2": 997 }, "eng-guj": { + "num_samples": 997, + "number_of_characters": 245477, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 30, "average_sentence2_length": 120.64493480441324, "max_sentence2_length": 368, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 245477 + "unique_sentence2": 997 }, "hin-eng": { + "num_samples": 997, + "number_of_characters": 250573, + "unique_pairs": 997, "min_sentence1_length": 31, "average_sentence1_length": 125.75626880641926, "max_sentence1_length": 355, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 250573 + "unique_sentence2": 997 }, "eng-hin": { + "num_samples": 997, + "number_of_characters": 250564, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 31, "average_sentence2_length": 125.74724172517553, "max_sentence2_length": 355, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 250564 + "unique_sentence2": 997 }, "kan-eng": { + "num_samples": 997, + "number_of_characters": 257131, + "unique_pairs": 997, "min_sentence1_length": 34, "average_sentence1_length": 132.33400200601807, "max_sentence1_length": 331, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 257131 + "unique_sentence2": 997 }, "eng-kan": { + "num_samples": 997, + "number_of_characters": 256986, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 34, "average_sentence2_length": 132.18856569709126, "max_sentence2_length": 331, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 256986 + "unique_sentence2": 997 }, "mal-eng": { + "num_samples": 997, + "number_of_characters": 267295, + "unique_pairs": 997, "min_sentence1_length": 31, "average_sentence1_length": 142.52858575727183, "max_sentence1_length": 360, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 267295 + "unique_sentence2": 997 }, "eng-mal": { + "num_samples": 997, + "number_of_characters": 267296, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 31, "average_sentence2_length": 142.5295887662989, "max_sentence2_length": 360, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 267296 + "unique_sentence2": 997 }, "mar-eng": { + "num_samples": 997, + "number_of_characters": 251107, + "unique_pairs": 997, "min_sentence1_length": 29, "average_sentence1_length": 126.29187562688064, "max_sentence1_length": 321, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 251107 + "unique_sentence2": 997 }, "eng-mar": { + "num_samples": 997, + "number_of_characters": 250897, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 29, "average_sentence2_length": 126.08124373119358, "max_sentence2_length": 321, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 250897 + "unique_sentence2": 997 }, "tam-eng": { + "num_samples": 997, + "number_of_characters": 271322, + "unique_pairs": 997, "min_sentence1_length": 30, "average_sentence1_length": 146.567703109328, "max_sentence1_length": 358, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 271322 + "unique_sentence2": 997 }, "eng-tam": { + "num_samples": 997, + "number_of_characters": 271322, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 30, "average_sentence2_length": 146.567703109328, "max_sentence2_length": 358, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 271322 + "unique_sentence2": 997 }, "tel-eng": { + "num_samples": 997, + "number_of_characters": 252385, + "unique_pairs": 997, "min_sentence1_length": 29, "average_sentence1_length": 127.57372116349048, "max_sentence1_length": 317, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 252385 + "unique_sentence2": 997 }, "eng-tel": { + "num_samples": 997, + "number_of_characters": 252380, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 29, "average_sentence2_length": 127.56870611835507, "max_sentence2_length": 317, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 252380 + "unique_sentence2": 997 }, "urd-eng": { + "num_samples": 997, + "number_of_characters": 249824, + "unique_pairs": 997, "min_sentence1_length": 37, "average_sentence1_length": 125.00501504513541, "max_sentence1_length": 295, + "unique_sentence1": 996, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 249824 + "unique_sentence2": 997 }, "eng-urd": { + "num_samples": 997, + "number_of_characters": 249824, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 37, "average_sentence2_length": 125.00501504513541, "max_sentence2_length": 295, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 249824 + "unique_sentence2": 996 }, "asm-eng": { + "num_samples": 997, + "number_of_characters": 246220, + "unique_pairs": 997, "min_sentence1_length": 30, "average_sentence1_length": 121.3901705115346, "max_sentence1_length": 314, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 246220 + "unique_sentence2": 997 }, "eng-asm": { + "num_samples": 997, + "number_of_characters": 246224, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 30, "average_sentence2_length": 121.39418254764293, "max_sentence2_length": 314, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 246224 + "unique_sentence2": 997 }, "bho-eng": { + "num_samples": 997, + "number_of_characters": 246895, + "unique_pairs": 997, "min_sentence1_length": 25, "average_sentence1_length": 122.06720160481444, "max_sentence1_length": 326, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 246895 + "unique_sentence2": 997 }, "eng-bho": { + "num_samples": 997, + "number_of_characters": 246919, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 25, "average_sentence2_length": 122.0912738214644, "max_sentence2_length": 326, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 246919 + "unique_sentence2": 997 }, "nep-eng": { + "num_samples": 997, + "number_of_characters": 245984, + "unique_pairs": 997, "min_sentence1_length": 24, "average_sentence1_length": 121.15346038114343, "max_sentence1_length": 307, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 245984 + "unique_sentence2": 997 }, "eng-nep": { + "num_samples": 997, + "number_of_characters": 245984, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 24, "average_sentence2_length": 121.15346038114343, "max_sentence2_length": 307, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 245984 + "unique_sentence2": 997 }, "ory-eng": { + "num_samples": 997, + "number_of_characters": 254206, + "unique_pairs": 997, "min_sentence1_length": 34, "average_sentence1_length": 129.4002006018054, "max_sentence1_length": 308, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 254206 + "unique_sentence2": 997 }, "eng-ory": { + "num_samples": 997, + "number_of_characters": 254206, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 34, "average_sentence2_length": 129.4002006018054, "max_sentence2_length": 308, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 254206 + "unique_sentence2": 997 }, "pan-eng": { + "num_samples": 997, + "number_of_characters": 251598, + "unique_pairs": 997, "min_sentence1_length": 29, "average_sentence1_length": 126.78435305917753, "max_sentence1_length": 309, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 251598 + "unique_sentence2": 997 }, "eng-pan": { + "num_samples": 997, + "number_of_characters": 251597, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 29, "average_sentence2_length": 126.78335005015045, "max_sentence2_length": 309, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 251597 + "unique_sentence2": 997 }, "pus-eng": { + "num_samples": 997, + "number_of_characters": 247450, + "unique_pairs": 997, "min_sentence1_length": 32, "average_sentence1_length": 122.62387161484453, "max_sentence1_length": 300, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 247450 + "unique_sentence2": 997 }, "eng-pus": { + "num_samples": 997, + "number_of_characters": 247450, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 32, "average_sentence2_length": 122.62387161484453, "max_sentence2_length": 300, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 247450 + "unique_sentence2": 997 }, "san-eng": { + "num_samples": 997, + "number_of_characters": 249042, + "unique_pairs": 997, "min_sentence1_length": 31, "average_sentence1_length": 124.22066198595788, "max_sentence1_length": 311, + "unique_sentence1": 994, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 249042 + "unique_sentence2": 997 }, "eng-san": { + "num_samples": 997, + "number_of_characters": 248877, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 31, "average_sentence2_length": 124.05516549648947, "max_sentence2_length": 311, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 248877 + "unique_sentence2": 994 }, "awa-eng": { + "num_samples": 997, + "number_of_characters": 247944, + "unique_pairs": 997, "min_sentence1_length": 34, "average_sentence1_length": 123.11935807422267, "max_sentence1_length": 329, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 247944 + "unique_sentence2": 997 }, "eng-awa": { + "num_samples": 997, + "number_of_characters": 247884, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 34, "average_sentence2_length": 123.05917753259779, "max_sentence2_length": 329, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 247884 + "unique_sentence2": 997 }, "bgc-eng": { + "num_samples": 997, + "number_of_characters": 245935, + "unique_pairs": 997, "min_sentence1_length": 27, "average_sentence1_length": 121.10431293881645, "max_sentence1_length": 303, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 245935 + "unique_sentence2": 997 }, "eng-bgc": { + "num_samples": 997, + "number_of_characters": 245935, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 27, "average_sentence2_length": 121.10431293881645, "max_sentence2_length": 303, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 245935 + "unique_sentence2": 997 }, "bod-eng": { + "num_samples": 997, + "number_of_characters": 266515, + "unique_pairs": 997, "min_sentence1_length": 26, "average_sentence1_length": 141.74623871614844, "max_sentence1_length": 355, + "unique_sentence1": 996, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 266515 + "unique_sentence2": 997 }, "eng-bod": { + "num_samples": 997, + "number_of_characters": 266495, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 26, "average_sentence2_length": 141.72617853560683, "max_sentence2_length": 355, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 266495 + "unique_sentence2": 996 }, "boy-eng": { + "num_samples": 997, + "number_of_characters": 260174, + "unique_pairs": 997, "min_sentence1_length": 31, "average_sentence1_length": 135.38615847542627, "max_sentence1_length": 312, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 260174 + "unique_sentence2": 997 }, "eng-boy": { + "num_samples": 997, + "number_of_characters": 260174, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 31, "average_sentence2_length": 135.38615847542627, "max_sentence2_length": 312, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 260174 + "unique_sentence2": 997 }, "gbm-eng": { + "num_samples": 997, + "number_of_characters": 247009, + "unique_pairs": 997, "min_sentence1_length": 30, "average_sentence1_length": 122.18154463390171, "max_sentence1_length": 344, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 247009 + "unique_sentence2": 997 }, "eng-gbm": { + "num_samples": 997, + "number_of_characters": 247009, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 30, "average_sentence2_length": 122.18154463390171, "max_sentence2_length": 344, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 247009 + "unique_sentence2": 997 }, "gom-eng": { + "num_samples": 997, + "number_of_characters": 244553, + "unique_pairs": 997, "min_sentence1_length": 31, "average_sentence1_length": 119.71815446339016, "max_sentence1_length": 306, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 244553 + "unique_sentence2": 997 }, "eng-gom": { + "num_samples": 997, + "number_of_characters": 244553, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 31, "average_sentence2_length": 119.71815446339016, "max_sentence2_length": 306, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 244553 + "unique_sentence2": 997 }, "hne-eng": { + "num_samples": 997, + "number_of_characters": 246416, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 121.58676028084253, "max_sentence1_length": 321, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 246416 + "unique_sentence2": 997 }, "eng-hne": { + "num_samples": 997, + "number_of_characters": 246405, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 121.57572718154464, "max_sentence2_length": 321, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 246405 + "unique_sentence2": 997 }, "raj-eng": { + "num_samples": 997, + "number_of_characters": 249541, + "unique_pairs": 997, "min_sentence1_length": 32, "average_sentence1_length": 124.72116349047141, "max_sentence1_length": 313, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 249541 + "unique_sentence2": 997 }, "eng-raj": { + "num_samples": 997, + "number_of_characters": 249541, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 32, "average_sentence2_length": 124.72116349047141, "max_sentence2_length": 313, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 249541 + "unique_sentence2": 997 }, "mai-eng": { + "num_samples": 997, + "number_of_characters": 247991, + "unique_pairs": 997, "min_sentence1_length": 29, "average_sentence1_length": 123.16649949849548, "max_sentence1_length": 312, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 247991 + "unique_sentence2": 997 }, "eng-mai": { + "num_samples": 997, + "number_of_characters": 247994, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 29, "average_sentence2_length": 123.16950852557673, "max_sentence2_length": 312, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 247994 + "unique_sentence2": 997 }, "mni-eng": { + "num_samples": 997, + "number_of_characters": 254308, + "unique_pairs": 997, "min_sentence1_length": 39, "average_sentence1_length": 129.5025075225677, "max_sentence1_length": 310, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 254308 + "unique_sentence2": 997 }, "eng-mni": { + "num_samples": 997, + "number_of_characters": 254312, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 39, "average_sentence2_length": 129.50651955867602, "max_sentence2_length": 310, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 254312 + "unique_sentence2": 997 }, "mup-eng": { + "num_samples": 997, + "number_of_characters": 248486, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 123.6629889669007, "max_sentence1_length": 312, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 248486 + "unique_sentence2": 997 }, "eng-mup": { + "num_samples": 997, + "number_of_characters": 248486, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 123.6629889669007, "max_sentence2_length": 312, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 248486 + "unique_sentence2": 997 }, "mwr-eng": { + "num_samples": 997, + "number_of_characters": 248641, + "unique_pairs": 997, "min_sentence1_length": 31, "average_sentence1_length": 123.81845536609829, "max_sentence1_length": 324, + "unique_sentence1": 997, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 248641 + "unique_sentence2": 997 }, "eng-mwr": { + "num_samples": 997, + "number_of_characters": 248641, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 31, "average_sentence2_length": 123.81845536609829, "max_sentence2_length": 324, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 248641 + "unique_sentence2": 997 }, "sat-eng": { + "num_samples": 997, + "number_of_characters": 258279, + "unique_pairs": 997, "min_sentence1_length": 37, "average_sentence1_length": 133.4854563691073, "max_sentence1_length": 333, + "unique_sentence1": 995, "min_sentence2_length": 28, "average_sentence2_length": 125.57071213640923, "max_sentence2_length": 297, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 258279 + "unique_sentence2": 997 }, "eng-sat": { + "num_samples": 997, + "number_of_characters": 258279, + "unique_pairs": 997, "min_sentence1_length": 28, "average_sentence1_length": 125.57071213640923, "max_sentence1_length": 297, + "unique_sentence1": 997, "min_sentence2_length": 37, "average_sentence2_length": 133.4854563691073, "max_sentence2_length": 333, - "num_samples": 997, - "num_samples_sentence2": 997, - "number_of_characters": 258279 + "unique_sentence2": 995 } } }, "test": { + "num_samples": 58696, + "number_of_characters": 15359416, + "unique_pairs": 58690, "min_sentence1_length": 33, "average_sentence1_length": 130.84266389532507, "max_sentence1_length": 431, + "unique_sentence1": 30351, "min_sentence2_length": 33, "average_sentence2_length": 130.834724683113, "max_sentence2_length": 431, - "num_samples": 58696, - "num_samples_sentence2": 58696, - "number_of_characters": 15359416, + "unique_sentence2": 30351, "hf_subset_descriptive_stats": { "ben-eng": { + "num_samples": 1012, + "number_of_characters": 261008, + "unique_pairs": 1012, "min_sentence1_length": 38, "average_sentence1_length": 127.51185770750988, "max_sentence1_length": 333, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 261008 + "unique_sentence2": 1012 }, "eng-ben": { + "num_samples": 1012, + "number_of_characters": 261008, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 38, "average_sentence2_length": 127.51185770750988, "max_sentence2_length": 333, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 261008 + "unique_sentence2": 1012 }, "guj-eng": { + "num_samples": 1012, + "number_of_characters": 258394, + "unique_pairs": 1012, "min_sentence1_length": 38, "average_sentence1_length": 124.92885375494072, "max_sentence1_length": 349, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 258394 + "unique_sentence2": 1012 }, "eng-guj": { + "num_samples": 1012, + "number_of_characters": 258394, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 38, "average_sentence2_length": 124.92885375494072, "max_sentence2_length": 349, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 258394 + "unique_sentence2": 1012 }, "hin-eng": { + "num_samples": 1012, + "number_of_characters": 263040, + "unique_pairs": 1012, "min_sentence1_length": 41, "average_sentence1_length": 129.5197628458498, "max_sentence1_length": 381, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 263040 + "unique_sentence2": 1012 }, "eng-hin": { + "num_samples": 1012, + "number_of_characters": 263029, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 41, "average_sentence2_length": 129.5088932806324, "max_sentence2_length": 381, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 263029 + "unique_sentence2": 1012 }, "kan-eng": { + "num_samples": 1012, + "number_of_characters": 270091, + "unique_pairs": 1012, "min_sentence1_length": 43, "average_sentence1_length": 136.48715415019763, "max_sentence1_length": 388, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 270091 + "unique_sentence2": 1012 }, "eng-kan": { + "num_samples": 1012, + "number_of_characters": 270021, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 43, "average_sentence2_length": 136.4179841897233, "max_sentence2_length": 388, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 270021 + "unique_sentence2": 1012 }, "mal-eng": { + "num_samples": 1012, + "number_of_characters": 281302, + "unique_pairs": 1012, "min_sentence1_length": 48, "average_sentence1_length": 147.56521739130434, "max_sentence1_length": 376, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 281302 + "unique_sentence2": 1012 }, "eng-mal": { + "num_samples": 1012, + "number_of_characters": 281302, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 48, "average_sentence2_length": 147.56521739130434, "max_sentence2_length": 376, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 281302 + "unique_sentence2": 1012 }, "mar-eng": { + "num_samples": 1012, + "number_of_characters": 265212, + "unique_pairs": 1012, "min_sentence1_length": 34, "average_sentence1_length": 131.66600790513834, "max_sentence1_length": 356, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 265212 + "unique_sentence2": 1012 }, "eng-mar": { + "num_samples": 1012, + "number_of_characters": 265023, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 34, "average_sentence2_length": 131.47924901185772, "max_sentence2_length": 355, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 265023 + "unique_sentence2": 1012 }, "tam-eng": { + "num_samples": 1012, + "number_of_characters": 286099, + "unique_pairs": 1012, "min_sentence1_length": 48, "average_sentence1_length": 152.30533596837944, "max_sentence1_length": 404, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 286099 + "unique_sentence2": 1012 }, "eng-tam": { + "num_samples": 1012, + "number_of_characters": 286099, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 48, "average_sentence2_length": 152.30533596837944, "max_sentence2_length": 404, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 286099 + "unique_sentence2": 1012 }, "tel-eng": { + "num_samples": 1012, + "number_of_characters": 264460, + "unique_pairs": 1012, "min_sentence1_length": 39, "average_sentence1_length": 130.92292490118578, "max_sentence1_length": 359, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 264460 + "unique_sentence2": 1012 }, "eng-tel": { + "num_samples": 1012, + "number_of_characters": 264447, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 39, "average_sentence2_length": 130.9100790513834, "max_sentence2_length": 359, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 264447 + "unique_sentence2": 1012 }, "urd-eng": { + "num_samples": 1012, + "number_of_characters": 261886, + "unique_pairs": 1012, "min_sentence1_length": 34, "average_sentence1_length": 128.37944664031622, "max_sentence1_length": 348, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 261886 + "unique_sentence2": 1012 }, "eng-urd": { + "num_samples": 1012, + "number_of_characters": 261885, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 34, "average_sentence2_length": 128.37845849802372, "max_sentence2_length": 348, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 261885 + "unique_sentence2": 1012 }, "asm-eng": { + "num_samples": 1012, + "number_of_characters": 257902, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 124.44268774703558, "max_sentence1_length": 329, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 257902 + "unique_sentence2": 1012 }, "eng-asm": { + "num_samples": 1012, + "number_of_characters": 257909, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 124.449604743083, "max_sentence2_length": 329, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 257909 + "unique_sentence2": 1012 }, "bho-eng": { + "num_samples": 1012, + "number_of_characters": 260578, + "unique_pairs": 1012, "min_sentence1_length": 36, "average_sentence1_length": 127.08695652173913, "max_sentence1_length": 367, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 260578 + "unique_sentence2": 1012 }, "eng-bho": { + "num_samples": 1012, + "number_of_characters": 260601, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 36, "average_sentence2_length": 127.1096837944664, "max_sentence2_length": 367, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 260601 + "unique_sentence2": 1012 }, "nep-eng": { + "num_samples": 1012, + "number_of_characters": 258869, + "unique_pairs": 1012, "min_sentence1_length": 34, "average_sentence1_length": 125.39822134387352, "max_sentence1_length": 362, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 258869 + "unique_sentence2": 1012 }, "eng-nep": { + "num_samples": 1012, + "number_of_characters": 258869, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 34, "average_sentence2_length": 125.39822134387352, "max_sentence2_length": 362, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 258869 + "unique_sentence2": 1012 }, "ory-eng": { + "num_samples": 1012, + "number_of_characters": 266805, + "unique_pairs": 1012, "min_sentence1_length": 38, "average_sentence1_length": 133.2401185770751, "max_sentence1_length": 354, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 266805 + "unique_sentence2": 1012 }, "eng-ory": { + "num_samples": 1012, + "number_of_characters": 266805, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 38, "average_sentence2_length": 133.2401185770751, "max_sentence2_length": 354, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 266805 + "unique_sentence2": 1012 }, "pan-eng": { + "num_samples": 1012, + "number_of_characters": 265391, + "unique_pairs": 1012, "min_sentence1_length": 37, "average_sentence1_length": 131.84288537549406, "max_sentence1_length": 380, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 265391 + "unique_sentence2": 1012 }, "eng-pan": { + "num_samples": 1012, + "number_of_characters": 265391, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 37, "average_sentence2_length": 131.84288537549406, "max_sentence2_length": 380, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 265391 + "unique_sentence2": 1012 }, "pus-eng": { + "num_samples": 1012, + "number_of_characters": 254422, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 121.00395256916995, "max_sentence1_length": 325, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 254422 + "unique_sentence2": 1012 }, "eng-pus": { + "num_samples": 1012, + "number_of_characters": 254421, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 121.00296442687747, "max_sentence2_length": 325, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 254421 + "unique_sentence2": 1012 }, "san-eng": { + "num_samples": 1012, + "number_of_characters": 260339, + "unique_pairs": 1012, "min_sentence1_length": 33, "average_sentence1_length": 126.85079051383399, "max_sentence1_length": 358, + "unique_sentence1": 1011, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 260339 + "unique_sentence2": 1012 }, "eng-san": { + "num_samples": 1012, + "number_of_characters": 260224, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 33, "average_sentence2_length": 126.73715415019763, "max_sentence2_length": 358, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 260224 + "unique_sentence2": 1011 }, "awa-eng": { + "num_samples": 1012, + "number_of_characters": 260179, + "unique_pairs": 1012, "min_sentence1_length": 34, "average_sentence1_length": 126.69268774703558, "max_sentence1_length": 378, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 260179 + "unique_sentence2": 1012 }, "eng-awa": { + "num_samples": 1012, + "number_of_characters": 260137, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 34, "average_sentence2_length": 126.65118577075098, "max_sentence2_length": 378, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 260137 + "unique_sentence2": 1012 }, "bgc-eng": { + "num_samples": 1012, + "number_of_characters": 257450, + "unique_pairs": 1012, "min_sentence1_length": 38, "average_sentence1_length": 123.99604743083005, "max_sentence1_length": 332, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 257450 + "unique_sentence2": 1012 }, "eng-bgc": { + "num_samples": 1012, + "number_of_characters": 257450, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 38, "average_sentence2_length": 123.99604743083005, "max_sentence2_length": 332, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 257450 + "unique_sentence2": 1012 }, "bod-eng": { + "num_samples": 1012, + "number_of_characters": 280188, + "unique_pairs": 1012, "min_sentence1_length": 42, "average_sentence1_length": 146.46442687747034, "max_sentence1_length": 431, + "unique_sentence1": 1009, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 280188 + "unique_sentence2": 1012 }, "eng-bod": { + "num_samples": 1012, + "number_of_characters": 280126, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 42, "average_sentence2_length": 146.40316205533597, "max_sentence2_length": 431, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 280126 + "unique_sentence2": 1009 }, "boy-eng": { + "num_samples": 1012, + "number_of_characters": 277538, + "unique_pairs": 1012, "min_sentence1_length": 36, "average_sentence1_length": 143.84584980237153, "max_sentence1_length": 396, + "unique_sentence1": 1011, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 277538 + "unique_sentence2": 1012 }, "eng-boy": { + "num_samples": 1012, + "number_of_characters": 277538, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 36, "average_sentence2_length": 143.84584980237153, "max_sentence2_length": 396, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 277538 + "unique_sentence2": 1011 }, "gbm-eng": { + "num_samples": 1012, + "number_of_characters": 261027, + "unique_pairs": 1012, "min_sentence1_length": 38, "average_sentence1_length": 127.53063241106719, "max_sentence1_length": 333, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 261027 + "unique_sentence2": 1012 }, "eng-gbm": { + "num_samples": 1012, + "number_of_characters": 261027, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 38, "average_sentence2_length": 127.53063241106719, "max_sentence2_length": 333, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 261027 + "unique_sentence2": 1012 }, "gom-eng": { + "num_samples": 1012, + "number_of_characters": 259182, + "unique_pairs": 1012, "min_sentence1_length": 37, "average_sentence1_length": 125.70750988142292, "max_sentence1_length": 335, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 259182 + "unique_sentence2": 1012 }, "eng-gom": { + "num_samples": 1012, + "number_of_characters": 259182, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 37, "average_sentence2_length": 125.70750988142292, "max_sentence2_length": 335, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 259182 + "unique_sentence2": 1012 }, "hne-eng": { + "num_samples": 1012, + "number_of_characters": 258911, + "unique_pairs": 1012, "min_sentence1_length": 42, "average_sentence1_length": 125.43972332015811, "max_sentence1_length": 327, + "unique_sentence1": 1011, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 258911 + "unique_sentence2": 1012 }, "eng-hne": { + "num_samples": 1012, + "number_of_characters": 258915, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 42, "average_sentence2_length": 125.44367588932806, "max_sentence2_length": 326, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 258915 + "unique_sentence2": 1011 }, "raj-eng": { + "num_samples": 1012, + "number_of_characters": 261987, + "unique_pairs": 1012, "min_sentence1_length": 38, "average_sentence1_length": 128.47924901185772, "max_sentence1_length": 338, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 261987 + "unique_sentence2": 1012 }, "eng-raj": { + "num_samples": 1012, + "number_of_characters": 261987, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 38, "average_sentence2_length": 128.47924901185772, "max_sentence2_length": 338, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 261987 + "unique_sentence2": 1012 }, "mai-eng": { + "num_samples": 1012, + "number_of_characters": 261374, + "unique_pairs": 1012, "min_sentence1_length": 36, "average_sentence1_length": 127.87351778656127, "max_sentence1_length": 350, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 261374 + "unique_sentence2": 1012 }, "eng-mai": { + "num_samples": 1012, + "number_of_characters": 261377, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 36, "average_sentence2_length": 127.87648221343873, "max_sentence2_length": 350, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 261377 + "unique_sentence2": 1012 }, "mni-eng": { + "num_samples": 1012, + "number_of_characters": 268767, + "unique_pairs": 1012, "min_sentence1_length": 38, "average_sentence1_length": 135.17885375494072, "max_sentence1_length": 353, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 268767 + "unique_sentence2": 1012 }, "eng-mni": { + "num_samples": 1012, + "number_of_characters": 268768, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 38, "average_sentence2_length": 135.1798418972332, "max_sentence2_length": 354, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 268768 + "unique_sentence2": 1012 }, "mup-eng": { + "num_samples": 1012, + "number_of_characters": 262034, + "unique_pairs": 1012, "min_sentence1_length": 40, "average_sentence1_length": 128.52569169960475, "max_sentence1_length": 340, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 262034 + "unique_sentence2": 1012 }, "eng-mup": { + "num_samples": 1012, + "number_of_characters": 262034, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 40, "average_sentence2_length": 128.52569169960475, "max_sentence2_length": 340, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 262034 + "unique_sentence2": 1012 }, "mwr-eng": { + "num_samples": 1012, + "number_of_characters": 263749, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.22035573122528, "max_sentence1_length": 345, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 263749 + "unique_sentence2": 1012 }, "eng-mwr": { + "num_samples": 1012, + "number_of_characters": 263749, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.22035573122528, "max_sentence2_length": 345, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 263749 + "unique_sentence2": 1012 }, "sat-eng": { + "num_samples": 1012, + "number_of_characters": 271757, + "unique_pairs": 1012, "min_sentence1_length": 43, "average_sentence1_length": 138.13339920948616, "max_sentence1_length": 366, + "unique_sentence1": 1012, "min_sentence2_length": 35, "average_sentence2_length": 130.401185770751, "max_sentence2_length": 368, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 271757 + "unique_sentence2": 1012 }, "eng-sat": { + "num_samples": 1012, + "number_of_characters": 271757, + "unique_pairs": 1012, "min_sentence1_length": 35, "average_sentence1_length": 130.401185770751, "max_sentence1_length": 368, + "unique_sentence1": 1012, "min_sentence2_length": 43, "average_sentence2_length": 138.13339920948616, "max_sentence2_length": 366, - "num_samples": 1012, - "num_samples_sentence2": 1012, - "number_of_characters": 271757 + "unique_sentence2": 1012 } } } diff --git a/mteb/descriptive_stats/BitextMining/NTREXBitextMining.json b/mteb/descriptive_stats/BitextMining/NTREXBitextMining.json index 9c1432ff7..3adf27b3d 100644 --- a/mteb/descriptive_stats/BitextMining/NTREXBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/NTREXBitextMining.json @@ -1,21090 +1,24924 @@ { "test": { + "num_samples": 3826252, + "number_of_characters": 988355274, + "unique_pairs": 3820263, "min_sentence1_length": 1, "average_sentence1_length": 129.15449296073547, "max_sentence1_length": 773, + "unique_sentence1": 241259, "min_sentence2_length": 1, "average_sentence2_length": 129.15449296073547, "max_sentence2_length": 773, - "num_samples": 3826252, - "num_samples_sentence2": 3826252, - "number_of_characters": 988355274, + "unique_sentence2": 241259, "hf_subset_descriptive_stats": { "afr_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 520490, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 134.37756634952427, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 126.25838758137206, "max_sentence2_length": 522, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520490 + "unique_sentence2": 1995 }, "afr_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 564002, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 134.37756634952427, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564002 + "unique_sentence2": 1996 }, "afr_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 516072, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 134.37756634952427, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 516072 + "unique_sentence2": 1997 }, "afr_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 526155, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 134.37756634952427, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0951427140711, "max_sentence2_length": 433, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526155 + "unique_sentence2": 1997 }, "afr_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 530560, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 134.37756634952427, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 131.30095142714072, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530560 + "unique_sentence2": 1996 }, "afr_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 549109, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 134.37756634952427, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 140.58938407611416, "max_sentence2_length": 543, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549109 + "unique_sentence2": 1996 }, "afr_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 560267, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 134.37756634952427, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 560267 + "unique_sentence2": 1996 }, "afr_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 516709, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 134.37756634952427, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.36504757135704, "max_sentence2_length": 417, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 516709 + "unique_sentence2": 1996 }, "afr_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 519796, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 134.37756634952427, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 125.91086629944917, "max_sentence2_length": 482, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 519796 + "unique_sentence2": 1996 }, "afr_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 520179, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 134.37756634952427, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520179 + "unique_sentence2": 1996 }, "amh_Ethi-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 415227, + "unique_pairs": 1997, "min_sentence1_length": 1, "average_sentence1_length": 83.87931897846771, "max_sentence1_length": 290, + "unique_sentence1": 1994, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 415227 + "unique_sentence2": 1997 }, "amh_Ethi-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 437473, + "unique_pairs": 1997, "min_sentence1_length": 1, "average_sentence1_length": 83.87931897846771, "max_sentence1_length": 290, + "unique_sentence1": 1994, "min_sentence2_length": 8, "average_sentence2_length": 135.185778668002, "max_sentence2_length": 483, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 437473 + "unique_sentence2": 1997 }, "amh_Ethi-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 413608, + "unique_pairs": 1997, "min_sentence1_length": 1, "average_sentence1_length": 83.87931897846771, "max_sentence1_length": 290, + "unique_sentence1": 1994, "min_sentence2_length": 6, "average_sentence2_length": 123.23535302954431, "max_sentence2_length": 469, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 413608 + "unique_sentence2": 1997 }, "amh_Ethi-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 459006, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 83.87931897846771, "max_sentence1_length": 290, + "unique_sentence1": 1994, "min_sentence2_length": 5, "average_sentence2_length": 145.96845267901853, "max_sentence2_length": 487, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 459006 + "unique_sentence2": 1996 }, "amh_Ethi-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 404938, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 83.87931897846771, "max_sentence1_length": 290, + "unique_sentence1": 1994, "min_sentence2_length": 9, "average_sentence2_length": 118.89384076114172, "max_sentence2_length": 466, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 404938 + "unique_sentence2": 1984 }, "amh_Ethi-som_Latn": { + "num_samples": 1997, + "number_of_characters": 458799, + "unique_pairs": 1997, "min_sentence1_length": 1, "average_sentence1_length": 83.87931897846771, "max_sentence1_length": 290, + "unique_sentence1": 1994, "min_sentence2_length": 8, "average_sentence2_length": 145.8647971957937, "max_sentence2_length": 455, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 458799 + "unique_sentence2": 1997 }, "amh_Ethi-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 455649, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 83.87931897846771, "max_sentence1_length": 290, + "unique_sentence1": 1994, "min_sentence2_length": 8, "average_sentence2_length": 144.28743114672008, "max_sentence2_length": 510, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 455649 + "unique_sentence2": 1996 }, "amh_Ethi-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 440016, + "unique_pairs": 1997, "min_sentence1_length": 1, "average_sentence1_length": 83.87931897846771, "max_sentence1_length": 290, + "unique_sentence1": 1994, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 440016 + "unique_sentence2": 1997 }, "amh_Ethi-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 332745, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 83.87931897846771, "max_sentence1_length": 290, + "unique_sentence1": 1994, "min_sentence2_length": 5, "average_sentence2_length": 82.743114672008, "max_sentence2_length": 272, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 332745 + "unique_sentence2": 1996 }, "amh_Ethi-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 501790, + "unique_pairs": 1997, "min_sentence1_length": 1, "average_sentence1_length": 83.87931897846771, "max_sentence1_length": 290, + "unique_sentence1": 1994, "min_sentence2_length": 6, "average_sentence2_length": 167.39258888332498, "max_sentence2_length": 556, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 501790 + "unique_sentence2": 1997 }, "amh_Ethi-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 407310, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 83.87931897846771, "max_sentence1_length": 290, + "unique_sentence1": 1994, "min_sentence2_length": 6, "average_sentence2_length": 120.08162243365048, "max_sentence2_length": 405, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 407310 + "unique_sentence2": 1990 }, "amh_Ethi-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 435597, + "unique_pairs": 1997, "min_sentence1_length": 1, "average_sentence1_length": 83.87931897846771, "max_sentence1_length": 290, + "unique_sentence1": 1994, "min_sentence2_length": 6, "average_sentence2_length": 134.2463695543315, "max_sentence2_length": 492, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 435597 + "unique_sentence2": 1997 }, "amh_Ethi-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 483595, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 83.87931897846771, "max_sentence1_length": 290, + "unique_sentence1": 1994, "min_sentence2_length": 7, "average_sentence2_length": 158.2814221331998, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 483595 + "unique_sentence2": 1996 }, "amh_Ethi-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 425239, + "unique_pairs": 1997, "min_sentence1_length": 1, "average_sentence1_length": 83.87931897846771, "max_sentence1_length": 290, + "unique_sentence1": 1994, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 425239 + "unique_sentence2": 1996 }, "arb_Arab-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 474983, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 474983 + "unique_sentence2": 1997 }, "arb_Arab-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 483548, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 5, "average_sentence2_length": 126.37305958938407, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 483548 + "unique_sentence2": 1995 }, "arb_Arab-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 526831, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526831 + "unique_sentence2": 1996 }, "arb_Arab-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 530308, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530308 + "unique_sentence2": 1996 }, "arb_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 478901, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 478901 + "unique_sentence2": 1997 }, "arb_Arab-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 474520, + "unique_pairs": 1995, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 474520 + "unique_sentence2": 1995 }, "arb_Arab-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 500981, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 500981 + "unique_sentence2": 1996 }, "arb_Arab-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 524289, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 524289 + "unique_sentence2": 1996 }, "arb_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 431477, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 431477 + "unique_sentence2": 1996 }, "arb_Arab-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 492756, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 492756 + "unique_sentence2": 1996 }, "arb_Arab-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 509557, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509557 + "unique_sentence2": 1997 }, "arb_Arab-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 518153, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 518153 + "unique_sentence2": 1997 }, "arb_Arab-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 342807, + "unique_pairs": 1995, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 342807 + "unique_sentence2": 1994 }, "arb_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 477127, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 123.15773660490736, "max_sentence2_length": 420, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 477127 + "unique_sentence2": 1996 }, "arb_Arab-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 364586, + "unique_pairs": 1995, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 364586 + "unique_sentence2": 1995 }, "arb_Arab-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 490578, + "unique_pairs": 1995, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 490578 + "unique_sentence2": 1995 }, "arb_Arab-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 445016, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 107.07811717576365, "max_sentence2_length": 392, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 445016 + "unique_sentence2": 1993 }, "arb_Arab-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 523096, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 523096 + "unique_sentence2": 1996 }, "arb_Arab-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 509047, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509047 + "unique_sentence2": 1996 }, "arb_Arab-por_Latn": { + "num_samples": 1997, + "number_of_characters": 508396, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508396 + "unique_sentence2": 1996 }, "arb_Arab-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 473717, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 121.45017526289435, "max_sentence2_length": 365, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 473717 + "unique_sentence2": 1997 }, "arb_Arab-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 473814, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 121.49874812218327, "max_sentence2_length": 366, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 473814 + "unique_sentence2": 1996 }, "arb_Arab-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 506074, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506074 + "unique_sentence2": 1996 }, "arb_Arab-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 446094, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 3, "average_sentence2_length": 107.6179268903355, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 446094 + "unique_sentence2": 1996 }, "arb_Arab-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 519381, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 519381 + "unique_sentence2": 1996 }, "arb_Arab-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 503690, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503690 + "unique_sentence2": 1997 }, "arb_Arab-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 483008, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 483008 + "unique_sentence2": 1996 }, "arb_Arab-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 541142, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 541142 + "unique_sentence2": 1997 }, "arb_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 505328, + "unique_pairs": 1995, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 3, "average_sentence2_length": 137.27941912869304, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 505328 + "unique_sentence2": 1995 }, "arb_Arab-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 496794, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 496794 + "unique_sentence2": 1997 }, "arb_Arab-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 502302, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 502302 + "unique_sentence2": 1996 }, "arb_Arab-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 322659, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 322659 + "unique_sentence2": 1996 }, "arb_Arab-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 488913, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 115.76414621932899, "max_sentence1_length": 362, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 488913 + "unique_sentence2": 1996 }, "aze_Latn-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515960, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 135.0195292939409, "max_sentence1_length": 398, + "unique_sentence1": 1997, "min_sentence2_length": 11, "average_sentence2_length": 123.34802203304957, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515960 + "unique_sentence2": 1995 }, "aze_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 517354, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 135.0195292939409, "max_sentence1_length": 398, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 517354 + "unique_sentence2": 1997 }, "aze_Latn-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 529910, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 135.0195292939409, "max_sentence1_length": 398, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 130.33350025037555, "max_sentence2_length": 473, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 529910 + "unique_sentence2": 1996 }, "aze_Latn-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 520498, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 135.0195292939409, "max_sentence1_length": 398, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 125.62043064596895, "max_sentence2_length": 395, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520498 + "unique_sentence2": 1996 }, "aze_Latn-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515560, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 135.0195292939409, "max_sentence1_length": 398, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 123.14772158237356, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515560 + "unique_sentence2": 1996 }, "aze_Latn-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 554908, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 135.0195292939409, "max_sentence1_length": 398, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 142.85127691537306, "max_sentence2_length": 576, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 554908 + "unique_sentence2": 1996 }, "aze_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 535247, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 135.0195292939409, "max_sentence1_length": 398, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 535247 + "unique_sentence2": 1997 }, "aze_Latn-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 580656, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 135.0195292939409, "max_sentence1_length": 398, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 155.74461692538807, "max_sentence2_length": 592, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 580656 + "unique_sentence2": 1996 }, "aze_Latn-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 563329, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 135.0195292939409, "max_sentence1_length": 398, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 147.06810215322986, "max_sentence2_length": 470, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 563329 + "unique_sentence2": 1996 }, "bak_Cyrl-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 515960, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 123.34802203304957, "max_sentence1_length": 437, + "unique_sentence1": 1995, "min_sentence2_length": 5, "average_sentence2_length": 135.0195292939409, "max_sentence2_length": 398, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515960 + "unique_sentence2": 1997 }, "bak_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 494046, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 123.34802203304957, "max_sentence1_length": 437, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 494046 + "unique_sentence2": 1997 }, "bak_Cyrl-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 506602, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 123.34802203304957, "max_sentence1_length": 437, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 130.33350025037555, "max_sentence2_length": 473, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506602 + "unique_sentence2": 1996 }, "bak_Cyrl-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 497190, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 123.34802203304957, "max_sentence1_length": 437, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 125.62043064596895, "max_sentence2_length": 395, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 497190 + "unique_sentence2": 1996 }, "bak_Cyrl-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 492252, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 123.34802203304957, "max_sentence1_length": 437, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 123.14772158237356, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 492252 + "unique_sentence2": 1996 }, "bak_Cyrl-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 531600, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 123.34802203304957, "max_sentence1_length": 437, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 142.85127691537306, "max_sentence2_length": 576, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531600 + "unique_sentence2": 1996 }, "bak_Cyrl-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 511939, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 123.34802203304957, "max_sentence1_length": 437, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 511939 + "unique_sentence2": 1997 }, "bak_Cyrl-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 557348, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 123.34802203304957, "max_sentence1_length": 437, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 155.74461692538807, "max_sentence2_length": 592, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 557348 + "unique_sentence2": 1996 }, "bak_Cyrl-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 540021, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 123.34802203304957, "max_sentence1_length": 437, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 147.06810215322986, "max_sentence2_length": 470, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540021 + "unique_sentence2": 1996 }, "bel_Cyrl-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 511000, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 128.2373560340511, "max_sentence1_length": 422, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 127.64646970455684, "max_sentence2_length": 434, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 511000 + "unique_sentence2": 1996 }, "bel_Cyrl-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 525979, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 128.2373560340511, "max_sentence1_length": 422, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 135.14722083124687, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525979 + "unique_sentence2": 1996 }, "bel_Cyrl-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 497408, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 128.2373560340511, "max_sentence1_length": 422, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 120.84026039058588, "max_sentence2_length": 474, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 497408 + "unique_sentence2": 1997 }, "bel_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 503810, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 128.2373560340511, "max_sentence1_length": 422, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503810 + "unique_sentence2": 1997 }, "bel_Cyrl-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 512015, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 128.2373560340511, "max_sentence1_length": 422, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 128.1547320981472, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 512015 + "unique_sentence2": 1997 }, "bel_Cyrl-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 523981, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 128.2373560340511, "max_sentence1_length": 422, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 134.1467200801202, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 523981 + "unique_sentence2": 1997 }, "bel_Cyrl-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 533956, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 128.2373560340511, "max_sentence1_length": 422, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 533956 + "unique_sentence2": 1996 }, "bel_Cyrl-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 530983, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 128.2373560340511, "max_sentence1_length": 422, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530983 + "unique_sentence2": 1996 }, "bel_Cyrl-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 509059, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 128.2373560340511, "max_sentence1_length": 422, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 126.67451176765148, "max_sentence2_length": 403, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509059 + "unique_sentence2": 1996 }, "bel_Cyrl-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 508986, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 128.2373560340511, "max_sentence1_length": 422, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 126.63795693540311, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508986 + "unique_sentence2": 1996 }, "bel_Cyrl-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 508393, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 128.2373560340511, "max_sentence1_length": 422, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 126.34101151727592, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508393 + "unique_sentence2": 1995 }, "bel_Cyrl-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 512231, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 128.2373560340511, "max_sentence1_length": 422, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 128.26289434151226, "max_sentence2_length": 452, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 512231 + "unique_sentence2": 1996 }, "bel_Cyrl-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 518873, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 128.2373560340511, "max_sentence1_length": 422, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 131.58888332498748, "max_sentence2_length": 440, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 518873 + "unique_sentence2": 1996 }, "bem_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 546212, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 149.47020530796195, "max_sentence1_length": 465, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 546212 + "unique_sentence2": 1997 }, "bem_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 537470, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 149.47020530796195, "max_sentence1_length": 465, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 119.6685027541312, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 537470 + "unique_sentence2": 1994 }, "bem_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 526972, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 149.47020530796195, "max_sentence1_length": 465, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 114.4116174261392, "max_sentence2_length": 376, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526972 + "unique_sentence2": 1996 }, "bem_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 602279, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 149.47020530796195, "max_sentence1_length": 465, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 152.12168252378567, "max_sentence2_length": 541, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 602279 + "unique_sentence2": 1996 }, "bem_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 596231, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 149.47020530796195, "max_sentence1_length": 465, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 149.09313970956435, "max_sentence2_length": 590, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 596231 + "unique_sentence2": 1997 }, "bem_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 582774, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 149.47020530796195, "max_sentence1_length": 465, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 142.35453179769655, "max_sentence2_length": 464, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582774 + "unique_sentence2": 1993 }, "bem_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 596822, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 149.47020530796195, "max_sentence1_length": 465, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 149.38908362543816, "max_sentence2_length": 511, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 596822 + "unique_sentence2": 1995 }, "bem_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 598248, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 149.47020530796195, "max_sentence1_length": 465, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 150.10315473209815, "max_sentence2_length": 535, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 598248 + "unique_sentence2": 1993 }, "ben_Beng-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 474983, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 474983 + "unique_sentence2": 1995 }, "ben_Beng-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 539452, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 539452 + "unique_sentence2": 1996 }, "ben_Beng-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 547650, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 152.15222834251378, "max_sentence2_length": 609, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547650 + "unique_sentence2": 1996 }, "ben_Beng-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 542929, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 542929 + "unique_sentence2": 1996 }, "ben_Beng-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 491522, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 491522 + "unique_sentence2": 1997 }, "ben_Beng-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 519005, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 137.80821231847773, "max_sentence2_length": 393, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 519005 + "unique_sentence2": 1997 }, "ben_Beng-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 487141, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 487141 + "unique_sentence2": 1995 }, "ben_Beng-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 513602, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 513602 + "unique_sentence2": 1996 }, "ben_Beng-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 536910, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536910 + "unique_sentence2": 1996 }, "ben_Beng-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 488733, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 122.64947421131697, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 488733 + "unique_sentence2": 1997 }, "ben_Beng-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 444098, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 444098 + "unique_sentence2": 1996 }, "ben_Beng-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 505377, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 505377 + "unique_sentence2": 1996 }, "ben_Beng-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 522178, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522178 + "unique_sentence2": 1997 }, "ben_Beng-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 530774, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530774 + "unique_sentence2": 1997 }, "ben_Beng-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 355428, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 355428 + "unique_sentence2": 1994 }, "ben_Beng-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 509338, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 132.96745117676514, "max_sentence2_length": 449, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509338 + "unique_sentence2": 1996 }, "ben_Beng-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 377207, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 377207 + "unique_sentence2": 1995 }, "ben_Beng-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 503199, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503199 + "unique_sentence2": 1995 }, "ben_Beng-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 504689, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 130.63945918878318, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 504689 + "unique_sentence2": 1995 }, "ben_Beng-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 492025, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 124.29794692038057, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 492025 + "unique_sentence2": 1996 }, "ben_Beng-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 535717, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 535717 + "unique_sentence2": 1996 }, "ben_Beng-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 494224, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 125.39909864797195, "max_sentence2_length": 383, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 494224 + "unique_sentence2": 1996 }, "ben_Beng-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 521668, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 521668 + "unique_sentence2": 1996 }, "ben_Beng-por_Latn": { + "num_samples": 1997, + "number_of_characters": 521017, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 521017 + "unique_sentence2": 1996 }, "ben_Beng-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 518695, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 518695 + "unique_sentence2": 1996 }, "ben_Beng-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 502543, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 129.56484727090637, "max_sentence2_length": 441, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 502543 + "unique_sentence2": 1996 }, "ben_Beng-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 464129, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 110.32899349023535, "max_sentence2_length": 335, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 464129 + "unique_sentence2": 1996 }, "ben_Beng-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 532002, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532002 + "unique_sentence2": 1996 }, "ben_Beng-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 516311, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 516311 + "unique_sentence2": 1997 }, "ben_Beng-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 495629, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495629 + "unique_sentence2": 1996 }, "ben_Beng-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 553763, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 553763 + "unique_sentence2": 1997 }, "ben_Beng-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 491329, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 123.9494241362043, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 491329 + "unique_sentence2": 1996 }, "ben_Beng-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 509415, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509415 + "unique_sentence2": 1997 }, "ben_Beng-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 491800, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.18527791687531, "max_sentence2_length": 390, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 491800 + "unique_sentence2": 1996 }, "ben_Beng-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 514923, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 514923 + "unique_sentence2": 1996 }, "ben_Beng-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 335280, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 335280 + "unique_sentence2": 1996 }, "ben_Beng-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 501534, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 122.08412618928392, "max_sentence1_length": 402, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 501534 + "unique_sentence2": 1996 }, "bod_Tibt-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 543850, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 150.54031046569855, "max_sentence1_length": 478, + "unique_sentence1": 1993, "min_sentence2_length": 5, "average_sentence2_length": 121.79318978467701, "max_sentence2_length": 411, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 543850 + "unique_sentence2": 1992 }, "bod_Tibt-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 548349, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 150.54031046569855, "max_sentence1_length": 478, + "unique_sentence1": 1993, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 548349 + "unique_sentence2": 1997 }, "bod_Tibt-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 589120, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 150.54031046569855, "max_sentence1_length": 478, + "unique_sentence1": 1993, "min_sentence2_length": 6, "average_sentence2_length": 144.4621932899349, "max_sentence2_length": 517, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 589120 + "unique_sentence2": 1996 }, "bod_Tibt-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 567609, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 150.54031046569855, "max_sentence1_length": 478, + "unique_sentence1": 1993, "min_sentence2_length": 7, "average_sentence2_length": 133.69053580370556, "max_sentence2_length": 507, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 567609 + "unique_sentence2": 1997 }, "bod_Tibt-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 559677, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 150.54031046569855, "max_sentence1_length": 478, + "unique_sentence1": 1993, "min_sentence2_length": 11, "average_sentence2_length": 129.7185778668002, "max_sentence2_length": 414, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 559677 + "unique_sentence2": 1997 }, "bod_Tibt-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 612483, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 150.54031046569855, "max_sentence1_length": 478, + "unique_sentence1": 1993, "min_sentence2_length": 9, "average_sentence2_length": 156.16124186279418, "max_sentence2_length": 773, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 612483 + "unique_sentence2": 1997 }, "bod_Tibt-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 538097, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 150.54031046569855, "max_sentence1_length": 478, + "unique_sentence1": 1993, "min_sentence2_length": 7, "average_sentence2_length": 118.91236855282925, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 538097 + "unique_sentence2": 1996 }, "bos_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 511000, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 127.64646970455684, "max_sentence1_length": 434, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 128.2373560340511, "max_sentence2_length": 422, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 511000 + "unique_sentence2": 1996 }, "bos_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 524799, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 127.64646970455684, "max_sentence1_length": 434, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 135.14722083124687, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 524799 + "unique_sentence2": 1996 }, "bos_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 496228, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 127.64646970455684, "max_sentence1_length": 434, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 120.84026039058588, "max_sentence2_length": 474, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 496228 + "unique_sentence2": 1997 }, "bos_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 502630, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 127.64646970455684, "max_sentence1_length": 434, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 502630 + "unique_sentence2": 1997 }, "bos_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 510835, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 127.64646970455684, "max_sentence1_length": 434, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 128.1547320981472, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 510835 + "unique_sentence2": 1997 }, "bos_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 522801, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 127.64646970455684, "max_sentence1_length": 434, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 134.1467200801202, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522801 + "unique_sentence2": 1997 }, "bos_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 532776, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 127.64646970455684, "max_sentence1_length": 434, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532776 + "unique_sentence2": 1996 }, "bos_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 529803, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 127.64646970455684, "max_sentence1_length": 434, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 529803 + "unique_sentence2": 1996 }, "bos_Latn-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 507879, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 127.64646970455684, "max_sentence1_length": 434, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 126.67451176765148, "max_sentence2_length": 403, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 507879 + "unique_sentence2": 1996 }, "bos_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 507806, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 127.64646970455684, "max_sentence1_length": 434, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 126.63795693540311, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 507806 + "unique_sentence2": 1996 }, "bos_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 507213, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 127.64646970455684, "max_sentence1_length": 434, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 126.34101151727592, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 507213 + "unique_sentence2": 1995 }, "bos_Latn-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 511051, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 127.64646970455684, "max_sentence1_length": 434, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 128.26289434151226, "max_sentence2_length": 452, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 511051 + "unique_sentence2": 1996 }, "bos_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 517693, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 127.64646970455684, "max_sentence1_length": 434, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 131.58888332498748, "max_sentence2_length": 440, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 517693 + "unique_sentence2": 1996 }, "bul_Cyrl-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 525979, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 135.14722083124687, "max_sentence1_length": 493, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 128.2373560340511, "max_sentence2_length": 422, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525979 + "unique_sentence2": 1996 }, "bul_Cyrl-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 524799, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 135.14722083124687, "max_sentence1_length": 493, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 127.64646970455684, "max_sentence2_length": 434, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 524799 + "unique_sentence2": 1996 }, "bul_Cyrl-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 511207, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 135.14722083124687, "max_sentence1_length": 493, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 120.84026039058588, "max_sentence2_length": 474, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 511207 + "unique_sentence2": 1997 }, "bul_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 517609, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 135.14722083124687, "max_sentence1_length": 493, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 517609 + "unique_sentence2": 1997 }, "bul_Cyrl-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 525814, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 135.14722083124687, "max_sentence1_length": 493, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 128.1547320981472, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525814 + "unique_sentence2": 1997 }, "bul_Cyrl-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 537780, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 135.14722083124687, "max_sentence1_length": 493, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 134.1467200801202, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 537780 + "unique_sentence2": 1997 }, "bul_Cyrl-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 547755, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 135.14722083124687, "max_sentence1_length": 493, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547755 + "unique_sentence2": 1996 }, "bul_Cyrl-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 544782, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 135.14722083124687, "max_sentence1_length": 493, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 544782 + "unique_sentence2": 1996 }, "bul_Cyrl-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 522858, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 135.14722083124687, "max_sentence1_length": 493, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 126.67451176765148, "max_sentence2_length": 403, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522858 + "unique_sentence2": 1996 }, "bul_Cyrl-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 522785, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 135.14722083124687, "max_sentence1_length": 493, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 126.63795693540311, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522785 + "unique_sentence2": 1996 }, "bul_Cyrl-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 522192, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 135.14722083124687, "max_sentence1_length": 493, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 126.34101151727592, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522192 + "unique_sentence2": 1995 }, "bul_Cyrl-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 526030, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 135.14722083124687, "max_sentence1_length": 493, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 128.26289434151226, "max_sentence2_length": 452, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526030 + "unique_sentence2": 1996 }, "bul_Cyrl-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 532672, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 135.14722083124687, "max_sentence1_length": 493, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 131.58888332498748, "max_sentence2_length": 440, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532672 + "unique_sentence2": 1996 }, "cat_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 530680, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 141.6925388082123, "max_sentence1_length": 460, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530680 + "unique_sentence2": 1997 }, "cat_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 576068, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 141.6925388082123, "max_sentence1_length": 460, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 576068 + "unique_sentence2": 1996 }, "cat_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 554946, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 141.6925388082123, "max_sentence1_length": 460, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 136.19729594391586, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 554946 + "unique_sentence2": 1996 }, "cat_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 572177, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 141.6925388082123, "max_sentence1_length": 460, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 144.82573860791186, "max_sentence2_length": 623, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 572177 + "unique_sentence2": 1996 }, "cat_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 560435, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 141.6925388082123, "max_sentence1_length": 460, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 138.94591887831749, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 560435 + "unique_sentence2": 1996 }, "cat_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 560175, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 141.6925388082123, "max_sentence1_length": 460, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 560175 + "unique_sentence2": 1996 }, "cat_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 575445, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 141.6925388082123, "max_sentence1_length": 460, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 146.4621932899349, "max_sentence2_length": 518, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 575445 + "unique_sentence2": 1997 }, "cat_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 571160, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 141.6925388082123, "max_sentence1_length": 460, + "unique_sentence1": 1997, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 571160 + "unique_sentence2": 1996 }, "ces_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 497408, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 120.84026039058588, "max_sentence1_length": 474, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 128.2373560340511, "max_sentence2_length": 422, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 497408 + "unique_sentence2": 1996 }, "ces_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 496228, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 120.84026039058588, "max_sentence1_length": 474, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 127.64646970455684, "max_sentence2_length": 434, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 496228 + "unique_sentence2": 1996 }, "ces_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 511207, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 120.84026039058588, "max_sentence1_length": 474, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 135.14722083124687, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 511207 + "unique_sentence2": 1996 }, "ces_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 489038, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 120.84026039058588, "max_sentence1_length": 474, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 489038 + "unique_sentence2": 1997 }, "ces_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 497243, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 120.84026039058588, "max_sentence1_length": 474, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 128.1547320981472, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 497243 + "unique_sentence2": 1997 }, "ces_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 509209, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 120.84026039058588, "max_sentence1_length": 474, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 134.1467200801202, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509209 + "unique_sentence2": 1997 }, "ces_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 519184, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 120.84026039058588, "max_sentence1_length": 474, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 519184 + "unique_sentence2": 1996 }, "ces_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 516211, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 120.84026039058588, "max_sentence1_length": 474, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 516211 + "unique_sentence2": 1996 }, "ces_Latn-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 494287, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 120.84026039058588, "max_sentence1_length": 474, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 126.67451176765148, "max_sentence2_length": 403, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 494287 + "unique_sentence2": 1996 }, "ces_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 494214, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 120.84026039058588, "max_sentence1_length": 474, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 126.63795693540311, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 494214 + "unique_sentence2": 1996 }, "ces_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 493621, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 120.84026039058588, "max_sentence1_length": 474, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 126.34101151727592, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 493621 + "unique_sentence2": 1995 }, "ces_Latn-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 497459, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 120.84026039058588, "max_sentence1_length": 474, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 128.26289434151226, "max_sentence2_length": 452, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 497459 + "unique_sentence2": 1996 }, "ces_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 504101, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 120.84026039058588, "max_sentence1_length": 474, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 131.58888332498748, "max_sentence2_length": 440, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 504101 + "unique_sentence2": 1996 }, "ckb_Arab-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 483548, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 126.37305958938407, "max_sentence1_length": 399, + "unique_sentence1": 1995, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 483548 + "unique_sentence2": 1995 }, "ckb_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 500087, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 126.37305958938407, "max_sentence1_length": 399, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 500087 + "unique_sentence2": 1997 }, "ckb_Arab-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 495706, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 126.37305958938407, "max_sentence1_length": 399, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495706 + "unique_sentence2": 1995 }, "ckb_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 452663, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 126.37305958938407, "max_sentence1_length": 399, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 452663 + "unique_sentence2": 1996 }, "ckb_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 498313, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 126.37305958938407, "max_sentence1_length": 399, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 123.15773660490736, "max_sentence2_length": 420, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 498313 + "unique_sentence2": 1996 }, "ckb_Arab-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 466202, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 126.37305958938407, "max_sentence1_length": 399, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 107.07811717576365, "max_sentence2_length": 392, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 466202 + "unique_sentence2": 1993 }, "ckb_Arab-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 494903, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 126.37305958938407, "max_sentence1_length": 399, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 121.45017526289435, "max_sentence2_length": 365, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 494903 + "unique_sentence2": 1997 }, "ckb_Arab-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 495000, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 126.37305958938407, "max_sentence1_length": 399, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 121.49874812218327, "max_sentence2_length": 366, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495000 + "unique_sentence2": 1996 }, "ckb_Arab-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 467280, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 126.37305958938407, "max_sentence1_length": 399, + "unique_sentence1": 1995, "min_sentence2_length": 3, "average_sentence2_length": 107.6179268903355, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 467280 + "unique_sentence2": 1996 }, "ckb_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 526514, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 126.37305958938407, "max_sentence1_length": 399, + "unique_sentence1": 1995, "min_sentence2_length": 3, "average_sentence2_length": 137.27941912869304, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526514 + "unique_sentence2": 1995 }, "cym_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 514225, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.4526790185278, "max_sentence1_length": 444, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 514225 + "unique_sentence2": 1997 }, "cym_Latn-gle_Latn": { + "num_samples": 1997, + "number_of_characters": 561314, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.4526790185278, "max_sentence1_length": 444, + "unique_sentence1": 1997, "min_sentence2_length": 11, "average_sentence2_length": 147.62593890836254, "max_sentence2_length": 461, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561314 + "unique_sentence2": 1997 }, "dan_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 520490, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 126.25838758137206, "max_sentence1_length": 522, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 134.37756634952427, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520490 + "unique_sentence2": 1996 }, "dan_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 547788, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 126.25838758137206, "max_sentence1_length": 522, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547788 + "unique_sentence2": 1996 }, "dan_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 499858, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 126.25838758137206, "max_sentence1_length": 522, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 499858 + "unique_sentence2": 1997 }, "dan_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 509941, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 126.25838758137206, "max_sentence1_length": 522, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 129.0951427140711, "max_sentence2_length": 433, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509941 + "unique_sentence2": 1997 }, "dan_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 514346, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 126.25838758137206, "max_sentence1_length": 522, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 131.30095142714072, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 514346 + "unique_sentence2": 1996 }, "dan_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 532895, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 126.25838758137206, "max_sentence1_length": 522, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 140.58938407611416, "max_sentence2_length": 543, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532895 + "unique_sentence2": 1996 }, "dan_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 544053, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 126.25838758137206, "max_sentence1_length": 522, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 544053 + "unique_sentence2": 1996 }, "dan_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 500495, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 126.25838758137206, "max_sentence1_length": 522, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 124.36504757135704, "max_sentence2_length": 417, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 500495 + "unique_sentence2": 1996 }, "dan_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 503582, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 126.25838758137206, "max_sentence1_length": 522, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 125.91086629944917, "max_sentence2_length": 482, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503582 + "unique_sentence2": 1996 }, "dan_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 503965, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 126.25838758137206, "max_sentence1_length": 522, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503965 + "unique_sentence2": 1996 }, "deu_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 564002, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 134.37756634952427, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564002 + "unique_sentence2": 1996 }, "deu_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 526831, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526831 + "unique_sentence2": 1995 }, "deu_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 539452, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 539452 + "unique_sentence2": 1997 }, "deu_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 547788, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 126.25838758137206, "max_sentence2_length": 522, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547788 + "unique_sentence2": 1995 }, "deu_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 594777, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 594777 + "unique_sentence2": 1996 }, "deu_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 543370, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 543370 + "unique_sentence2": 1997 }, "deu_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 553453, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0951427140711, "max_sentence2_length": 433, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 553453 + "unique_sentence2": 1997 }, "deu_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 538989, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 538989 + "unique_sentence2": 1995 }, "deu_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 565450, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 565450 + "unique_sentence2": 1996 }, "deu_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 588758, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 588758 + "unique_sentence2": 1996 }, "deu_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 495946, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495946 + "unique_sentence2": 1996 }, "deu_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 557225, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 557225 + "unique_sentence2": 1996 }, "deu_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 574026, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 574026 + "unique_sentence2": 1997 }, "deu_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 582622, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582622 + "unique_sentence2": 1997 }, "deu_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 557858, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 131.30095142714072, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 557858 + "unique_sentence2": 1996 }, "deu_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 407276, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 407276 + "unique_sentence2": 1994 }, "deu_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 429055, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 429055 + "unique_sentence2": 1995 }, "deu_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 555047, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 555047 + "unique_sentence2": 1995 }, "deu_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 576407, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 140.58938407611416, "max_sentence2_length": 543, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 576407 + "unique_sentence2": 1996 }, "deu_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 587565, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 587565 + "unique_sentence2": 1996 }, "deu_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 544007, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.36504757135704, "max_sentence2_length": 417, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 544007 + "unique_sentence2": 1996 }, "deu_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 547094, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 125.91086629944917, "max_sentence2_length": 482, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547094 + "unique_sentence2": 1996 }, "deu_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 573516, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 573516 + "unique_sentence2": 1996 }, "deu_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 572865, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 572865 + "unique_sentence2": 1996 }, "deu_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 570543, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 570543 + "unique_sentence2": 1996 }, "deu_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 583850, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 583850 + "unique_sentence2": 1996 }, "deu_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 568159, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 568159 + "unique_sentence2": 1997 }, "deu_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 547477, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547477 + "unique_sentence2": 1996 }, "deu_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 605611, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 605611 + "unique_sentence2": 1997 }, "deu_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 561263, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561263 + "unique_sentence2": 1997 }, "deu_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 566771, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 566771 + "unique_sentence2": 1996 }, "deu_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 387128, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 387128 + "unique_sentence2": 1996 }, "deu_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 553382, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 148.04707060590886, "max_sentence1_length": 508, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 553382 + "unique_sentence2": 1996 }, "div_Thaa-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 547650, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 152.15222834251378, "max_sentence1_length": 609, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547650 + "unique_sentence2": 1997 }, "div_Thaa-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 551568, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 152.15222834251378, "max_sentence1_length": 609, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 551568 + "unique_sentence2": 1997 }, "div_Thaa-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 579051, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 152.15222834251378, "max_sentence1_length": 609, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 137.80821231847773, "max_sentence2_length": 393, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 579051 + "unique_sentence2": 1997 }, "div_Thaa-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 548779, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 152.15222834251378, "max_sentence1_length": 609, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 122.64947421131697, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 548779 + "unique_sentence2": 1997 }, "div_Thaa-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 565423, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 152.15222834251378, "max_sentence1_length": 609, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 565423 + "unique_sentence2": 1996 }, "div_Thaa-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 569384, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 152.15222834251378, "max_sentence1_length": 609, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 132.96745117676514, "max_sentence2_length": 449, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 569384 + "unique_sentence2": 1996 }, "div_Thaa-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 564735, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 152.15222834251378, "max_sentence1_length": 609, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 130.63945918878318, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564735 + "unique_sentence2": 1995 }, "div_Thaa-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 552071, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 152.15222834251378, "max_sentence1_length": 609, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 124.29794692038057, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 552071 + "unique_sentence2": 1996 }, "div_Thaa-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 554270, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 152.15222834251378, "max_sentence1_length": 609, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 125.39909864797195, "max_sentence2_length": 383, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 554270 + "unique_sentence2": 1996 }, "div_Thaa-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 562589, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 152.15222834251378, "max_sentence1_length": 609, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 129.56484727090637, "max_sentence2_length": 441, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 562589 + "unique_sentence2": 1996 }, "div_Thaa-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 524175, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 152.15222834251378, "max_sentence1_length": 609, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 110.32899349023535, "max_sentence2_length": 335, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 524175 + "unique_sentence2": 1996 }, "div_Thaa-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 613809, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 152.15222834251378, "max_sentence1_length": 609, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 613809 + "unique_sentence2": 1997 }, "div_Thaa-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 551375, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 152.15222834251378, "max_sentence1_length": 609, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 123.9494241362043, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 551375 + "unique_sentence2": 1996 }, "div_Thaa-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 551846, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 152.15222834251378, "max_sentence1_length": 609, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.18527791687531, "max_sentence2_length": 390, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 551846 + "unique_sentence2": 1996 }, "dzo_Tibt-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 543850, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 121.79318978467701, "max_sentence1_length": 411, + "unique_sentence1": 1992, "min_sentence2_length": 8, "average_sentence2_length": 150.54031046569855, "max_sentence2_length": 478, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 543850 + "unique_sentence2": 1993 }, "dzo_Tibt-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 490941, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 121.79318978467701, "max_sentence1_length": 411, + "unique_sentence1": 1992, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 490941 + "unique_sentence2": 1997 }, "dzo_Tibt-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 531712, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 121.79318978467701, "max_sentence1_length": 411, + "unique_sentence1": 1992, "min_sentence2_length": 6, "average_sentence2_length": 144.4621932899349, "max_sentence2_length": 517, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531712 + "unique_sentence2": 1996 }, "dzo_Tibt-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 510201, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 121.79318978467701, "max_sentence1_length": 411, + "unique_sentence1": 1992, "min_sentence2_length": 7, "average_sentence2_length": 133.69053580370556, "max_sentence2_length": 507, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 510201 + "unique_sentence2": 1997 }, "dzo_Tibt-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 502269, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 121.79318978467701, "max_sentence1_length": 411, + "unique_sentence1": 1992, "min_sentence2_length": 11, "average_sentence2_length": 129.7185778668002, "max_sentence2_length": 414, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 502269 + "unique_sentence2": 1997 }, "dzo_Tibt-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 555075, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 121.79318978467701, "max_sentence1_length": 411, + "unique_sentence1": 1992, "min_sentence2_length": 9, "average_sentence2_length": 156.16124186279418, "max_sentence2_length": 773, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 555075 + "unique_sentence2": 1997 }, "dzo_Tibt-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 480689, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 121.79318978467701, "max_sentence1_length": 411, + "unique_sentence1": 1992, "min_sentence2_length": 7, "average_sentence2_length": 118.91236855282925, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 480689 + "unique_sentence2": 1996 }, "ell_Grek-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 530308, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530308 + "unique_sentence2": 1995 }, "ell_Grek-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 542929, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 542929 + "unique_sentence2": 1997 }, "ell_Grek-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 594777, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 594777 + "unique_sentence2": 1996 }, "ell_Grek-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 546847, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 546847 + "unique_sentence2": 1997 }, "ell_Grek-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 542466, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 542466 + "unique_sentence2": 1995 }, "ell_Grek-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 568927, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 568927 + "unique_sentence2": 1996 }, "ell_Grek-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 592235, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 592235 + "unique_sentence2": 1996 }, "ell_Grek-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 499423, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 499423 + "unique_sentence2": 1996 }, "ell_Grek-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 560702, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 560702 + "unique_sentence2": 1996 }, "ell_Grek-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 577503, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 577503 + "unique_sentence2": 1997 }, "ell_Grek-hye_Armn": { + "num_samples": 1997, + "number_of_characters": 563842, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 132.55633450175262, "max_sentence2_length": 441, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 563842 + "unique_sentence2": 1996 }, "ell_Grek-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 586099, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 586099 + "unique_sentence2": 1997 }, "ell_Grek-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 410753, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 410753 + "unique_sentence2": 1994 }, "ell_Grek-kat_Geor": { + "num_samples": 1997, + "number_of_characters": 565719, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 133.49624436654983, "max_sentence2_length": 503, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 565719 + "unique_sentence2": 1995 }, "ell_Grek-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 432532, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 432532 + "unique_sentence2": 1995 }, "ell_Grek-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 558524, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 558524 + "unique_sentence2": 1995 }, "ell_Grek-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 591042, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 591042 + "unique_sentence2": 1996 }, "ell_Grek-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 576993, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 576993 + "unique_sentence2": 1996 }, "ell_Grek-por_Latn": { + "num_samples": 1997, + "number_of_characters": 576342, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 576342 + "unique_sentence2": 1996 }, "ell_Grek-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 574020, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 574020 + "unique_sentence2": 1996 }, "ell_Grek-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 587327, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 587327 + "unique_sentence2": 1996 }, "ell_Grek-sqi_Latn": { + "num_samples": 1997, + "number_of_characters": 582734, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 142.01652478718077, "max_sentence2_length": 461, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582734 + "unique_sentence2": 1996 }, "ell_Grek-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 571636, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 571636 + "unique_sentence2": 1997 }, "ell_Grek-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 550954, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 550954 + "unique_sentence2": 1996 }, "ell_Grek-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 609088, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 609088 + "unique_sentence2": 1997 }, "ell_Grek-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 564740, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564740 + "unique_sentence2": 1997 }, "ell_Grek-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 570248, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 570248 + "unique_sentence2": 1996 }, "ell_Grek-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 390605, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 390605 + "unique_sentence2": 1996 }, "ell_Grek-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 556859, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 149.78818227341011, "max_sentence1_length": 584, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 556859 + "unique_sentence2": 1996 }, "eng_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 516072, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 134.37756634952427, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 516072 + "unique_sentence2": 1996 }, "eng_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 415227, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 1, "average_sentence2_length": 83.87931897846771, "max_sentence2_length": 290, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 415227 + "unique_sentence2": 1994 }, "eng_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 478901, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 478901 + "unique_sentence2": 1995 }, "eng_Latn-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 517354, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 135.0195292939409, "max_sentence2_length": 398, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 517354 + "unique_sentence2": 1997 }, "eng_Latn-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 494046, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 11, "average_sentence2_length": 123.34802203304957, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 494046 + "unique_sentence2": 1995 }, "eng_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 503810, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 128.2373560340511, "max_sentence2_length": 422, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503810 + "unique_sentence2": 1996 }, "eng_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 546212, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 149.47020530796195, "max_sentence2_length": 465, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 546212 + "unique_sentence2": 1997 }, "eng_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 491522, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 491522 + "unique_sentence2": 1997 }, "eng_Latn-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 548349, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 150.54031046569855, "max_sentence2_length": 478, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 548349 + "unique_sentence2": 1993 }, "eng_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 502630, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 127.64646970455684, "max_sentence2_length": 434, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 502630 + "unique_sentence2": 1996 }, "eng_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 517609, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 135.14722083124687, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 517609 + "unique_sentence2": 1996 }, "eng_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 530680, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 141.6925388082123, "max_sentence2_length": 460, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530680 + "unique_sentence2": 1997 }, "eng_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 489038, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 120.84026039058588, "max_sentence2_length": 474, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 489038 + "unique_sentence2": 1997 }, "eng_Latn-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 500087, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 126.37305958938407, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 500087 + "unique_sentence2": 1995 }, "eng_Latn-cym_Latn": { + "num_samples": 1997, + "number_of_characters": 514225, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 133.4526790185278, "max_sentence2_length": 444, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 514225 + "unique_sentence2": 1997 }, "eng_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 499858, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 126.25838758137206, "max_sentence2_length": 522, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 499858 + "unique_sentence2": 1995 }, "eng_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 543370, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 543370 + "unique_sentence2": 1996 }, "eng_Latn-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 551568, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 152.15222834251378, "max_sentence2_length": 609, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 551568 + "unique_sentence2": 1996 }, "eng_Latn-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 490941, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 121.79318978467701, "max_sentence2_length": 411, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 490941 + "unique_sentence2": 1992 }, "eng_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 546847, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 546847 + "unique_sentence2": 1996 }, "eng_Latn-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 522923, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 137.80821231847773, "max_sentence2_length": 393, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522923 + "unique_sentence2": 1997 }, "eng_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 486698, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 119.6685027541312, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 486698 + "unique_sentence2": 1994 }, "eng_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 505523, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 129.0951427140711, "max_sentence2_length": 433, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 505523 + "unique_sentence2": 1997 }, "eng_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 491059, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 491059 + "unique_sentence2": 1995 }, "eng_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 548225, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 150.478217325989, "max_sentence2_length": 448, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 548225 + "unique_sentence2": 1988 }, "eng_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 541140, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 146.93039559339007, "max_sentence2_length": 554, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 541140 + "unique_sentence2": 1997 }, "eng_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 517520, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 517520 + "unique_sentence2": 1996 }, "eng_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 540828, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540828 + "unique_sentence2": 1996 }, "eng_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 476200, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 114.4116174261392, "max_sentence2_length": 376, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 476200 + "unique_sentence2": 1996 }, "eng_Latn-gle_Latn": { + "num_samples": 1997, + "number_of_characters": 542529, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 11, "average_sentence2_length": 147.62593890836254, "max_sentence2_length": 461, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 542529 + "unique_sentence2": 1997 }, "eng_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 519706, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 136.19729594391586, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 519706 + "unique_sentence2": 1996 }, "eng_Latn-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 492651, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 122.64947421131697, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 492651 + "unique_sentence2": 1997 }, "eng_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 517686, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 135.185778668002, "max_sentence2_length": 483, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 517686 + "unique_sentence2": 1997 }, "eng_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 448016, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 448016 + "unique_sentence2": 1996 }, "eng_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 509295, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509295 + "unique_sentence2": 1996 }, "eng_Latn-hmn_Latn": { + "num_samples": 1997, + "number_of_characters": 578510, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 165.6434651977967, "max_sentence2_length": 643, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 578510 + "unique_sentence2": 1997 }, "eng_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 503645, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 128.1547320981472, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503645 + "unique_sentence2": 1997 }, "eng_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 526096, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526096 + "unique_sentence2": 1997 }, "eng_Latn-hye_Armn": { + "num_samples": 1997, + "number_of_characters": 512435, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 132.55633450175262, "max_sentence2_length": 441, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 512435 + "unique_sentence2": 1996 }, "eng_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 493821, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 123.23535302954431, "max_sentence2_length": 469, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 493821 + "unique_sentence2": 1997 }, "eng_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 534692, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 534692 + "unique_sentence2": 1997 }, "eng_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 509928, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 131.30095142714072, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509928 + "unique_sentence2": 1996 }, "eng_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 536937, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 144.82573860791186, "max_sentence2_length": 623, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536937 + "unique_sentence2": 1996 }, "eng_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 359346, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 359346 + "unique_sentence2": 1994 }, "eng_Latn-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 513256, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 132.96745117676514, "max_sentence2_length": 449, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 513256 + "unique_sentence2": 1996 }, "eng_Latn-kat_Geor": { + "num_samples": 1997, + "number_of_characters": 514312, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 133.49624436654983, "max_sentence2_length": 503, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 514312 + "unique_sentence2": 1995 }, "eng_Latn-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 507996, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 130.33350025037555, "max_sentence2_length": 473, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 507996 + "unique_sentence2": 1996 }, "eng_Latn-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 536211, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 144.4621932899349, "max_sentence2_length": 517, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536211 + "unique_sentence2": 1996 }, "eng_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 551507, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 152.12168252378567, "max_sentence2_length": 541, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 551507 + "unique_sentence2": 1996 }, "eng_Latn-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 498584, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 125.62043064596895, "max_sentence2_length": 395, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 498584 + "unique_sentence2": 1996 }, "eng_Latn-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 493666, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 123.15773660490736, "max_sentence2_length": 420, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 493666 + "unique_sentence2": 1996 }, "eng_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 381125, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 381125 + "unique_sentence2": 1995 }, "eng_Latn-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 514700, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 133.69053580370556, "max_sentence2_length": 507, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 514700 + "unique_sentence2": 1997 }, "eng_Latn-lav_Latn": { + "num_samples": 1997, + "number_of_characters": 515908, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 134.29544316474713, "max_sentence2_length": 503, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515908 + "unique_sentence2": 1994 }, "eng_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 507117, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 507117 + "unique_sentence2": 1995 }, "eng_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 528477, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 140.58938407611416, "max_sentence2_length": 543, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 528477 + "unique_sentence2": 1996 }, "eng_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 551872, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 152.30445668502753, "max_sentence2_length": 540, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 551872 + "unique_sentence2": 1996 }, "eng_Latn-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 508607, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 130.63945918878318, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508607 + "unique_sentence2": 1995 }, "eng_Latn-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 461555, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 107.07811717576365, "max_sentence2_length": 392, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 461555 + "unique_sentence2": 1993 }, "eng_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515611, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 134.1467200801202, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515611 + "unique_sentence2": 1997 }, "eng_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 568028, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 160.39459188783175, "max_sentence2_length": 559, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 568028 + "unique_sentence2": 1994 }, "eng_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 525195, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 138.94591887831749, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525195 + "unique_sentence2": 1996 }, "eng_Latn-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 506768, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 11, "average_sentence2_length": 129.7185778668002, "max_sentence2_length": 414, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506768 + "unique_sentence2": 1997 }, "eng_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 521844, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 137.26790185277918, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 521844 + "unique_sentence2": 1997 }, "eng_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 524903, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 138.79969954932398, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 524903 + "unique_sentence2": 1997 }, "eng_Latn-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 559574, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 156.16124186279418, "max_sentence2_length": 773, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 559574 + "unique_sentence2": 1997 }, "eng_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 545459, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 149.09313970956435, "max_sentence2_length": 590, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 545459 + "unique_sentence2": 1997 }, "eng_Latn-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 495943, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 124.29794692038057, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495943 + "unique_sentence2": 1996 }, "eng_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 539635, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 539635 + "unique_sentence2": 1996 }, "eng_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 496077, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.36504757135704, "max_sentence2_length": 417, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 496077 + "unique_sentence2": 1996 }, "eng_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 499164, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 125.91086629944917, "max_sentence2_length": 482, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 499164 + "unique_sentence2": 1996 }, "eng_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 539219, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 145.96845267901853, "max_sentence2_length": 487, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 539219 + "unique_sentence2": 1996 }, "eng_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 532002, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 142.35453179769655, "max_sentence2_length": 464, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532002 + "unique_sentence2": 1993 }, "eng_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 485151, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 118.89384076114172, "max_sentence2_length": 466, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 485151 + "unique_sentence2": 1984 }, "eng_Latn-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 498142, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 125.39909864797195, "max_sentence2_length": 383, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 498142 + "unique_sentence2": 1996 }, "eng_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 525586, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525586 + "unique_sentence2": 1996 }, "eng_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 524935, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 524935 + "unique_sentence2": 1996 }, "eng_Latn-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 490256, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 121.45017526289435, "max_sentence2_length": 365, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 490256 + "unique_sentence2": 1997 }, "eng_Latn-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 490353, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 121.49874812218327, "max_sentence2_length": 366, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 490353 + "unique_sentence2": 1996 }, "eng_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 540205, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 146.4621932899349, "max_sentence2_length": 518, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540205 + "unique_sentence2": 1997 }, "eng_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 522613, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522613 + "unique_sentence2": 1996 }, "eng_Latn-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 462633, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 3, "average_sentence2_length": 107.6179268903355, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 462633 + "unique_sentence2": 1996 }, "eng_Latn-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 506461, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 129.56484727090637, "max_sentence2_length": 441, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506461 + "unique_sentence2": 1996 }, "eng_Latn-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 500689, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 126.67451176765148, "max_sentence2_length": 403, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 500689 + "unique_sentence2": 1996 }, "eng_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 500616, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 126.63795693540311, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 500616 + "unique_sentence2": 1996 }, "eng_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 525575, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 139.1362043064597, "max_sentence2_length": 431, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525575 + "unique_sentence2": 1996 }, "eng_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 546050, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 149.38908362543816, "max_sentence2_length": 511, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 546050 + "unique_sentence2": 1995 }, "eng_Latn-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 468047, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 110.32899349023535, "max_sentence2_length": 335, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 468047 + "unique_sentence2": 1996 }, "eng_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 539012, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 145.8647971957937, "max_sentence2_length": 455, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 539012 + "unique_sentence2": 1997 }, "eng_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 535920, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 535920 + "unique_sentence2": 1996 }, "eng_Latn-sqi_Latn": { + "num_samples": 1997, + "number_of_characters": 531327, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 142.01652478718077, "max_sentence2_length": 461, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531327 + "unique_sentence2": 1996 }, "eng_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 500023, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 126.34101151727592, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 500023 + "unique_sentence2": 1995 }, "eng_Latn-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 503861, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 128.26289434151226, "max_sentence2_length": 452, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503861 + "unique_sentence2": 1996 }, "eng_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 535862, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 144.28743114672008, "max_sentence2_length": 510, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 535862 + "unique_sentence2": 1996 }, "eng_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 520229, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520229 + "unique_sentence2": 1997 }, "eng_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 499547, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 499547 + "unique_sentence2": 1996 }, "eng_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 557343, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 155.04406609914872, "max_sentence2_length": 524, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 557343 + "unique_sentence2": 1997 }, "eng_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 557681, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 557681 + "unique_sentence2": 1997 }, "eng_Latn-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 493646, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 123.14772158237356, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 493646 + "unique_sentence2": 1996 }, "eng_Latn-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 495247, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 123.9494241362043, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495247 + "unique_sentence2": 1996 }, "eng_Latn-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 521867, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 3, "average_sentence2_length": 137.27941912869304, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 521867 + "unique_sentence2": 1995 }, "eng_Latn-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 485188, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 118.91236855282925, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 485188 + "unique_sentence2": 1996 }, "eng_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 412958, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 82.743114672008, "max_sentence2_length": 272, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 412958 + "unique_sentence2": 1996 }, "eng_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 561360, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 157.05558337506258, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561360 + "unique_sentence2": 1997 }, "eng_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 582003, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 167.39258888332498, "max_sentence2_length": 556, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582003 + "unique_sentence2": 1997 }, "eng_Latn-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 532994, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 142.85127691537306, "max_sentence2_length": 576, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532994 + "unique_sentence2": 1996 }, "eng_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 513333, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 513333 + "unique_sentence2": 1997 }, "eng_Latn-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 558742, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 155.74461692538807, "max_sentence2_length": 592, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 558742 + "unique_sentence2": 1996 }, "eng_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 510503, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 131.58888332498748, "max_sentence2_length": 440, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 510503 + "unique_sentence2": 1996 }, "eng_Latn-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 495718, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.18527791687531, "max_sentence2_length": 390, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495718 + "unique_sentence2": 1996 }, "eng_Latn-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 541415, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 147.06810215322986, "max_sentence2_length": 470, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 541415 + "unique_sentence2": 1996 }, "eng_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 547476, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 150.10315473209815, "max_sentence2_length": 535, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547476 + "unique_sentence2": 1993 }, "eng_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 518841, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 518841 + "unique_sentence2": 1996 }, "eng_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 487523, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 120.08162243365048, "max_sentence2_length": 405, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 487523 + "unique_sentence2": 1990 }, "eng_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 515810, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 134.2463695543315, "max_sentence2_length": 492, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515810 + "unique_sentence2": 1997 }, "eng_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 563808, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 158.2814221331998, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 563808 + "unique_sentence2": 1996 }, "eng_Latn-yue_Hant": { + "num_samples": 1997, + "number_of_characters": 326607, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 4, "average_sentence2_length": 39.502754131196795, "max_sentence2_length": 133, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 326607 + "unique_sentence2": 1996 }, "eng_Latn-zho_Hans": { + "num_samples": 1997, + "number_of_characters": 332681, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 4, "average_sentence2_length": 42.54431647471207, "max_sentence2_length": 263, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 332681 + "unique_sentence2": 1997 }, "eng_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 339198, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 339198 + "unique_sentence2": 1996 }, "eng_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 505452, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.04606910365548, "max_sentence1_length": 437, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 505452 + "unique_sentence2": 1996 }, "eus_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 519005, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 137.80821231847773, "max_sentence1_length": 393, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 519005 + "unique_sentence2": 1997 }, "eus_Latn-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 579051, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 137.80821231847773, "max_sentence1_length": 393, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 152.15222834251378, "max_sentence2_length": 609, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 579051 + "unique_sentence2": 1996 }, "eus_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 522923, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 137.80821231847773, "max_sentence1_length": 393, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522923 + "unique_sentence2": 1997 }, "eus_Latn-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 520134, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 137.80821231847773, "max_sentence1_length": 393, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 122.64947421131697, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520134 + "unique_sentence2": 1997 }, "eus_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 536778, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 137.80821231847773, "max_sentence1_length": 393, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536778 + "unique_sentence2": 1996 }, "eus_Latn-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 540739, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 137.80821231847773, "max_sentence1_length": 393, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 132.96745117676514, "max_sentence2_length": 449, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540739 + "unique_sentence2": 1996 }, "eus_Latn-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 536090, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 137.80821231847773, "max_sentence1_length": 393, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 130.63945918878318, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536090 + "unique_sentence2": 1995 }, "eus_Latn-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 523426, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 137.80821231847773, "max_sentence1_length": 393, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 124.29794692038057, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 523426 + "unique_sentence2": 1996 }, "eus_Latn-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 525625, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 137.80821231847773, "max_sentence1_length": 393, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 125.39909864797195, "max_sentence2_length": 383, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525625 + "unique_sentence2": 1996 }, "eus_Latn-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 533944, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 137.80821231847773, "max_sentence1_length": 393, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 129.56484727090637, "max_sentence2_length": 441, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 533944 + "unique_sentence2": 1996 }, "eus_Latn-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 495530, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 137.80821231847773, "max_sentence1_length": 393, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 110.32899349023535, "max_sentence2_length": 335, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495530 + "unique_sentence2": 1996 }, "eus_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 585164, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 137.80821231847773, "max_sentence1_length": 393, + "unique_sentence1": 1997, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 585164 + "unique_sentence2": 1997 }, "eus_Latn-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 522730, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 137.80821231847773, "max_sentence1_length": 393, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 123.9494241362043, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522730 + "unique_sentence2": 1996 }, "eus_Latn-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 523201, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 137.80821231847773, "max_sentence1_length": 393, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.18527791687531, "max_sentence2_length": 390, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 523201 + "unique_sentence2": 1996 }, "ewe_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 537470, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 119.6685027541312, "max_sentence1_length": 493, + "unique_sentence1": 1994, "min_sentence2_length": 8, "average_sentence2_length": 149.47020530796195, "max_sentence2_length": 465, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 537470 + "unique_sentence2": 1997 }, "ewe_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 486698, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 119.6685027541312, "max_sentence1_length": 493, + "unique_sentence1": 1994, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 486698 + "unique_sentence2": 1997 }, "ewe_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 467458, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 119.6685027541312, "max_sentence1_length": 493, + "unique_sentence1": 1994, "min_sentence2_length": 6, "average_sentence2_length": 114.4116174261392, "max_sentence2_length": 376, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 467458 + "unique_sentence2": 1996 }, "ewe_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 542765, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 119.6685027541312, "max_sentence1_length": 493, + "unique_sentence1": 1994, "min_sentence2_length": 10, "average_sentence2_length": 152.12168252378567, "max_sentence2_length": 541, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 542765 + "unique_sentence2": 1996 }, "ewe_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 536717, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 119.6685027541312, "max_sentence1_length": 493, + "unique_sentence1": 1994, "min_sentence2_length": 6, "average_sentence2_length": 149.09313970956435, "max_sentence2_length": 590, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536717 + "unique_sentence2": 1997 }, "ewe_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 523260, + "unique_pairs": 1995, "min_sentence1_length": 6, "average_sentence1_length": 119.6685027541312, "max_sentence1_length": 493, + "unique_sentence1": 1994, "min_sentence2_length": 10, "average_sentence2_length": 142.35453179769655, "max_sentence2_length": 464, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 523260 + "unique_sentence2": 1993 }, "ewe_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 537308, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 119.6685027541312, "max_sentence1_length": 493, + "unique_sentence1": 1994, "min_sentence2_length": 6, "average_sentence2_length": 149.38908362543816, "max_sentence2_length": 511, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 537308 + "unique_sentence2": 1995 }, "ewe_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 538734, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 119.6685027541312, "max_sentence1_length": 493, + "unique_sentence1": 1994, "min_sentence2_length": 10, "average_sentence2_length": 150.10315473209815, "max_sentence2_length": 535, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 538734 + "unique_sentence2": 1993 }, "fao_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 526155, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0951427140711, "max_sentence1_length": 433, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 134.37756634952427, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526155 + "unique_sentence2": 1996 }, "fao_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 509941, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0951427140711, "max_sentence1_length": 433, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 126.25838758137206, "max_sentence2_length": 522, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509941 + "unique_sentence2": 1995 }, "fao_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 553453, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0951427140711, "max_sentence1_length": 433, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 553453 + "unique_sentence2": 1996 }, "fao_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 505523, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0951427140711, "max_sentence1_length": 433, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 505523 + "unique_sentence2": 1997 }, "fao_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 520011, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0951427140711, "max_sentence1_length": 433, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 131.30095142714072, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520011 + "unique_sentence2": 1996 }, "fao_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 538560, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0951427140711, "max_sentence1_length": 433, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 140.58938407611416, "max_sentence2_length": 543, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 538560 + "unique_sentence2": 1996 }, "fao_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 549718, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0951427140711, "max_sentence1_length": 433, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549718 + "unique_sentence2": 1996 }, "fao_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 506160, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0951427140711, "max_sentence1_length": 433, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.36504757135704, "max_sentence2_length": 417, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506160 + "unique_sentence2": 1996 }, "fao_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 509247, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0951427140711, "max_sentence1_length": 433, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 125.91086629944917, "max_sentence2_length": 482, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509247 + "unique_sentence2": 1996 }, "fao_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 509630, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0951427140711, "max_sentence1_length": 433, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509630 + "unique_sentence2": 1996 }, "fas_Arab-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 474520, + "unique_pairs": 1995, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 474520 + "unique_sentence2": 1995 }, "fas_Arab-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 487141, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 487141 + "unique_sentence2": 1997 }, "fas_Arab-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 495706, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 5, "average_sentence2_length": 126.37305958938407, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495706 + "unique_sentence2": 1995 }, "fas_Arab-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 538989, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 538989 + "unique_sentence2": 1996 }, "fas_Arab-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 542466, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 542466 + "unique_sentence2": 1996 }, "fas_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 491059, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 491059 + "unique_sentence2": 1997 }, "fas_Arab-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 513139, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 513139 + "unique_sentence2": 1996 }, "fas_Arab-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 536447, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536447 + "unique_sentence2": 1996 }, "fas_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 443635, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 443635 + "unique_sentence2": 1996 }, "fas_Arab-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 504914, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 504914 + "unique_sentence2": 1996 }, "fas_Arab-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 521715, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 521715 + "unique_sentence2": 1997 }, "fas_Arab-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 530311, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530311 + "unique_sentence2": 1997 }, "fas_Arab-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 354965, + "unique_pairs": 1995, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 354965 + "unique_sentence2": 1994 }, "fas_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 489285, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 123.15773660490736, "max_sentence2_length": 420, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 489285 + "unique_sentence2": 1996 }, "fas_Arab-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 376744, + "unique_pairs": 1995, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 376744 + "unique_sentence2": 1995 }, "fas_Arab-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 502736, + "unique_pairs": 1995, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 502736 + "unique_sentence2": 1995 }, "fas_Arab-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 457174, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 107.07811717576365, "max_sentence2_length": 392, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 457174 + "unique_sentence2": 1993 }, "fas_Arab-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 535254, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 535254 + "unique_sentence2": 1996 }, "fas_Arab-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 521205, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 521205 + "unique_sentence2": 1996 }, "fas_Arab-por_Latn": { + "num_samples": 1997, + "number_of_characters": 520554, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520554 + "unique_sentence2": 1996 }, "fas_Arab-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 485875, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 121.45017526289435, "max_sentence2_length": 365, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 485875 + "unique_sentence2": 1997 }, "fas_Arab-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 485972, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 121.49874812218327, "max_sentence2_length": 366, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 485972 + "unique_sentence2": 1996 }, "fas_Arab-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 518232, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 518232 + "unique_sentence2": 1996 }, "fas_Arab-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 458252, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 3, "average_sentence2_length": 107.6179268903355, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 458252 + "unique_sentence2": 1996 }, "fas_Arab-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 531539, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531539 + "unique_sentence2": 1996 }, "fas_Arab-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 515848, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515848 + "unique_sentence2": 1997 }, "fas_Arab-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 495166, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495166 + "unique_sentence2": 1996 }, "fas_Arab-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 553300, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 553300 + "unique_sentence2": 1997 }, "fas_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 517486, + "unique_pairs": 1995, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 3, "average_sentence2_length": 137.27941912869304, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 517486 + "unique_sentence2": 1995 }, "fas_Arab-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 508952, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508952 + "unique_sentence2": 1997 }, "fas_Arab-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 514460, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 514460 + "unique_sentence2": 1996 }, "fas_Arab-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 334817, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 334817 + "unique_sentence2": 1996 }, "fas_Arab-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 501071, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 121.85227841762644, "max_sentence1_length": 389, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 501071 + "unique_sentence2": 1996 }, "fij_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 548225, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 150.478217325989, "max_sentence1_length": 448, + "unique_sentence1": 1988, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 548225 + "unique_sentence2": 1997 }, "fij_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 593925, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 150.478217325989, "max_sentence1_length": 448, + "unique_sentence1": 1988, "min_sentence2_length": 10, "average_sentence2_length": 146.93039559339007, "max_sentence2_length": 554, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 593925 + "unique_sentence2": 1997 }, "fij_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 587477, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 150.478217325989, "max_sentence1_length": 448, + "unique_sentence1": 1988, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 587477 + "unique_sentence2": 1997 }, "fij_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 604657, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 150.478217325989, "max_sentence1_length": 448, + "unique_sentence1": 1988, "min_sentence2_length": 7, "average_sentence2_length": 152.30445668502753, "max_sentence2_length": 540, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 604657 + "unique_sentence2": 1996 }, "fij_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 620813, + "unique_pairs": 1995, "min_sentence1_length": 10, "average_sentence1_length": 150.478217325989, "max_sentence1_length": 448, + "unique_sentence1": 1988, "min_sentence2_length": 6, "average_sentence2_length": 160.39459188783175, "max_sentence2_length": 559, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 620813 + "unique_sentence2": 1994 }, "fij_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 574629, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 150.478217325989, "max_sentence1_length": 448, + "unique_sentence1": 1988, "min_sentence2_length": 9, "average_sentence2_length": 137.26790185277918, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 574629 + "unique_sentence2": 1997 }, "fij_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 577688, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 150.478217325989, "max_sentence1_length": 448, + "unique_sentence1": 1988, "min_sentence2_length": 6, "average_sentence2_length": 138.79969954932398, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 577688 + "unique_sentence2": 1997 }, "fij_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 578360, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 150.478217325989, "max_sentence1_length": 448, + "unique_sentence1": 1988, "min_sentence2_length": 8, "average_sentence2_length": 139.1362043064597, "max_sentence2_length": 431, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 578360 + "unique_sentence2": 1996 }, "fij_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 610128, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 150.478217325989, "max_sentence1_length": 448, + "unique_sentence1": 1988, "min_sentence2_length": 9, "average_sentence2_length": 155.04406609914872, "max_sentence2_length": 524, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 610128 + "unique_sentence2": 1997 }, "fij_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 614145, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 150.478217325989, "max_sentence1_length": 448, + "unique_sentence1": 1988, "min_sentence2_length": 7, "average_sentence2_length": 157.05558337506258, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 614145 + "unique_sentence2": 1997 }, "fil_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 541140, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 146.93039559339007, "max_sentence1_length": 554, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 541140 + "unique_sentence2": 1997 }, "fil_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 593925, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 146.93039559339007, "max_sentence1_length": 554, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 150.478217325989, "max_sentence2_length": 448, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 593925 + "unique_sentence2": 1988 }, "fil_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 580392, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 146.93039559339007, "max_sentence1_length": 554, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 580392 + "unique_sentence2": 1997 }, "fil_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 597572, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 146.93039559339007, "max_sentence1_length": 554, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 152.30445668502753, "max_sentence2_length": 540, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 597572 + "unique_sentence2": 1996 }, "fil_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 613728, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 146.93039559339007, "max_sentence1_length": 554, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 160.39459188783175, "max_sentence2_length": 559, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 613728 + "unique_sentence2": 1994 }, "fil_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 567544, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 146.93039559339007, "max_sentence1_length": 554, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 137.26790185277918, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 567544 + "unique_sentence2": 1997 }, "fil_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 570603, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 146.93039559339007, "max_sentence1_length": 554, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 138.79969954932398, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 570603 + "unique_sentence2": 1997 }, "fil_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 571275, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 146.93039559339007, "max_sentence1_length": 554, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 139.1362043064597, "max_sentence2_length": 431, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 571275 + "unique_sentence2": 1996 }, "fil_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 603043, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 146.93039559339007, "max_sentence1_length": 554, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 155.04406609914872, "max_sentence2_length": 524, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 603043 + "unique_sentence2": 1997 }, "fil_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 607060, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 146.93039559339007, "max_sentence1_length": 554, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 157.05558337506258, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 607060 + "unique_sentence2": 1997 }, "fin_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 500981, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 500981 + "unique_sentence2": 1995 }, "fin_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 513602, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 513602 + "unique_sentence2": 1997 }, "fin_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 565450, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 565450 + "unique_sentence2": 1996 }, "fin_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 568927, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 568927 + "unique_sentence2": 1996 }, "fin_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 517520, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 517520 + "unique_sentence2": 1997 }, "fin_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 513139, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 513139 + "unique_sentence2": 1995 }, "fin_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 562908, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 562908 + "unique_sentence2": 1996 }, "fin_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 470096, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 470096 + "unique_sentence2": 1996 }, "fin_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 531375, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531375 + "unique_sentence2": 1996 }, "fin_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 548176, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 548176 + "unique_sentence2": 1997 }, "fin_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 556772, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 556772 + "unique_sentence2": 1997 }, "fin_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 381426, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 381426 + "unique_sentence2": 1994 }, "fin_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 403205, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 403205 + "unique_sentence2": 1995 }, "fin_Latn-lav_Latn": { + "num_samples": 1997, + "number_of_characters": 537988, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 134.29544316474713, "max_sentence2_length": 503, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 537988 + "unique_sentence2": 1994 }, "fin_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 529197, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 529197 + "unique_sentence2": 1995 }, "fin_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 561715, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561715 + "unique_sentence2": 1996 }, "fin_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 547666, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547666 + "unique_sentence2": 1996 }, "fin_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 547015, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547015 + "unique_sentence2": 1996 }, "fin_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 544693, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 544693 + "unique_sentence2": 1996 }, "fin_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 558000, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 558000 + "unique_sentence2": 1996 }, "fin_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 542309, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 542309 + "unique_sentence2": 1997 }, "fin_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 521627, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 521627 + "unique_sentence2": 1996 }, "fin_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 579761, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 579761 + "unique_sentence2": 1997 }, "fin_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 535413, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 535413 + "unique_sentence2": 1997 }, "fin_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 540921, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540921 + "unique_sentence2": 1996 }, "fin_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 361278, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 361278 + "unique_sentence2": 1996 }, "fin_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 527532, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.10265398097147, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 527532 + "unique_sentence2": 1996 }, "fra_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 524289, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 524289 + "unique_sentence2": 1995 }, "fra_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 536910, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536910 + "unique_sentence2": 1997 }, "fra_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 576068, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 141.6925388082123, "max_sentence2_length": 460, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 576068 + "unique_sentence2": 1997 }, "fra_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 588758, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 588758 + "unique_sentence2": 1996 }, "fra_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 592235, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 592235 + "unique_sentence2": 1996 }, "fra_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 540828, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540828 + "unique_sentence2": 1997 }, "fra_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 536447, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536447 + "unique_sentence2": 1995 }, "fra_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 562908, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 562908 + "unique_sentence2": 1996 }, "fra_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 565094, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 136.19729594391586, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 565094 + "unique_sentence2": 1996 }, "fra_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 493404, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 493404 + "unique_sentence2": 1996 }, "fra_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 554683, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 554683 + "unique_sentence2": 1996 }, "fra_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 571484, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 571484 + "unique_sentence2": 1997 }, "fra_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 580080, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 580080 + "unique_sentence2": 1997 }, "fra_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 582325, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 144.82573860791186, "max_sentence2_length": 623, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582325 + "unique_sentence2": 1996 }, "fra_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 404734, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 404734 + "unique_sentence2": 1994 }, "fra_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 426513, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 426513 + "unique_sentence2": 1995 }, "fra_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 552505, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 552505 + "unique_sentence2": 1995 }, "fra_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 570583, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 138.94591887831749, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 570583 + "unique_sentence2": 1996 }, "fra_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 585023, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 585023 + "unique_sentence2": 1996 }, "fra_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 570974, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 570974 + "unique_sentence2": 1996 }, "fra_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 570323, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 570323 + "unique_sentence2": 1996 }, "fra_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 585593, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 146.4621932899349, "max_sentence2_length": 518, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 585593 + "unique_sentence2": 1997 }, "fra_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 568001, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 568001 + "unique_sentence2": 1996 }, "fra_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 581308, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 581308 + "unique_sentence2": 1996 }, "fra_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 565617, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 565617 + "unique_sentence2": 1997 }, "fra_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 544935, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 544935 + "unique_sentence2": 1996 }, "fra_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 603069, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 603069 + "unique_sentence2": 1997 }, "fra_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 558721, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 558721 + "unique_sentence2": 1997 }, "fra_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 564229, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564229 + "unique_sentence2": 1996 }, "fra_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 384586, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 384586 + "unique_sentence2": 1996 }, "fra_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 550840, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.7741612418628, "max_sentence1_length": 512, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 550840 + "unique_sentence2": 1996 }, "fuc_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 526972, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 114.4116174261392, "max_sentence1_length": 376, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 149.47020530796195, "max_sentence2_length": 465, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526972 + "unique_sentence2": 1997 }, "fuc_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 476200, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 114.4116174261392, "max_sentence1_length": 376, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 476200 + "unique_sentence2": 1997 }, "fuc_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 467458, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 114.4116174261392, "max_sentence1_length": 376, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 119.6685027541312, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 467458 + "unique_sentence2": 1994 }, "fuc_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 532267, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 114.4116174261392, "max_sentence1_length": 376, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 152.12168252378567, "max_sentence2_length": 541, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532267 + "unique_sentence2": 1996 }, "fuc_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 526219, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 114.4116174261392, "max_sentence1_length": 376, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 149.09313970956435, "max_sentence2_length": 590, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526219 + "unique_sentence2": 1997 }, "fuc_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 512762, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 114.4116174261392, "max_sentence1_length": 376, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 142.35453179769655, "max_sentence2_length": 464, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 512762 + "unique_sentence2": 1993 }, "fuc_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 526810, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 114.4116174261392, "max_sentence1_length": 376, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 149.38908362543816, "max_sentence2_length": 511, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526810 + "unique_sentence2": 1995 }, "fuc_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 528236, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 114.4116174261392, "max_sentence1_length": 376, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 150.10315473209815, "max_sentence2_length": 535, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 528236 + "unique_sentence2": 1993 }, "gle_Latn-cym_Latn": { + "num_samples": 1997, + "number_of_characters": 561314, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 147.62593890836254, "max_sentence1_length": 461, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 133.4526790185278, "max_sentence2_length": 444, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561314 + "unique_sentence2": 1997 }, "gle_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 542529, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 147.62593890836254, "max_sentence1_length": 461, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 542529 + "unique_sentence2": 1997 }, "glg_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 554946, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 136.19729594391586, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 141.6925388082123, "max_sentence2_length": 460, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 554946 + "unique_sentence2": 1997 }, "glg_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 519706, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 136.19729594391586, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 519706 + "unique_sentence2": 1997 }, "glg_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 565094, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 136.19729594391586, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 565094 + "unique_sentence2": 1996 }, "glg_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 561203, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 136.19729594391586, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 144.82573860791186, "max_sentence2_length": 623, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561203 + "unique_sentence2": 1996 }, "glg_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 549461, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 136.19729594391586, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 138.94591887831749, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549461 + "unique_sentence2": 1996 }, "glg_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 549201, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 136.19729594391586, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549201 + "unique_sentence2": 1996 }, "glg_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 564471, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 136.19729594391586, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 146.4621932899349, "max_sentence2_length": 518, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564471 + "unique_sentence2": 1997 }, "glg_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 560186, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 136.19729594391586, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 560186 + "unique_sentence2": 1996 }, "guj_Gujr-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 488733, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 122.64947421131697, "max_sentence1_length": 378, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 488733 + "unique_sentence2": 1997 }, "guj_Gujr-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 548779, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 122.64947421131697, "max_sentence1_length": 378, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 152.15222834251378, "max_sentence2_length": 609, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 548779 + "unique_sentence2": 1996 }, "guj_Gujr-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 492651, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 122.64947421131697, "max_sentence1_length": 378, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 492651 + "unique_sentence2": 1997 }, "guj_Gujr-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 520134, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 122.64947421131697, "max_sentence1_length": 378, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 137.80821231847773, "max_sentence2_length": 393, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520134 + "unique_sentence2": 1997 }, "guj_Gujr-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 506506, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 122.64947421131697, "max_sentence1_length": 378, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506506 + "unique_sentence2": 1996 }, "guj_Gujr-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 510467, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 122.64947421131697, "max_sentence1_length": 378, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 132.96745117676514, "max_sentence2_length": 449, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 510467 + "unique_sentence2": 1996 }, "guj_Gujr-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 505818, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 122.64947421131697, "max_sentence1_length": 378, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 130.63945918878318, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 505818 + "unique_sentence2": 1995 }, "guj_Gujr-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 493154, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 122.64947421131697, "max_sentence1_length": 378, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 124.29794692038057, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 493154 + "unique_sentence2": 1996 }, "guj_Gujr-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 495353, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 122.64947421131697, "max_sentence1_length": 378, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 125.39909864797195, "max_sentence2_length": 383, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495353 + "unique_sentence2": 1996 }, "guj_Gujr-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 503672, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 122.64947421131697, "max_sentence1_length": 378, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 129.56484727090637, "max_sentence2_length": 441, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503672 + "unique_sentence2": 1996 }, "guj_Gujr-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 465258, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 122.64947421131697, "max_sentence1_length": 378, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 110.32899349023535, "max_sentence2_length": 335, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 465258 + "unique_sentence2": 1996 }, "guj_Gujr-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 554892, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 122.64947421131697, "max_sentence1_length": 378, + "unique_sentence1": 1997, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 554892 + "unique_sentence2": 1997 }, "guj_Gujr-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 492458, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 122.64947421131697, "max_sentence1_length": 378, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 123.9494241362043, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 492458 + "unique_sentence2": 1996 }, "guj_Gujr-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 492929, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 122.64947421131697, "max_sentence1_length": 378, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.18527791687531, "max_sentence2_length": 390, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 492929 + "unique_sentence2": 1996 }, "hau_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 437473, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 135.185778668002, "max_sentence1_length": 483, + "unique_sentence1": 1997, "min_sentence2_length": 1, "average_sentence2_length": 83.87931897846771, "max_sentence2_length": 290, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 437473 + "unique_sentence2": 1994 }, "hau_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 517686, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 135.185778668002, "max_sentence1_length": 483, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 517686 + "unique_sentence2": 1997 }, "hau_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 516067, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 135.185778668002, "max_sentence1_length": 483, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 123.23535302954431, "max_sentence2_length": 469, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 516067 + "unique_sentence2": 1997 }, "hau_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 561465, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 135.185778668002, "max_sentence1_length": 483, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 145.96845267901853, "max_sentence2_length": 487, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561465 + "unique_sentence2": 1996 }, "hau_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 507397, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 135.185778668002, "max_sentence1_length": 483, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 118.89384076114172, "max_sentence2_length": 466, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 507397 + "unique_sentence2": 1984 }, "hau_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 561258, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 135.185778668002, "max_sentence1_length": 483, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 145.8647971957937, "max_sentence2_length": 455, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561258 + "unique_sentence2": 1997 }, "hau_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 558108, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 135.185778668002, "max_sentence1_length": 483, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 144.28743114672008, "max_sentence2_length": 510, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 558108 + "unique_sentence2": 1996 }, "hau_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 542475, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 135.185778668002, "max_sentence1_length": 483, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 542475 + "unique_sentence2": 1997 }, "hau_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 435204, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 135.185778668002, "max_sentence1_length": 483, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 82.743114672008, "max_sentence2_length": 272, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 435204 + "unique_sentence2": 1996 }, "hau_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 604249, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 135.185778668002, "max_sentence1_length": 483, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 167.39258888332498, "max_sentence2_length": 556, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 604249 + "unique_sentence2": 1997 }, "hau_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 509769, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 135.185778668002, "max_sentence1_length": 483, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 120.08162243365048, "max_sentence2_length": 405, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509769 + "unique_sentence2": 1990 }, "hau_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 538056, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 135.185778668002, "max_sentence1_length": 483, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 134.2463695543315, "max_sentence2_length": 492, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 538056 + "unique_sentence2": 1997 }, "hau_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 586054, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 135.185778668002, "max_sentence1_length": 483, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 158.2814221331998, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 586054 + "unique_sentence2": 1996 }, "hau_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 527698, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 135.185778668002, "max_sentence1_length": 483, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 527698 + "unique_sentence2": 1996 }, "heb_Hebr-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 431477, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 431477 + "unique_sentence2": 1995 }, "heb_Hebr-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 444098, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 444098 + "unique_sentence2": 1997 }, "heb_Hebr-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 452663, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 126.37305958938407, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 452663 + "unique_sentence2": 1995 }, "heb_Hebr-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 495946, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495946 + "unique_sentence2": 1996 }, "heb_Hebr-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 499423, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 499423 + "unique_sentence2": 1996 }, "heb_Hebr-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 448016, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 448016 + "unique_sentence2": 1997 }, "heb_Hebr-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 443635, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 443635 + "unique_sentence2": 1995 }, "heb_Hebr-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 470096, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 470096 + "unique_sentence2": 1996 }, "heb_Hebr-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 493404, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 493404 + "unique_sentence2": 1996 }, "heb_Hebr-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 461871, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 461871 + "unique_sentence2": 1996 }, "heb_Hebr-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 478672, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 478672 + "unique_sentence2": 1997 }, "heb_Hebr-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 487268, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 487268 + "unique_sentence2": 1997 }, "heb_Hebr-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 311922, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 311922 + "unique_sentence2": 1994 }, "heb_Hebr-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 446242, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 123.15773660490736, "max_sentence2_length": 420, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 446242 + "unique_sentence2": 1996 }, "heb_Hebr-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 333701, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 333701 + "unique_sentence2": 1995 }, "heb_Hebr-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 459693, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 459693 + "unique_sentence2": 1995 }, "heb_Hebr-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 414131, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 107.07811717576365, "max_sentence2_length": 392, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 414131 + "unique_sentence2": 1993 }, "heb_Hebr-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 492211, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 492211 + "unique_sentence2": 1996 }, "heb_Hebr-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 478162, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 478162 + "unique_sentence2": 1996 }, "heb_Hebr-por_Latn": { + "num_samples": 1997, + "number_of_characters": 477511, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 477511 + "unique_sentence2": 1996 }, "heb_Hebr-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 442832, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 121.45017526289435, "max_sentence2_length": 365, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 442832 + "unique_sentence2": 1997 }, "heb_Hebr-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 442929, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 121.49874812218327, "max_sentence2_length": 366, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 442929 + "unique_sentence2": 1996 }, "heb_Hebr-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 475189, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 475189 + "unique_sentence2": 1996 }, "heb_Hebr-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 415209, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 107.6179268903355, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 415209 + "unique_sentence2": 1996 }, "heb_Hebr-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 488496, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 488496 + "unique_sentence2": 1996 }, "heb_Hebr-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 472805, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 472805 + "unique_sentence2": 1997 }, "heb_Hebr-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 452123, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 452123 + "unique_sentence2": 1996 }, "heb_Hebr-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 510257, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 510257 + "unique_sentence2": 1997 }, "heb_Hebr-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 474443, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 137.27941912869304, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 474443 + "unique_sentence2": 1995 }, "heb_Hebr-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 465909, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 465909 + "unique_sentence2": 1997 }, "heb_Hebr-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 471417, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 471417 + "unique_sentence2": 1996 }, "heb_Hebr-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 291774, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 291774 + "unique_sentence2": 1996 }, "heb_Hebr-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 458028, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 100.29844767150726, "max_sentence1_length": 375, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 458028 + "unique_sentence2": 1996 }, "hin_Deva-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 492756, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 492756 + "unique_sentence2": 1995 }, "hin_Deva-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 505377, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 505377 + "unique_sentence2": 1997 }, "hin_Deva-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 557225, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 557225 + "unique_sentence2": 1996 }, "hin_Deva-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 565423, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 152.15222834251378, "max_sentence2_length": 609, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 565423 + "unique_sentence2": 1996 }, "hin_Deva-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 560702, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 560702 + "unique_sentence2": 1996 }, "hin_Deva-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 509295, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509295 + "unique_sentence2": 1997 }, "hin_Deva-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 536778, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 137.80821231847773, "max_sentence2_length": 393, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536778 + "unique_sentence2": 1997 }, "hin_Deva-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 504914, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 504914 + "unique_sentence2": 1995 }, "hin_Deva-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 531375, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531375 + "unique_sentence2": 1996 }, "hin_Deva-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 554683, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 554683 + "unique_sentence2": 1996 }, "hin_Deva-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 506506, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 122.64947421131697, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506506 + "unique_sentence2": 1997 }, "hin_Deva-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 461871, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 461871 + "unique_sentence2": 1996 }, "hin_Deva-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 539951, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 539951 + "unique_sentence2": 1997 }, "hin_Deva-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 548547, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 548547 + "unique_sentence2": 1997 }, "hin_Deva-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 373201, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 373201 + "unique_sentence2": 1994 }, "hin_Deva-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 527111, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 132.96745117676514, "max_sentence2_length": 449, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 527111 + "unique_sentence2": 1996 }, "hin_Deva-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 394980, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 394980 + "unique_sentence2": 1995 }, "hin_Deva-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 520972, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520972 + "unique_sentence2": 1995 }, "hin_Deva-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 522462, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 130.63945918878318, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522462 + "unique_sentence2": 1995 }, "hin_Deva-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 509798, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 124.29794692038057, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509798 + "unique_sentence2": 1996 }, "hin_Deva-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 553490, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 553490 + "unique_sentence2": 1996 }, "hin_Deva-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 511997, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 125.39909864797195, "max_sentence2_length": 383, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 511997 + "unique_sentence2": 1996 }, "hin_Deva-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 539441, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 539441 + "unique_sentence2": 1996 }, "hin_Deva-por_Latn": { + "num_samples": 1997, + "number_of_characters": 538790, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 538790 + "unique_sentence2": 1996 }, "hin_Deva-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 536468, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536468 + "unique_sentence2": 1996 }, "hin_Deva-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 520316, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 129.56484727090637, "max_sentence2_length": 441, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520316 + "unique_sentence2": 1996 }, "hin_Deva-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 481902, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 110.32899349023535, "max_sentence2_length": 335, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 481902 + "unique_sentence2": 1996 }, "hin_Deva-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 549775, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549775 + "unique_sentence2": 1996 }, "hin_Deva-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 534084, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 534084 + "unique_sentence2": 1997 }, "hin_Deva-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 513402, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 513402 + "unique_sentence2": 1996 }, "hin_Deva-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 571536, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 571536 + "unique_sentence2": 1997 }, "hin_Deva-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 509102, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 123.9494241362043, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509102 + "unique_sentence2": 1996 }, "hin_Deva-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 527188, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 527188 + "unique_sentence2": 1997 }, "hin_Deva-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 509573, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.18527791687531, "max_sentence2_length": 390, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509573 + "unique_sentence2": 1996 }, "hin_Deva-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 532696, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532696 + "unique_sentence2": 1996 }, "hin_Deva-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 353053, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 353053 + "unique_sentence2": 1996 }, "hin_Deva-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 519307, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 130.9839759639459, "max_sentence1_length": 394, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 519307 + "unique_sentence2": 1996 }, "hmn_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 578510, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 165.6434651977967, "max_sentence1_length": 643, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 578510 + "unique_sentence2": 1997 }, "hrv_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 512015, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 128.1547320981472, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 128.2373560340511, "max_sentence2_length": 422, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 512015 + "unique_sentence2": 1996 }, "hrv_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 510835, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 128.1547320981472, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 127.64646970455684, "max_sentence2_length": 434, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 510835 + "unique_sentence2": 1996 }, "hrv_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 525814, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 128.1547320981472, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 135.14722083124687, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525814 + "unique_sentence2": 1996 }, "hrv_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 497243, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 128.1547320981472, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 120.84026039058588, "max_sentence2_length": 474, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 497243 + "unique_sentence2": 1997 }, "hrv_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 503645, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 128.1547320981472, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503645 + "unique_sentence2": 1997 }, "hrv_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 523816, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 128.1547320981472, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 134.1467200801202, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 523816 + "unique_sentence2": 1997 }, "hrv_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 533791, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 128.1547320981472, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 533791 + "unique_sentence2": 1996 }, "hrv_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 530818, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 128.1547320981472, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530818 + "unique_sentence2": 1996 }, "hrv_Latn-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 508894, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 128.1547320981472, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 126.67451176765148, "max_sentence2_length": 403, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508894 + "unique_sentence2": 1996 }, "hrv_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 508821, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 128.1547320981472, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 126.63795693540311, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508821 + "unique_sentence2": 1996 }, "hrv_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 508228, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 128.1547320981472, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 126.34101151727592, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508228 + "unique_sentence2": 1995 }, "hrv_Latn-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 512066, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 128.1547320981472, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 128.26289434151226, "max_sentence2_length": 452, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 512066 + "unique_sentence2": 1996 }, "hrv_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 518708, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 128.1547320981472, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 131.58888332498748, "max_sentence2_length": 440, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 518708 + "unique_sentence2": 1996 }, "hun_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 509557, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509557 + "unique_sentence2": 1995 }, "hun_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 522178, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522178 + "unique_sentence2": 1997 }, "hun_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 574026, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 574026 + "unique_sentence2": 1996 }, "hun_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 577503, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 577503 + "unique_sentence2": 1996 }, "hun_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 526096, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526096 + "unique_sentence2": 1997 }, "hun_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 521715, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 521715 + "unique_sentence2": 1995 }, "hun_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 548176, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 548176 + "unique_sentence2": 1996 }, "hun_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 571484, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 571484 + "unique_sentence2": 1996 }, "hun_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 478672, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 478672 + "unique_sentence2": 1996 }, "hun_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 539951, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 539951 + "unique_sentence2": 1996 }, "hun_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 565348, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 565348 + "unique_sentence2": 1997 }, "hun_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 390002, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 390002 + "unique_sentence2": 1994 }, "hun_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 411781, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 411781 + "unique_sentence2": 1995 }, "hun_Latn-lav_Latn": { + "num_samples": 1997, + "number_of_characters": 546564, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 134.29544316474713, "max_sentence2_length": 503, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 546564 + "unique_sentence2": 1994 }, "hun_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 537773, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 537773 + "unique_sentence2": 1995 }, "hun_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 570291, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 570291 + "unique_sentence2": 1996 }, "hun_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 556242, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 556242 + "unique_sentence2": 1996 }, "hun_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 555591, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 555591 + "unique_sentence2": 1996 }, "hun_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 553269, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 553269 + "unique_sentence2": 1996 }, "hun_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 566576, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 566576 + "unique_sentence2": 1996 }, "hun_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 550885, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 550885 + "unique_sentence2": 1997 }, "hun_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 530203, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530203 + "unique_sentence2": 1996 }, "hun_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 588337, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 588337 + "unique_sentence2": 1997 }, "hun_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 543989, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 543989 + "unique_sentence2": 1997 }, "hun_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 549497, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549497 + "unique_sentence2": 1996 }, "hun_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 369854, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 369854 + "unique_sentence2": 1996 }, "hun_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 536108, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 139.3970956434652, "max_sentence1_length": 508, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536108 + "unique_sentence2": 1996 }, "hye_Armn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 563842, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 132.55633450175262, "max_sentence1_length": 441, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 563842 + "unique_sentence2": 1996 }, "hye_Armn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 512435, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 132.55633450175262, "max_sentence1_length": 441, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 512435 + "unique_sentence2": 1997 }, "hye_Armn-kat_Geor": { + "num_samples": 1997, + "number_of_characters": 531307, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 132.55633450175262, "max_sentence1_length": 441, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 133.49624436654983, "max_sentence2_length": 503, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531307 + "unique_sentence2": 1995 }, "hye_Armn-sqi_Latn": { + "num_samples": 1997, + "number_of_characters": 548322, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 132.55633450175262, "max_sentence1_length": 441, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 142.01652478718077, "max_sentence2_length": 461, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 548322 + "unique_sentence2": 1996 }, "ibo_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 413608, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 123.23535302954431, "max_sentence1_length": 469, + "unique_sentence1": 1997, "min_sentence2_length": 1, "average_sentence2_length": 83.87931897846771, "max_sentence2_length": 290, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 413608 + "unique_sentence2": 1994 }, "ibo_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 493821, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 123.23535302954431, "max_sentence1_length": 469, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 493821 + "unique_sentence2": 1997 }, "ibo_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 516067, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 123.23535302954431, "max_sentence1_length": 469, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 135.185778668002, "max_sentence2_length": 483, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 516067 + "unique_sentence2": 1997 }, "ibo_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 537600, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 123.23535302954431, "max_sentence1_length": 469, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 145.96845267901853, "max_sentence2_length": 487, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 537600 + "unique_sentence2": 1996 }, "ibo_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 483532, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 123.23535302954431, "max_sentence1_length": 469, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 118.89384076114172, "max_sentence2_length": 466, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 483532 + "unique_sentence2": 1984 }, "ibo_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 537393, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 123.23535302954431, "max_sentence1_length": 469, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 145.8647971957937, "max_sentence2_length": 455, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 537393 + "unique_sentence2": 1997 }, "ibo_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 534243, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 123.23535302954431, "max_sentence1_length": 469, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 144.28743114672008, "max_sentence2_length": 510, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 534243 + "unique_sentence2": 1996 }, "ibo_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 518610, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 123.23535302954431, "max_sentence1_length": 469, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 518610 + "unique_sentence2": 1997 }, "ibo_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 411339, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 123.23535302954431, "max_sentence1_length": 469, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 82.743114672008, "max_sentence2_length": 272, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 411339 + "unique_sentence2": 1996 }, "ibo_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 580384, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 123.23535302954431, "max_sentence1_length": 469, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 167.39258888332498, "max_sentence2_length": 556, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 580384 + "unique_sentence2": 1997 }, "ibo_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 485904, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 123.23535302954431, "max_sentence1_length": 469, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 120.08162243365048, "max_sentence2_length": 405, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 485904 + "unique_sentence2": 1990 }, "ibo_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 514191, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 123.23535302954431, "max_sentence1_length": 469, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 134.2463695543315, "max_sentence2_length": 492, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 514191 + "unique_sentence2": 1997 }, "ibo_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 562189, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 123.23535302954431, "max_sentence1_length": 469, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 158.2814221331998, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 562189 + "unique_sentence2": 1996 }, "ibo_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 503833, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 123.23535302954431, "max_sentence1_length": 469, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503833 + "unique_sentence2": 1996 }, "ind_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 518153, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 518153 + "unique_sentence2": 1995 }, "ind_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 530774, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530774 + "unique_sentence2": 1997 }, "ind_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 582622, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582622 + "unique_sentence2": 1996 }, "ind_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 586099, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 586099 + "unique_sentence2": 1996 }, "ind_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 534692, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 534692 + "unique_sentence2": 1997 }, "ind_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 530311, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530311 + "unique_sentence2": 1995 }, "ind_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 587477, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 150.478217325989, "max_sentence2_length": 448, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 587477 + "unique_sentence2": 1988 }, "ind_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 580392, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 146.93039559339007, "max_sentence2_length": 554, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 580392 + "unique_sentence2": 1997 }, "ind_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 556772, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 556772 + "unique_sentence2": 1996 }, "ind_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 580080, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 580080 + "unique_sentence2": 1996 }, "ind_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 487268, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 487268 + "unique_sentence2": 1996 }, "ind_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 548547, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 548547 + "unique_sentence2": 1996 }, "ind_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 565348, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 565348 + "unique_sentence2": 1997 }, "ind_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 398598, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 398598 + "unique_sentence2": 1994 }, "ind_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 420377, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 420377 + "unique_sentence2": 1995 }, "ind_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 546369, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 546369 + "unique_sentence2": 1995 }, "ind_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 591124, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 152.30445668502753, "max_sentence2_length": 540, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 591124 + "unique_sentence2": 1996 }, "ind_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 607280, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 160.39459188783175, "max_sentence2_length": 559, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 607280 + "unique_sentence2": 1994 }, "ind_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 561096, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 137.26790185277918, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561096 + "unique_sentence2": 1997 }, "ind_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 564155, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 138.79969954932398, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564155 + "unique_sentence2": 1997 }, "ind_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 578887, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 578887 + "unique_sentence2": 1996 }, "ind_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 564838, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564838 + "unique_sentence2": 1996 }, "ind_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 564187, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564187 + "unique_sentence2": 1996 }, "ind_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 561865, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561865 + "unique_sentence2": 1996 }, "ind_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 564827, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 139.1362043064597, "max_sentence2_length": 431, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564827 + "unique_sentence2": 1996 }, "ind_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 575172, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 575172 + "unique_sentence2": 1996 }, "ind_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 559481, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 559481 + "unique_sentence2": 1997 }, "ind_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 538799, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 538799 + "unique_sentence2": 1996 }, "ind_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 596595, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 155.04406609914872, "max_sentence2_length": 524, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 596595 + "unique_sentence2": 1997 }, "ind_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 596933, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 596933 + "unique_sentence2": 1997 }, "ind_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 600612, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 157.05558337506258, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 600612 + "unique_sentence2": 1997 }, "ind_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 552585, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 552585 + "unique_sentence2": 1997 }, "ind_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 558093, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 558093 + "unique_sentence2": 1996 }, "ind_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 378450, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 378450 + "unique_sentence2": 1996 }, "ind_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 544704, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 143.70155232849274, "max_sentence1_length": 486, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 544704 + "unique_sentence2": 1996 }, "isl_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 530560, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 131.30095142714072, "max_sentence1_length": 399, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 134.37756634952427, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530560 + "unique_sentence2": 1996 }, "isl_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 514346, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 131.30095142714072, "max_sentence1_length": 399, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 126.25838758137206, "max_sentence2_length": 522, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 514346 + "unique_sentence2": 1995 }, "isl_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 557858, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 131.30095142714072, "max_sentence1_length": 399, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 557858 + "unique_sentence2": 1996 }, "isl_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 509928, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 131.30095142714072, "max_sentence1_length": 399, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509928 + "unique_sentence2": 1997 }, "isl_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 520011, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 131.30095142714072, "max_sentence1_length": 399, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0951427140711, "max_sentence2_length": 433, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520011 + "unique_sentence2": 1997 }, "isl_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 542965, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 131.30095142714072, "max_sentence1_length": 399, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 140.58938407611416, "max_sentence2_length": 543, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 542965 + "unique_sentence2": 1996 }, "isl_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 554123, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 131.30095142714072, "max_sentence1_length": 399, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 554123 + "unique_sentence2": 1996 }, "isl_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 510565, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 131.30095142714072, "max_sentence1_length": 399, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.36504757135704, "max_sentence2_length": 417, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 510565 + "unique_sentence2": 1996 }, "isl_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 513652, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 131.30095142714072, "max_sentence1_length": 399, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 125.91086629944917, "max_sentence2_length": 482, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 513652 + "unique_sentence2": 1996 }, "isl_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 514035, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 131.30095142714072, "max_sentence1_length": 399, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 514035 + "unique_sentence2": 1996 }, "ita_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 572177, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 144.82573860791186, "max_sentence1_length": 623, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 141.6925388082123, "max_sentence2_length": 460, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 572177 + "unique_sentence2": 1997 }, "ita_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 536937, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 144.82573860791186, "max_sentence1_length": 623, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536937 + "unique_sentence2": 1997 }, "ita_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 582325, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 144.82573860791186, "max_sentence1_length": 623, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582325 + "unique_sentence2": 1996 }, "ita_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 561203, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 144.82573860791186, "max_sentence1_length": 623, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 136.19729594391586, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561203 + "unique_sentence2": 1996 }, "ita_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 566692, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 144.82573860791186, "max_sentence1_length": 623, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 138.94591887831749, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 566692 + "unique_sentence2": 1996 }, "ita_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 566432, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 144.82573860791186, "max_sentence1_length": 623, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 566432 + "unique_sentence2": 1996 }, "ita_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 581702, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 144.82573860791186, "max_sentence1_length": 623, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 146.4621932899349, "max_sentence2_length": 518, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 581702 + "unique_sentence2": 1997 }, "ita_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 577417, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 144.82573860791186, "max_sentence1_length": 623, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 577417 + "unique_sentence2": 1996 }, "jpn_Jpan-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 342807, + "unique_pairs": 1995, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 342807 + "unique_sentence2": 1995 }, "jpn_Jpan-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 355428, + "unique_pairs": 1997, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 355428 + "unique_sentence2": 1997 }, "jpn_Jpan-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 407276, + "unique_pairs": 1996, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 407276 + "unique_sentence2": 1996 }, "jpn_Jpan-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 410753, + "unique_pairs": 1996, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 410753 + "unique_sentence2": 1996 }, "jpn_Jpan-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 359346, + "unique_pairs": 1997, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 359346 + "unique_sentence2": 1997 }, "jpn_Jpan-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 354965, + "unique_pairs": 1995, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 354965 + "unique_sentence2": 1995 }, "jpn_Jpan-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 381426, + "unique_pairs": 1996, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 381426 + "unique_sentence2": 1996 }, "jpn_Jpan-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 404734, + "unique_pairs": 1996, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 404734 + "unique_sentence2": 1996 }, "jpn_Jpan-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 311922, + "unique_pairs": 1996, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 311922 + "unique_sentence2": 1996 }, "jpn_Jpan-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 373201, + "unique_pairs": 1996, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 373201 + "unique_sentence2": 1996 }, "jpn_Jpan-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 390002, + "unique_pairs": 1997, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 390002 + "unique_sentence2": 1997 }, "jpn_Jpan-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 398598, + "unique_pairs": 1997, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 398598 + "unique_sentence2": 1997 }, "jpn_Jpan-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 245031, + "unique_pairs": 1995, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 245031 + "unique_sentence2": 1995 }, "jpn_Jpan-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 371023, + "unique_pairs": 1995, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 371023 + "unique_sentence2": 1995 }, "jpn_Jpan-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 403541, + "unique_pairs": 1996, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 403541 + "unique_sentence2": 1996 }, "jpn_Jpan-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 389492, + "unique_pairs": 1996, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 389492 + "unique_sentence2": 1996 }, "jpn_Jpan-por_Latn": { + "num_samples": 1997, + "number_of_characters": 388841, + "unique_pairs": 1996, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 388841 + "unique_sentence2": 1996 }, "jpn_Jpan-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 386519, + "unique_pairs": 1996, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 386519 + "unique_sentence2": 1996 }, "jpn_Jpan-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 399826, + "unique_pairs": 1996, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 399826 + "unique_sentence2": 1996 }, "jpn_Jpan-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 384135, + "unique_pairs": 1997, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 384135 + "unique_sentence2": 1997 }, "jpn_Jpan-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 363453, + "unique_pairs": 1996, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 363453 + "unique_sentence2": 1996 }, "jpn_Jpan-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 421587, + "unique_pairs": 1997, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 421587 + "unique_sentence2": 1997 }, "jpn_Jpan-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 377239, + "unique_pairs": 1997, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 377239 + "unique_sentence2": 1997 }, "jpn_Jpan-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 382747, + "unique_pairs": 1996, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 382747 + "unique_sentence2": 1996 }, "jpn_Jpan-yue_Hant": { + "num_samples": 1997, + "number_of_characters": 190513, + "unique_pairs": 1996, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 4, "average_sentence2_length": 39.502754131196795, "max_sentence2_length": 133, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 190513 + "unique_sentence2": 1996 }, "jpn_Jpan-zho_Hans": { + "num_samples": 1997, + "number_of_characters": 196587, + "unique_pairs": 1997, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 4, "average_sentence2_length": 42.54431647471207, "max_sentence2_length": 263, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 196587 + "unique_sentence2": 1997 }, "jpn_Jpan-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 203104, + "unique_pairs": 1996, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 203104 + "unique_sentence2": 1996 }, "jpn_Jpan-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 369358, + "unique_pairs": 1997, "min_sentence1_length": 4, "average_sentence1_length": 55.89684526790185, "max_sentence1_length": 189, + "unique_sentence1": 1994, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 369358 + "unique_sentence2": 1996 }, "kan_Knda-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 509338, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 132.96745117676514, "max_sentence1_length": 449, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509338 + "unique_sentence2": 1997 }, "kan_Knda-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 569384, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 132.96745117676514, "max_sentence1_length": 449, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 152.15222834251378, "max_sentence2_length": 609, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 569384 + "unique_sentence2": 1996 }, "kan_Knda-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 513256, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 132.96745117676514, "max_sentence1_length": 449, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 513256 + "unique_sentence2": 1997 }, "kan_Knda-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 540739, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 132.96745117676514, "max_sentence1_length": 449, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 137.80821231847773, "max_sentence2_length": 393, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540739 + "unique_sentence2": 1997 }, "kan_Knda-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 510467, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 132.96745117676514, "max_sentence1_length": 449, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 122.64947421131697, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 510467 + "unique_sentence2": 1997 }, "kan_Knda-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 527111, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 132.96745117676514, "max_sentence1_length": 449, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 527111 + "unique_sentence2": 1996 }, "kan_Knda-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 526423, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 132.96745117676514, "max_sentence1_length": 449, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 130.63945918878318, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526423 + "unique_sentence2": 1995 }, "kan_Knda-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 513759, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 132.96745117676514, "max_sentence1_length": 449, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 124.29794692038057, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 513759 + "unique_sentence2": 1996 }, "kan_Knda-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 515958, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 132.96745117676514, "max_sentence1_length": 449, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 125.39909864797195, "max_sentence2_length": 383, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515958 + "unique_sentence2": 1996 }, "kan_Knda-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 524277, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 132.96745117676514, "max_sentence1_length": 449, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 129.56484727090637, "max_sentence2_length": 441, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 524277 + "unique_sentence2": 1996 }, "kan_Knda-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 485863, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 132.96745117676514, "max_sentence1_length": 449, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 110.32899349023535, "max_sentence2_length": 335, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 485863 + "unique_sentence2": 1996 }, "kan_Knda-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 575497, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 132.96745117676514, "max_sentence1_length": 449, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 575497 + "unique_sentence2": 1997 }, "kan_Knda-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 513063, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 132.96745117676514, "max_sentence1_length": 449, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 123.9494241362043, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 513063 + "unique_sentence2": 1996 }, "kan_Knda-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 513534, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 132.96745117676514, "max_sentence1_length": 449, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.18527791687531, "max_sentence2_length": 390, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 513534 + "unique_sentence2": 1996 }, "kat_Geor-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 565719, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 133.49624436654983, "max_sentence1_length": 503, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 565719 + "unique_sentence2": 1996 }, "kat_Geor-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 514312, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 133.49624436654983, "max_sentence1_length": 503, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 514312 + "unique_sentence2": 1997 }, "kat_Geor-hye_Armn": { + "num_samples": 1997, + "number_of_characters": 531307, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 133.49624436654983, "max_sentence1_length": 503, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 132.55633450175262, "max_sentence2_length": 441, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531307 + "unique_sentence2": 1996 }, "kat_Geor-sqi_Latn": { + "num_samples": 1997, + "number_of_characters": 550199, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 133.49624436654983, "max_sentence1_length": 503, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 142.01652478718077, "max_sentence2_length": 461, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 550199 + "unique_sentence2": 1996 }, "kaz_Cyrl-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 529910, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 130.33350025037555, "max_sentence1_length": 473, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 135.0195292939409, "max_sentence2_length": 398, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 529910 + "unique_sentence2": 1997 }, "kaz_Cyrl-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 506602, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 130.33350025037555, "max_sentence1_length": 473, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 123.34802203304957, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506602 + "unique_sentence2": 1995 }, "kaz_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 507996, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 130.33350025037555, "max_sentence1_length": 473, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 507996 + "unique_sentence2": 1997 }, "kaz_Cyrl-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 511140, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 130.33350025037555, "max_sentence1_length": 473, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 125.62043064596895, "max_sentence2_length": 395, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 511140 + "unique_sentence2": 1996 }, "kaz_Cyrl-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 506202, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 130.33350025037555, "max_sentence1_length": 473, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 123.14772158237356, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506202 + "unique_sentence2": 1996 }, "kaz_Cyrl-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 545550, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 130.33350025037555, "max_sentence1_length": 473, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 142.85127691537306, "max_sentence2_length": 576, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 545550 + "unique_sentence2": 1996 }, "kaz_Cyrl-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 525889, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 130.33350025037555, "max_sentence1_length": 473, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525889 + "unique_sentence2": 1997 }, "kaz_Cyrl-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 571298, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 130.33350025037555, "max_sentence1_length": 473, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 155.74461692538807, "max_sentence2_length": 592, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 571298 + "unique_sentence2": 1996 }, "kaz_Cyrl-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 553971, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 130.33350025037555, "max_sentence1_length": 473, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 147.06810215322986, "max_sentence2_length": 470, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 553971 + "unique_sentence2": 1996 }, "khm_Khmr-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 589120, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 144.4621932899349, "max_sentence1_length": 517, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 150.54031046569855, "max_sentence2_length": 478, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 589120 + "unique_sentence2": 1993 }, "khm_Khmr-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 531712, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 144.4621932899349, "max_sentence1_length": 517, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 121.79318978467701, "max_sentence2_length": 411, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531712 + "unique_sentence2": 1992 }, "khm_Khmr-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 536211, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 144.4621932899349, "max_sentence1_length": 517, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536211 + "unique_sentence2": 1997 }, "khm_Khmr-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 555471, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 144.4621932899349, "max_sentence1_length": 517, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 133.69053580370556, "max_sentence2_length": 507, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 555471 + "unique_sentence2": 1997 }, "khm_Khmr-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 547539, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 144.4621932899349, "max_sentence1_length": 517, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 129.7185778668002, "max_sentence2_length": 414, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547539 + "unique_sentence2": 1997 }, "khm_Khmr-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 600345, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 144.4621932899349, "max_sentence1_length": 517, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 156.16124186279418, "max_sentence2_length": 773, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 600345 + "unique_sentence2": 1997 }, "khm_Khmr-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 525959, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 144.4621932899349, "max_sentence1_length": 517, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 118.91236855282925, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525959 + "unique_sentence2": 1996 }, "kin_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 602279, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 152.12168252378567, "max_sentence1_length": 541, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 149.47020530796195, "max_sentence2_length": 465, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 602279 + "unique_sentence2": 1997 }, "kin_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 551507, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 152.12168252378567, "max_sentence1_length": 541, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 551507 + "unique_sentence2": 1997 }, "kin_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 542765, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 152.12168252378567, "max_sentence1_length": 541, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 119.6685027541312, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 542765 + "unique_sentence2": 1994 }, "kin_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 532267, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 152.12168252378567, "max_sentence1_length": 541, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 114.4116174261392, "max_sentence2_length": 376, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532267 + "unique_sentence2": 1996 }, "kin_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 601526, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 152.12168252378567, "max_sentence1_length": 541, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 149.09313970956435, "max_sentence2_length": 590, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 601526 + "unique_sentence2": 1997 }, "kin_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 588069, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 152.12168252378567, "max_sentence1_length": 541, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 142.35453179769655, "max_sentence2_length": 464, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 588069 + "unique_sentence2": 1993 }, "kin_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 602117, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 152.12168252378567, "max_sentence1_length": 541, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 149.38908362543816, "max_sentence2_length": 511, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 602117 + "unique_sentence2": 1995 }, "kin_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 603543, + "unique_pairs": 1996, "min_sentence1_length": 10, "average_sentence1_length": 152.12168252378567, "max_sentence1_length": 541, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 150.10315473209815, "max_sentence2_length": 535, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 603543 + "unique_sentence2": 1993 }, "kir_Cyrl-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 520498, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 125.62043064596895, "max_sentence1_length": 395, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 135.0195292939409, "max_sentence2_length": 398, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520498 + "unique_sentence2": 1997 }, "kir_Cyrl-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 497190, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 125.62043064596895, "max_sentence1_length": 395, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 123.34802203304957, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 497190 + "unique_sentence2": 1995 }, "kir_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 498584, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 125.62043064596895, "max_sentence1_length": 395, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 498584 + "unique_sentence2": 1997 }, "kir_Cyrl-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 511140, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 125.62043064596895, "max_sentence1_length": 395, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 130.33350025037555, "max_sentence2_length": 473, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 511140 + "unique_sentence2": 1996 }, "kir_Cyrl-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 496790, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 125.62043064596895, "max_sentence1_length": 395, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 123.14772158237356, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 496790 + "unique_sentence2": 1996 }, "kir_Cyrl-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 536138, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 125.62043064596895, "max_sentence1_length": 395, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 142.85127691537306, "max_sentence2_length": 576, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536138 + "unique_sentence2": 1996 }, "kir_Cyrl-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 516477, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 125.62043064596895, "max_sentence1_length": 395, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 516477 + "unique_sentence2": 1997 }, "kir_Cyrl-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 561886, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 125.62043064596895, "max_sentence1_length": 395, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 155.74461692538807, "max_sentence2_length": 592, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561886 + "unique_sentence2": 1996 }, "kir_Cyrl-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 544559, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 125.62043064596895, "max_sentence1_length": 395, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 147.06810215322986, "max_sentence2_length": 470, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 544559 + "unique_sentence2": 1996 }, "kmr_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 477127, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 123.15773660490736, "max_sentence1_length": 420, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 477127 + "unique_sentence2": 1995 }, "kmr_Latn-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 498313, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 123.15773660490736, "max_sentence1_length": 420, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 126.37305958938407, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 498313 + "unique_sentence2": 1995 }, "kmr_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 493666, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 123.15773660490736, "max_sentence1_length": 420, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 493666 + "unique_sentence2": 1997 }, "kmr_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 489285, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 123.15773660490736, "max_sentence1_length": 420, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 489285 + "unique_sentence2": 1995 }, "kmr_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 446242, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 123.15773660490736, "max_sentence1_length": 420, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 446242 + "unique_sentence2": 1996 }, "kmr_Latn-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 459781, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 123.15773660490736, "max_sentence1_length": 420, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 107.07811717576365, "max_sentence2_length": 392, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 459781 + "unique_sentence2": 1993 }, "kmr_Latn-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 488482, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 123.15773660490736, "max_sentence1_length": 420, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 121.45017526289435, "max_sentence2_length": 365, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 488482 + "unique_sentence2": 1997 }, "kmr_Latn-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 488579, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 123.15773660490736, "max_sentence1_length": 420, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 121.49874812218327, "max_sentence2_length": 366, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 488579 + "unique_sentence2": 1996 }, "kmr_Latn-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 460859, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 123.15773660490736, "max_sentence1_length": 420, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 107.6179268903355, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 460859 + "unique_sentence2": 1996 }, "kmr_Latn-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 520093, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 123.15773660490736, "max_sentence1_length": 420, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 137.27941912869304, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520093 + "unique_sentence2": 1995 }, "kor_Hang-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 364586, + "unique_pairs": 1995, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 364586 + "unique_sentence2": 1995 }, "kor_Hang-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 377207, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 377207 + "unique_sentence2": 1997 }, "kor_Hang-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 429055, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 429055 + "unique_sentence2": 1996 }, "kor_Hang-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 432532, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 432532 + "unique_sentence2": 1996 }, "kor_Hang-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 381125, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 381125 + "unique_sentence2": 1997 }, "kor_Hang-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 376744, + "unique_pairs": 1995, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 376744 + "unique_sentence2": 1995 }, "kor_Hang-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 403205, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 403205 + "unique_sentence2": 1996 }, "kor_Hang-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 426513, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 426513 + "unique_sentence2": 1996 }, "kor_Hang-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 333701, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 333701 + "unique_sentence2": 1996 }, "kor_Hang-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 394980, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 394980 + "unique_sentence2": 1996 }, "kor_Hang-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 411781, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 411781 + "unique_sentence2": 1997 }, "kor_Hang-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 420377, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 420377 + "unique_sentence2": 1997 }, "kor_Hang-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 245031, + "unique_pairs": 1995, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 245031 + "unique_sentence2": 1994 }, "kor_Hang-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 392802, + "unique_pairs": 1995, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 392802 + "unique_sentence2": 1995 }, "kor_Hang-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 425320, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 425320 + "unique_sentence2": 1996 }, "kor_Hang-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 411271, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 411271 + "unique_sentence2": 1996 }, "kor_Hang-por_Latn": { + "num_samples": 1997, + "number_of_characters": 410620, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 410620 + "unique_sentence2": 1996 }, "kor_Hang-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 408298, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 408298 + "unique_sentence2": 1996 }, "kor_Hang-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 421605, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 421605 + "unique_sentence2": 1996 }, "kor_Hang-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 405914, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 405914 + "unique_sentence2": 1997 }, "kor_Hang-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 385232, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 385232 + "unique_sentence2": 1996 }, "kor_Hang-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 443366, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 443366 + "unique_sentence2": 1997 }, "kor_Hang-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 399018, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 399018 + "unique_sentence2": 1997 }, "kor_Hang-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 404526, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 404526 + "unique_sentence2": 1996 }, "kor_Hang-yue_Hant": { + "num_samples": 1997, + "number_of_characters": 212292, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 4, "average_sentence2_length": 39.502754131196795, "max_sentence2_length": 133, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 212292 + "unique_sentence2": 1996 }, "kor_Hang-zho_Hans": { + "num_samples": 1997, + "number_of_characters": 218366, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 4, "average_sentence2_length": 42.54431647471207, "max_sentence2_length": 263, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 218366 + "unique_sentence2": 1997 }, "kor_Hang-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 224883, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 224883 + "unique_sentence2": 1996 }, "kor_Hang-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 391137, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 66.80270405608412, "max_sentence1_length": 217, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 391137 + "unique_sentence2": 1996 }, "lao_Laoo-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 567609, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 133.69053580370556, "max_sentence1_length": 507, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 150.54031046569855, "max_sentence2_length": 478, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 567609 + "unique_sentence2": 1993 }, "lao_Laoo-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 510201, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 133.69053580370556, "max_sentence1_length": 507, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 121.79318978467701, "max_sentence2_length": 411, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 510201 + "unique_sentence2": 1992 }, "lao_Laoo-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 514700, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 133.69053580370556, "max_sentence1_length": 507, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 514700 + "unique_sentence2": 1997 }, "lao_Laoo-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 555471, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 133.69053580370556, "max_sentence1_length": 507, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 144.4621932899349, "max_sentence2_length": 517, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 555471 + "unique_sentence2": 1996 }, "lao_Laoo-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 526028, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 133.69053580370556, "max_sentence1_length": 507, + "unique_sentence1": 1997, "min_sentence2_length": 11, "average_sentence2_length": 129.7185778668002, "max_sentence2_length": 414, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526028 + "unique_sentence2": 1997 }, "lao_Laoo-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 578834, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 133.69053580370556, "max_sentence1_length": 507, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 156.16124186279418, "max_sentence2_length": 773, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 578834 + "unique_sentence2": 1997 }, "lao_Laoo-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 504448, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 133.69053580370556, "max_sentence1_length": 507, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 118.91236855282925, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 504448 + "unique_sentence2": 1996 }, "lav_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 515908, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 134.29544316474713, "max_sentence1_length": 503, + "unique_sentence1": 1994, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515908 + "unique_sentence2": 1997 }, "lav_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 537988, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 134.29544316474713, "max_sentence1_length": 503, + "unique_sentence1": 1994, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 537988 + "unique_sentence2": 1996 }, "lav_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 546564, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 134.29544316474713, "max_sentence1_length": 503, + "unique_sentence1": 1994, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 546564 + "unique_sentence2": 1997 }, "lav_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 527585, + "unique_pairs": 1995, "min_sentence1_length": 7, "average_sentence1_length": 134.29544316474713, "max_sentence1_length": 503, + "unique_sentence1": 1994, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 527585 + "unique_sentence2": 1995 }, "lit_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 490578, + "unique_pairs": 1995, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 490578 + "unique_sentence2": 1995 }, "lit_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 503199, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503199 + "unique_sentence2": 1997 }, "lit_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 555047, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 555047 + "unique_sentence2": 1996 }, "lit_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 558524, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 558524 + "unique_sentence2": 1996 }, "lit_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 507117, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 507117 + "unique_sentence2": 1997 }, "lit_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 502736, + "unique_pairs": 1995, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 502736 + "unique_sentence2": 1995 }, "lit_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 529197, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 529197 + "unique_sentence2": 1996 }, "lit_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 552505, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 552505 + "unique_sentence2": 1996 }, "lit_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 459693, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 459693 + "unique_sentence2": 1996 }, "lit_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 520972, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520972 + "unique_sentence2": 1996 }, "lit_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 537773, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 537773 + "unique_sentence2": 1997 }, "lit_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 546369, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 546369 + "unique_sentence2": 1997 }, "lit_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 371023, + "unique_pairs": 1995, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 371023 + "unique_sentence2": 1994 }, "lit_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 392802, + "unique_pairs": 1995, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 392802 + "unique_sentence2": 1995 }, "lit_Latn-lav_Latn": { + "num_samples": 1997, + "number_of_characters": 527585, + "unique_pairs": 1995, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 134.29544316474713, "max_sentence2_length": 503, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 527585 + "unique_sentence2": 1994 }, "lit_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 551312, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 551312 + "unique_sentence2": 1996 }, "lit_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 537263, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 537263 + "unique_sentence2": 1996 }, "lit_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 536612, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536612 + "unique_sentence2": 1996 }, "lit_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 534290, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 534290 + "unique_sentence2": 1996 }, "lit_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 547597, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547597 + "unique_sentence2": 1996 }, "lit_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 531906, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531906 + "unique_sentence2": 1997 }, "lit_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 511224, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 511224 + "unique_sentence2": 1996 }, "lit_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 569358, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 569358 + "unique_sentence2": 1997 }, "lit_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 525010, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525010 + "unique_sentence2": 1997 }, "lit_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 530518, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530518 + "unique_sentence2": 1996 }, "lit_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 350875, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 350875 + "unique_sentence2": 1996 }, "lit_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 517129, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 129.893340010015, "max_sentence1_length": 446, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 517129 + "unique_sentence2": 1996 }, "ltz_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 549109, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 140.58938407611416, "max_sentence1_length": 543, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 134.37756634952427, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549109 + "unique_sentence2": 1996 }, "ltz_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 532895, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 140.58938407611416, "max_sentence1_length": 543, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 126.25838758137206, "max_sentence2_length": 522, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532895 + "unique_sentence2": 1995 }, "ltz_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 576407, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 140.58938407611416, "max_sentence1_length": 543, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 576407 + "unique_sentence2": 1996 }, "ltz_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 528477, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 140.58938407611416, "max_sentence1_length": 543, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 528477 + "unique_sentence2": 1997 }, "ltz_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 538560, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 140.58938407611416, "max_sentence1_length": 543, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0951427140711, "max_sentence2_length": 433, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 538560 + "unique_sentence2": 1997 }, "ltz_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 542965, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 140.58938407611416, "max_sentence1_length": 543, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 131.30095142714072, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 542965 + "unique_sentence2": 1996 }, "ltz_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 572672, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 140.58938407611416, "max_sentence1_length": 543, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 572672 + "unique_sentence2": 1996 }, "ltz_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 529114, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 140.58938407611416, "max_sentence1_length": 543, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.36504757135704, "max_sentence2_length": 417, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 529114 + "unique_sentence2": 1996 }, "ltz_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 532201, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 140.58938407611416, "max_sentence1_length": 543, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 125.91086629944917, "max_sentence2_length": 482, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532201 + "unique_sentence2": 1996 }, "ltz_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 532584, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 140.58938407611416, "max_sentence1_length": 543, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532584 + "unique_sentence2": 1996 }, "mal_Mlym-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 551872, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 152.30445668502753, "max_sentence1_length": 540, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 551872 + "unique_sentence2": 1997 }, "mal_Mlym-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 604657, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 152.30445668502753, "max_sentence1_length": 540, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 150.478217325989, "max_sentence2_length": 448, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 604657 + "unique_sentence2": 1988 }, "mal_Mlym-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 597572, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 152.30445668502753, "max_sentence1_length": 540, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 146.93039559339007, "max_sentence2_length": 554, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 597572 + "unique_sentence2": 1997 }, "mal_Mlym-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 591124, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 152.30445668502753, "max_sentence1_length": 540, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 591124 + "unique_sentence2": 1997 }, "mal_Mlym-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 624460, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 152.30445668502753, "max_sentence1_length": 540, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 160.39459188783175, "max_sentence2_length": 559, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 624460 + "unique_sentence2": 1994 }, "mal_Mlym-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 578276, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 152.30445668502753, "max_sentence1_length": 540, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 137.26790185277918, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 578276 + "unique_sentence2": 1997 }, "mal_Mlym-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 581335, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 152.30445668502753, "max_sentence1_length": 540, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 138.79969954932398, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 581335 + "unique_sentence2": 1997 }, "mal_Mlym-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 582007, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 152.30445668502753, "max_sentence1_length": 540, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 139.1362043064597, "max_sentence2_length": 431, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582007 + "unique_sentence2": 1996 }, "mal_Mlym-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 613775, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 152.30445668502753, "max_sentence1_length": 540, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 155.04406609914872, "max_sentence2_length": 524, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 613775 + "unique_sentence2": 1997 }, "mal_Mlym-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 617792, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 152.30445668502753, "max_sentence1_length": 540, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 157.05558337506258, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 617792 + "unique_sentence2": 1997 }, "mar_Deva-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 504689, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 130.63945918878318, "max_sentence1_length": 443, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 504689 + "unique_sentence2": 1997 }, "mar_Deva-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 564735, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 130.63945918878318, "max_sentence1_length": 443, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 152.15222834251378, "max_sentence2_length": 609, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564735 + "unique_sentence2": 1996 }, "mar_Deva-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 508607, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 130.63945918878318, "max_sentence1_length": 443, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508607 + "unique_sentence2": 1997 }, "mar_Deva-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 536090, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 130.63945918878318, "max_sentence1_length": 443, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 137.80821231847773, "max_sentence2_length": 393, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536090 + "unique_sentence2": 1997 }, "mar_Deva-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 505818, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 130.63945918878318, "max_sentence1_length": 443, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 122.64947421131697, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 505818 + "unique_sentence2": 1997 }, "mar_Deva-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 522462, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 130.63945918878318, "max_sentence1_length": 443, + "unique_sentence1": 1995, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522462 + "unique_sentence2": 1996 }, "mar_Deva-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 526423, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 130.63945918878318, "max_sentence1_length": 443, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 132.96745117676514, "max_sentence2_length": 449, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526423 + "unique_sentence2": 1996 }, "mar_Deva-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 509110, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 130.63945918878318, "max_sentence1_length": 443, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 124.29794692038057, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509110 + "unique_sentence2": 1996 }, "mar_Deva-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 511309, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 130.63945918878318, "max_sentence1_length": 443, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 125.39909864797195, "max_sentence2_length": 383, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 511309 + "unique_sentence2": 1996 }, "mar_Deva-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 519628, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 130.63945918878318, "max_sentence1_length": 443, + "unique_sentence1": 1995, "min_sentence2_length": 5, "average_sentence2_length": 129.56484727090637, "max_sentence2_length": 441, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 519628 + "unique_sentence2": 1996 }, "mar_Deva-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 481214, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 130.63945918878318, "max_sentence1_length": 443, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 110.32899349023535, "max_sentence2_length": 335, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 481214 + "unique_sentence2": 1996 }, "mar_Deva-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 570848, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 130.63945918878318, "max_sentence1_length": 443, + "unique_sentence1": 1995, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 570848 + "unique_sentence2": 1997 }, "mar_Deva-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 508414, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 130.63945918878318, "max_sentence1_length": 443, + "unique_sentence1": 1995, "min_sentence2_length": 10, "average_sentence2_length": 123.9494241362043, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508414 + "unique_sentence2": 1996 }, "mar_Deva-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 508885, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 130.63945918878318, "max_sentence1_length": 443, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 124.18527791687531, "max_sentence2_length": 390, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508885 + "unique_sentence2": 1996 }, "mey_Arab-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 445016, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 107.07811717576365, "max_sentence1_length": 392, + "unique_sentence1": 1993, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 445016 + "unique_sentence2": 1995 }, "mey_Arab-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 466202, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 107.07811717576365, "max_sentence1_length": 392, + "unique_sentence1": 1993, "min_sentence2_length": 5, "average_sentence2_length": 126.37305958938407, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 466202 + "unique_sentence2": 1995 }, "mey_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 461555, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 107.07811717576365, "max_sentence1_length": 392, + "unique_sentence1": 1993, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 461555 + "unique_sentence2": 1997 }, "mey_Arab-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 457174, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 107.07811717576365, "max_sentence1_length": 392, + "unique_sentence1": 1993, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 457174 + "unique_sentence2": 1995 }, "mey_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 414131, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 107.07811717576365, "max_sentence1_length": 392, + "unique_sentence1": 1993, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 414131 + "unique_sentence2": 1996 }, "mey_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 459781, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 107.07811717576365, "max_sentence1_length": 392, + "unique_sentence1": 1993, "min_sentence2_length": 7, "average_sentence2_length": 123.15773660490736, "max_sentence2_length": 420, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 459781 + "unique_sentence2": 1996 }, "mey_Arab-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 456371, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 107.07811717576365, "max_sentence1_length": 392, + "unique_sentence1": 1993, "min_sentence2_length": 8, "average_sentence2_length": 121.45017526289435, "max_sentence2_length": 365, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 456371 + "unique_sentence2": 1997 }, "mey_Arab-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 456468, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 107.07811717576365, "max_sentence1_length": 392, + "unique_sentence1": 1993, "min_sentence2_length": 8, "average_sentence2_length": 121.49874812218327, "max_sentence2_length": 366, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 456468 + "unique_sentence2": 1996 }, "mey_Arab-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 428748, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 107.07811717576365, "max_sentence1_length": 392, + "unique_sentence1": 1993, "min_sentence2_length": 3, "average_sentence2_length": 107.6179268903355, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 428748 + "unique_sentence2": 1996 }, "mey_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 487982, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 107.07811717576365, "max_sentence1_length": 392, + "unique_sentence1": 1993, "min_sentence2_length": 3, "average_sentence2_length": 137.27941912869304, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 487982 + "unique_sentence2": 1995 }, "mkd_Cyrl-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 523981, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.1467200801202, "max_sentence1_length": 451, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 128.2373560340511, "max_sentence2_length": 422, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 523981 + "unique_sentence2": 1996 }, "mkd_Cyrl-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 522801, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.1467200801202, "max_sentence1_length": 451, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 127.64646970455684, "max_sentence2_length": 434, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522801 + "unique_sentence2": 1996 }, "mkd_Cyrl-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 537780, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.1467200801202, "max_sentence1_length": 451, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 135.14722083124687, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 537780 + "unique_sentence2": 1996 }, "mkd_Cyrl-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 509209, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.1467200801202, "max_sentence1_length": 451, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 120.84026039058588, "max_sentence2_length": 474, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509209 + "unique_sentence2": 1997 }, "mkd_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 515611, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.1467200801202, "max_sentence1_length": 451, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515611 + "unique_sentence2": 1997 }, "mkd_Cyrl-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 523816, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.1467200801202, "max_sentence1_length": 451, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 128.1547320981472, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 523816 + "unique_sentence2": 1997 }, "mkd_Cyrl-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 545757, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.1467200801202, "max_sentence1_length": 451, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 545757 + "unique_sentence2": 1996 }, "mkd_Cyrl-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 542784, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.1467200801202, "max_sentence1_length": 451, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 542784 + "unique_sentence2": 1996 }, "mkd_Cyrl-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 520860, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.1467200801202, "max_sentence1_length": 451, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 126.67451176765148, "max_sentence2_length": 403, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520860 + "unique_sentence2": 1996 }, "mkd_Cyrl-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 520787, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.1467200801202, "max_sentence1_length": 451, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 126.63795693540311, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520787 + "unique_sentence2": 1996 }, "mkd_Cyrl-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 520194, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.1467200801202, "max_sentence1_length": 451, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 126.34101151727592, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520194 + "unique_sentence2": 1995 }, "mkd_Cyrl-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 524032, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.1467200801202, "max_sentence1_length": 451, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 128.26289434151226, "max_sentence2_length": 452, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 524032 + "unique_sentence2": 1996 }, "mkd_Cyrl-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 530674, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.1467200801202, "max_sentence1_length": 451, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 131.58888332498748, "max_sentence2_length": 440, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530674 + "unique_sentence2": 1996 }, "mlg_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 568028, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 160.39459188783175, "max_sentence1_length": 559, + "unique_sentence1": 1994, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 568028 + "unique_sentence2": 1997 }, "mlg_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 620813, + "unique_pairs": 1995, "min_sentence1_length": 6, "average_sentence1_length": 160.39459188783175, "max_sentence1_length": 559, + "unique_sentence1": 1994, "min_sentence2_length": 10, "average_sentence2_length": 150.478217325989, "max_sentence2_length": 448, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 620813 + "unique_sentence2": 1988 }, "mlg_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 613728, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 160.39459188783175, "max_sentence1_length": 559, + "unique_sentence1": 1994, "min_sentence2_length": 10, "average_sentence2_length": 146.93039559339007, "max_sentence2_length": 554, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 613728 + "unique_sentence2": 1997 }, "mlg_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 607280, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 160.39459188783175, "max_sentence1_length": 559, + "unique_sentence1": 1994, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 607280 + "unique_sentence2": 1997 }, "mlg_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 624460, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 160.39459188783175, "max_sentence1_length": 559, + "unique_sentence1": 1994, "min_sentence2_length": 7, "average_sentence2_length": 152.30445668502753, "max_sentence2_length": 540, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 624460 + "unique_sentence2": 1996 }, "mlg_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 594432, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 160.39459188783175, "max_sentence1_length": 559, + "unique_sentence1": 1994, "min_sentence2_length": 9, "average_sentence2_length": 137.26790185277918, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 594432 + "unique_sentence2": 1997 }, "mlg_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 597491, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 160.39459188783175, "max_sentence1_length": 559, + "unique_sentence1": 1994, "min_sentence2_length": 6, "average_sentence2_length": 138.79969954932398, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 597491 + "unique_sentence2": 1997 }, "mlg_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 598163, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 160.39459188783175, "max_sentence1_length": 559, + "unique_sentence1": 1994, "min_sentence2_length": 8, "average_sentence2_length": 139.1362043064597, "max_sentence2_length": 431, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 598163 + "unique_sentence2": 1996 }, "mlg_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 629931, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 160.39459188783175, "max_sentence1_length": 559, + "unique_sentence1": 1994, "min_sentence2_length": 9, "average_sentence2_length": 155.04406609914872, "max_sentence2_length": 524, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 629931 + "unique_sentence2": 1997 }, "mlg_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 633948, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 160.39459188783175, "max_sentence1_length": 559, + "unique_sentence1": 1994, "min_sentence2_length": 7, "average_sentence2_length": 157.05558337506258, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 633948 + "unique_sentence2": 1997 }, "mlt_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 560435, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 138.94591887831749, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 141.6925388082123, "max_sentence2_length": 460, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 560435 + "unique_sentence2": 1997 }, "mlt_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 525195, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 138.94591887831749, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525195 + "unique_sentence2": 1997 }, "mlt_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 570583, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 138.94591887831749, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 570583 + "unique_sentence2": 1996 }, "mlt_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 549461, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 138.94591887831749, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 136.19729594391586, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549461 + "unique_sentence2": 1996 }, "mlt_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 566692, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 138.94591887831749, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 144.82573860791186, "max_sentence2_length": 623, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 566692 + "unique_sentence2": 1996 }, "mlt_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 554690, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 138.94591887831749, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 554690 + "unique_sentence2": 1996 }, "mlt_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 569960, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 138.94591887831749, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 146.4621932899349, "max_sentence2_length": 518, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 569960 + "unique_sentence2": 1997 }, "mlt_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 565675, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 138.94591887831749, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 565675 + "unique_sentence2": 1996 }, "mon_Mong-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 559677, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 129.7185778668002, "max_sentence1_length": 414, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 150.54031046569855, "max_sentence2_length": 478, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 559677 + "unique_sentence2": 1993 }, "mon_Mong-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 502269, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 129.7185778668002, "max_sentence1_length": 414, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 121.79318978467701, "max_sentence2_length": 411, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 502269 + "unique_sentence2": 1992 }, "mon_Mong-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 506768, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 129.7185778668002, "max_sentence1_length": 414, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506768 + "unique_sentence2": 1997 }, "mon_Mong-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 547539, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 129.7185778668002, "max_sentence1_length": 414, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 144.4621932899349, "max_sentence2_length": 517, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547539 + "unique_sentence2": 1996 }, "mon_Mong-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 526028, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 129.7185778668002, "max_sentence1_length": 414, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 133.69053580370556, "max_sentence2_length": 507, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526028 + "unique_sentence2": 1997 }, "mon_Mong-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 570902, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 129.7185778668002, "max_sentence1_length": 414, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 156.16124186279418, "max_sentence2_length": 773, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 570902 + "unique_sentence2": 1997 }, "mon_Mong-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 496516, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 129.7185778668002, "max_sentence1_length": 414, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 118.91236855282925, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 496516 + "unique_sentence2": 1996 }, "mri_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 521844, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 137.26790185277918, "max_sentence1_length": 443, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 521844 + "unique_sentence2": 1997 }, "mri_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 574629, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 137.26790185277918, "max_sentence1_length": 443, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 150.478217325989, "max_sentence2_length": 448, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 574629 + "unique_sentence2": 1988 }, "mri_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 567544, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 137.26790185277918, "max_sentence1_length": 443, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 146.93039559339007, "max_sentence2_length": 554, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 567544 + "unique_sentence2": 1997 }, "mri_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 561096, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 137.26790185277918, "max_sentence1_length": 443, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561096 + "unique_sentence2": 1997 }, "mri_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 578276, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 137.26790185277918, "max_sentence1_length": 443, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 152.30445668502753, "max_sentence2_length": 540, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 578276 + "unique_sentence2": 1996 }, "mri_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 594432, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 137.26790185277918, "max_sentence1_length": 443, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 160.39459188783175, "max_sentence2_length": 559, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 594432 + "unique_sentence2": 1994 }, "mri_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 551307, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 137.26790185277918, "max_sentence1_length": 443, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 138.79969954932398, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 551307 + "unique_sentence2": 1997 }, "mri_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 551979, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 137.26790185277918, "max_sentence1_length": 443, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 139.1362043064597, "max_sentence2_length": 431, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 551979 + "unique_sentence2": 1996 }, "mri_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 583747, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 137.26790185277918, "max_sentence1_length": 443, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 155.04406609914872, "max_sentence2_length": 524, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 583747 + "unique_sentence2": 1997 }, "mri_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 587764, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 137.26790185277918, "max_sentence1_length": 443, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 157.05558337506258, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 587764 + "unique_sentence2": 1997 }, "msa_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 524903, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 138.79969954932398, "max_sentence1_length": 463, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 524903 + "unique_sentence2": 1997 }, "msa_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 577688, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 138.79969954932398, "max_sentence1_length": 463, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 150.478217325989, "max_sentence2_length": 448, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 577688 + "unique_sentence2": 1988 }, "msa_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 570603, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 138.79969954932398, "max_sentence1_length": 463, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 146.93039559339007, "max_sentence2_length": 554, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 570603 + "unique_sentence2": 1997 }, "msa_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 564155, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 138.79969954932398, "max_sentence1_length": 463, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564155 + "unique_sentence2": 1997 }, "msa_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 581335, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 138.79969954932398, "max_sentence1_length": 463, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 152.30445668502753, "max_sentence2_length": 540, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 581335 + "unique_sentence2": 1996 }, "msa_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 597491, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 138.79969954932398, "max_sentence1_length": 463, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 160.39459188783175, "max_sentence2_length": 559, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 597491 + "unique_sentence2": 1994 }, "msa_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 551307, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 138.79969954932398, "max_sentence1_length": 463, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 137.26790185277918, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 551307 + "unique_sentence2": 1997 }, "msa_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 555038, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 138.79969954932398, "max_sentence1_length": 463, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 139.1362043064597, "max_sentence2_length": 431, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 555038 + "unique_sentence2": 1996 }, "msa_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 586806, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 138.79969954932398, "max_sentence1_length": 463, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 155.04406609914872, "max_sentence2_length": 524, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 586806 + "unique_sentence2": 1997 }, "msa_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 590823, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 138.79969954932398, "max_sentence1_length": 463, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 157.05558337506258, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 590823 + "unique_sentence2": 1997 }, "mya_Mymr-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 612483, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 156.16124186279418, "max_sentence1_length": 773, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 150.54031046569855, "max_sentence2_length": 478, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 612483 + "unique_sentence2": 1993 }, "mya_Mymr-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 555075, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 156.16124186279418, "max_sentence1_length": 773, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 121.79318978467701, "max_sentence2_length": 411, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 555075 + "unique_sentence2": 1992 }, "mya_Mymr-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 559574, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 156.16124186279418, "max_sentence1_length": 773, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 559574 + "unique_sentence2": 1997 }, "mya_Mymr-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 600345, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 156.16124186279418, "max_sentence1_length": 773, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 144.4621932899349, "max_sentence2_length": 517, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 600345 + "unique_sentence2": 1996 }, "mya_Mymr-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 578834, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 156.16124186279418, "max_sentence1_length": 773, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 133.69053580370556, "max_sentence2_length": 507, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 578834 + "unique_sentence2": 1997 }, "mya_Mymr-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 570902, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 156.16124186279418, "max_sentence1_length": 773, + "unique_sentence1": 1997, "min_sentence2_length": 11, "average_sentence2_length": 129.7185778668002, "max_sentence2_length": 414, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 570902 + "unique_sentence2": 1997 }, "mya_Mymr-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 549322, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 156.16124186279418, "max_sentence1_length": 773, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 118.91236855282925, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549322 + "unique_sentence2": 1996 }, "nde_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 596231, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 149.09313970956435, "max_sentence1_length": 590, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 149.47020530796195, "max_sentence2_length": 465, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 596231 + "unique_sentence2": 1997 }, "nde_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 545459, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 149.09313970956435, "max_sentence1_length": 590, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 545459 + "unique_sentence2": 1997 }, "nde_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 536717, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 149.09313970956435, "max_sentence1_length": 590, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 119.6685027541312, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536717 + "unique_sentence2": 1994 }, "nde_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 526219, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 149.09313970956435, "max_sentence1_length": 590, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 114.4116174261392, "max_sentence2_length": 376, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526219 + "unique_sentence2": 1996 }, "nde_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 601526, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 149.09313970956435, "max_sentence1_length": 590, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 152.12168252378567, "max_sentence2_length": 541, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 601526 + "unique_sentence2": 1996 }, "nde_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 582021, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 149.09313970956435, "max_sentence1_length": 590, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 142.35453179769655, "max_sentence2_length": 464, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582021 + "unique_sentence2": 1993 }, "nde_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 596069, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 149.09313970956435, "max_sentence1_length": 590, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 149.38908362543816, "max_sentence2_length": 511, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 596069 + "unique_sentence2": 1995 }, "nde_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 597495, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 149.09313970956435, "max_sentence1_length": 590, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 150.10315473209815, "max_sentence2_length": 535, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 597495 + "unique_sentence2": 1993 }, "nep_Deva-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 492025, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 124.29794692038057, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 492025 + "unique_sentence2": 1997 }, "nep_Deva-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 552071, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 124.29794692038057, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 152.15222834251378, "max_sentence2_length": 609, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 552071 + "unique_sentence2": 1996 }, "nep_Deva-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 495943, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 124.29794692038057, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495943 + "unique_sentence2": 1997 }, "nep_Deva-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 523426, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 124.29794692038057, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 137.80821231847773, "max_sentence2_length": 393, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 523426 + "unique_sentence2": 1997 }, "nep_Deva-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 493154, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 124.29794692038057, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 122.64947421131697, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 493154 + "unique_sentence2": 1997 }, "nep_Deva-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 509798, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 124.29794692038057, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509798 + "unique_sentence2": 1996 }, "nep_Deva-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 513759, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 124.29794692038057, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 132.96745117676514, "max_sentence2_length": 449, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 513759 + "unique_sentence2": 1996 }, "nep_Deva-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 509110, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 124.29794692038057, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 130.63945918878318, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509110 + "unique_sentence2": 1995 }, "nep_Deva-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 498645, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 124.29794692038057, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 125.39909864797195, "max_sentence2_length": 383, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 498645 + "unique_sentence2": 1996 }, "nep_Deva-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 506964, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 124.29794692038057, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 129.56484727090637, "max_sentence2_length": 441, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506964 + "unique_sentence2": 1996 }, "nep_Deva-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 468550, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 124.29794692038057, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 110.32899349023535, "max_sentence2_length": 335, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 468550 + "unique_sentence2": 1996 }, "nep_Deva-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 558184, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 124.29794692038057, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 558184 + "unique_sentence2": 1997 }, "nep_Deva-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 495750, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 124.29794692038057, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 123.9494241362043, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495750 + "unique_sentence2": 1996 }, "nep_Deva-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 496221, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 124.29794692038057, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.18527791687531, "max_sentence2_length": 390, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 496221 + "unique_sentence2": 1996 }, "nld_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 560267, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 134.37756634952427, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 560267 + "unique_sentence2": 1996 }, "nld_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 523096, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 523096 + "unique_sentence2": 1995 }, "nld_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 535717, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 535717 + "unique_sentence2": 1997 }, "nld_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 544053, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 126.25838758137206, "max_sentence2_length": 522, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 544053 + "unique_sentence2": 1995 }, "nld_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 587565, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 587565 + "unique_sentence2": 1996 }, "nld_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 591042, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 591042 + "unique_sentence2": 1996 }, "nld_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 539635, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 539635 + "unique_sentence2": 1997 }, "nld_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 549718, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0951427140711, "max_sentence2_length": 433, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549718 + "unique_sentence2": 1997 }, "nld_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 535254, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 535254 + "unique_sentence2": 1995 }, "nld_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 561715, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561715 + "unique_sentence2": 1996 }, "nld_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 585023, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 585023 + "unique_sentence2": 1996 }, "nld_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 492211, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 492211 + "unique_sentence2": 1996 }, "nld_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 553490, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 553490 + "unique_sentence2": 1996 }, "nld_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 570291, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 570291 + "unique_sentence2": 1997 }, "nld_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 578887, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 578887 + "unique_sentence2": 1997 }, "nld_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 554123, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 131.30095142714072, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 554123 + "unique_sentence2": 1996 }, "nld_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 403541, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 403541 + "unique_sentence2": 1994 }, "nld_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 425320, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 425320 + "unique_sentence2": 1995 }, "nld_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 551312, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 551312 + "unique_sentence2": 1995 }, "nld_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 572672, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 140.58938407611416, "max_sentence2_length": 543, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 572672 + "unique_sentence2": 1996 }, "nld_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 540272, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.36504757135704, "max_sentence2_length": 417, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540272 + "unique_sentence2": 1996 }, "nld_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 543359, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 125.91086629944917, "max_sentence2_length": 482, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 543359 + "unique_sentence2": 1996 }, "nld_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 569781, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 569781 + "unique_sentence2": 1996 }, "nld_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 569130, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 569130 + "unique_sentence2": 1996 }, "nld_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 566808, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 566808 + "unique_sentence2": 1996 }, "nld_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 580115, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 580115 + "unique_sentence2": 1996 }, "nld_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 564424, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564424 + "unique_sentence2": 1997 }, "nld_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 543742, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 543742 + "unique_sentence2": 1996 }, "nld_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 601876, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 601876 + "unique_sentence2": 1997 }, "nld_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 557528, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 557528 + "unique_sentence2": 1997 }, "nld_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 563036, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 563036 + "unique_sentence2": 1996 }, "nld_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 383393, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 383393 + "unique_sentence2": 1996 }, "nld_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 549647, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 146.1767651477216, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549647 + "unique_sentence2": 1996 }, "nno_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 516709, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 124.36504757135704, "max_sentence1_length": 417, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 134.37756634952427, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 516709 + "unique_sentence2": 1996 }, "nno_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 500495, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 124.36504757135704, "max_sentence1_length": 417, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 126.25838758137206, "max_sentence2_length": 522, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 500495 + "unique_sentence2": 1995 }, "nno_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 544007, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 124.36504757135704, "max_sentence1_length": 417, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 544007 + "unique_sentence2": 1996 }, "nno_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 496077, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.36504757135704, "max_sentence1_length": 417, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 496077 + "unique_sentence2": 1997 }, "nno_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 506160, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.36504757135704, "max_sentence1_length": 417, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0951427140711, "max_sentence2_length": 433, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506160 + "unique_sentence2": 1997 }, "nno_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 510565, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 124.36504757135704, "max_sentence1_length": 417, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 131.30095142714072, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 510565 + "unique_sentence2": 1996 }, "nno_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 529114, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 124.36504757135704, "max_sentence1_length": 417, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 140.58938407611416, "max_sentence2_length": 543, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 529114 + "unique_sentence2": 1996 }, "nno_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 540272, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 124.36504757135704, "max_sentence1_length": 417, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540272 + "unique_sentence2": 1996 }, "nno_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 499801, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 124.36504757135704, "max_sentence1_length": 417, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 125.91086629944917, "max_sentence2_length": 482, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 499801 + "unique_sentence2": 1996 }, "nno_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 500184, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 124.36504757135704, "max_sentence1_length": 417, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 500184 + "unique_sentence2": 1996 }, "nob_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 519796, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 125.91086629944917, "max_sentence1_length": 482, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 134.37756634952427, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 519796 + "unique_sentence2": 1996 }, "nob_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 503582, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 125.91086629944917, "max_sentence1_length": 482, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 126.25838758137206, "max_sentence2_length": 522, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503582 + "unique_sentence2": 1995 }, "nob_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 547094, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 125.91086629944917, "max_sentence1_length": 482, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547094 + "unique_sentence2": 1996 }, "nob_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 499164, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 125.91086629944917, "max_sentence1_length": 482, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 499164 + "unique_sentence2": 1997 }, "nob_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 509247, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 125.91086629944917, "max_sentence1_length": 482, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0951427140711, "max_sentence2_length": 433, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509247 + "unique_sentence2": 1997 }, "nob_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 513652, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 125.91086629944917, "max_sentence1_length": 482, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 131.30095142714072, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 513652 + "unique_sentence2": 1996 }, "nob_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 532201, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 125.91086629944917, "max_sentence1_length": 482, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 140.58938407611416, "max_sentence2_length": 543, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532201 + "unique_sentence2": 1996 }, "nob_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 543359, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 125.91086629944917, "max_sentence1_length": 482, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 543359 + "unique_sentence2": 1996 }, "nob_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 499801, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 125.91086629944917, "max_sentence1_length": 482, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.36504757135704, "max_sentence2_length": 417, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 499801 + "unique_sentence2": 1996 }, "nob_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 503271, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 125.91086629944917, "max_sentence1_length": 482, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503271 + "unique_sentence2": 1996 }, "nso_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 459006, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 145.96845267901853, "max_sentence1_length": 487, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 83.87931897846771, "max_sentence2_length": 290, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 459006 + "unique_sentence2": 1994 }, "nso_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 539219, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 145.96845267901853, "max_sentence1_length": 487, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 539219 + "unique_sentence2": 1997 }, "nso_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 561465, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 145.96845267901853, "max_sentence1_length": 487, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 135.185778668002, "max_sentence2_length": 483, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561465 + "unique_sentence2": 1997 }, "nso_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 537600, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 145.96845267901853, "max_sentence1_length": 487, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 123.23535302954431, "max_sentence2_length": 469, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 537600 + "unique_sentence2": 1997 }, "nso_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 528930, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 145.96845267901853, "max_sentence1_length": 487, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 118.89384076114172, "max_sentence2_length": 466, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 528930 + "unique_sentence2": 1984 }, "nso_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 582791, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 145.96845267901853, "max_sentence1_length": 487, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 145.8647971957937, "max_sentence2_length": 455, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582791 + "unique_sentence2": 1997 }, "nso_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 579641, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 145.96845267901853, "max_sentence1_length": 487, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 144.28743114672008, "max_sentence2_length": 510, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 579641 + "unique_sentence2": 1996 }, "nso_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 564008, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 145.96845267901853, "max_sentence1_length": 487, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564008 + "unique_sentence2": 1997 }, "nso_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 456737, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 145.96845267901853, "max_sentence1_length": 487, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 82.743114672008, "max_sentence2_length": 272, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 456737 + "unique_sentence2": 1996 }, "nso_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 625782, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 145.96845267901853, "max_sentence1_length": 487, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 167.39258888332498, "max_sentence2_length": 556, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 625782 + "unique_sentence2": 1997 }, "nso_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 531302, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 145.96845267901853, "max_sentence1_length": 487, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 120.08162243365048, "max_sentence2_length": 405, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531302 + "unique_sentence2": 1990 }, "nso_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 559589, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 145.96845267901853, "max_sentence1_length": 487, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 134.2463695543315, "max_sentence2_length": 492, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 559589 + "unique_sentence2": 1997 }, "nso_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 607587, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 145.96845267901853, "max_sentence1_length": 487, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 158.2814221331998, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 607587 + "unique_sentence2": 1996 }, "nso_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 549231, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 145.96845267901853, "max_sentence1_length": 487, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549231 + "unique_sentence2": 1996 }, "nya_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 582774, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 142.35453179769655, "max_sentence1_length": 464, + "unique_sentence1": 1993, "min_sentence2_length": 8, "average_sentence2_length": 149.47020530796195, "max_sentence2_length": 465, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582774 + "unique_sentence2": 1997 }, "nya_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 532002, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 142.35453179769655, "max_sentence1_length": 464, + "unique_sentence1": 1993, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532002 + "unique_sentence2": 1997 }, "nya_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 523260, + "unique_pairs": 1995, "min_sentence1_length": 10, "average_sentence1_length": 142.35453179769655, "max_sentence1_length": 464, + "unique_sentence1": 1993, "min_sentence2_length": 6, "average_sentence2_length": 119.6685027541312, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 523260 + "unique_sentence2": 1994 }, "nya_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 512762, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 142.35453179769655, "max_sentence1_length": 464, + "unique_sentence1": 1993, "min_sentence2_length": 6, "average_sentence2_length": 114.4116174261392, "max_sentence2_length": 376, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 512762 + "unique_sentence2": 1996 }, "nya_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 588069, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 142.35453179769655, "max_sentence1_length": 464, + "unique_sentence1": 1993, "min_sentence2_length": 10, "average_sentence2_length": 152.12168252378567, "max_sentence2_length": 541, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 588069 + "unique_sentence2": 1996 }, "nya_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 582021, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 142.35453179769655, "max_sentence1_length": 464, + "unique_sentence1": 1993, "min_sentence2_length": 6, "average_sentence2_length": 149.09313970956435, "max_sentence2_length": 590, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582021 + "unique_sentence2": 1997 }, "nya_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 582612, + "unique_pairs": 1996, "min_sentence1_length": 10, "average_sentence1_length": 142.35453179769655, "max_sentence1_length": 464, + "unique_sentence1": 1993, "min_sentence2_length": 6, "average_sentence2_length": 149.38908362543816, "max_sentence2_length": 511, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582612 + "unique_sentence2": 1995 }, "nya_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 584038, + "unique_pairs": 1996, "min_sentence1_length": 10, "average_sentence1_length": 142.35453179769655, "max_sentence1_length": 464, + "unique_sentence1": 1993, "min_sentence2_length": 10, "average_sentence2_length": 150.10315473209815, "max_sentence2_length": 535, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 584038 + "unique_sentence2": 1993 }, "orm_Ethi-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 404938, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 118.89384076114172, "max_sentence1_length": 466, + "unique_sentence1": 1984, "min_sentence2_length": 1, "average_sentence2_length": 83.87931897846771, "max_sentence2_length": 290, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 404938 + "unique_sentence2": 1994 }, "orm_Ethi-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 485151, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 118.89384076114172, "max_sentence1_length": 466, + "unique_sentence1": 1984, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 485151 + "unique_sentence2": 1997 }, "orm_Ethi-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 507397, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 118.89384076114172, "max_sentence1_length": 466, + "unique_sentence1": 1984, "min_sentence2_length": 8, "average_sentence2_length": 135.185778668002, "max_sentence2_length": 483, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 507397 + "unique_sentence2": 1997 }, "orm_Ethi-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 483532, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 118.89384076114172, "max_sentence1_length": 466, + "unique_sentence1": 1984, "min_sentence2_length": 6, "average_sentence2_length": 123.23535302954431, "max_sentence2_length": 469, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 483532 + "unique_sentence2": 1997 }, "orm_Ethi-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 528930, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 118.89384076114172, "max_sentence1_length": 466, + "unique_sentence1": 1984, "min_sentence2_length": 5, "average_sentence2_length": 145.96845267901853, "max_sentence2_length": 487, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 528930 + "unique_sentence2": 1996 }, "orm_Ethi-som_Latn": { + "num_samples": 1997, + "number_of_characters": 528723, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 118.89384076114172, "max_sentence1_length": 466, + "unique_sentence1": 1984, "min_sentence2_length": 8, "average_sentence2_length": 145.8647971957937, "max_sentence2_length": 455, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 528723 + "unique_sentence2": 1997 }, "orm_Ethi-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 525573, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 118.89384076114172, "max_sentence1_length": 466, + "unique_sentence1": 1984, "min_sentence2_length": 8, "average_sentence2_length": 144.28743114672008, "max_sentence2_length": 510, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525573 + "unique_sentence2": 1996 }, "orm_Ethi-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 509940, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 118.89384076114172, "max_sentence1_length": 466, + "unique_sentence1": 1984, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509940 + "unique_sentence2": 1997 }, "orm_Ethi-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 402669, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 118.89384076114172, "max_sentence1_length": 466, + "unique_sentence1": 1984, "min_sentence2_length": 5, "average_sentence2_length": 82.743114672008, "max_sentence2_length": 272, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 402669 + "unique_sentence2": 1996 }, "orm_Ethi-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 571714, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 118.89384076114172, "max_sentence1_length": 466, + "unique_sentence1": 1984, "min_sentence2_length": 6, "average_sentence2_length": 167.39258888332498, "max_sentence2_length": 556, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 571714 + "unique_sentence2": 1997 }, "orm_Ethi-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 477234, + "unique_pairs": 1992, "min_sentence1_length": 9, "average_sentence1_length": 118.89384076114172, "max_sentence1_length": 466, + "unique_sentence1": 1984, "min_sentence2_length": 6, "average_sentence2_length": 120.08162243365048, "max_sentence2_length": 405, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 477234 + "unique_sentence2": 1990 }, "orm_Ethi-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 505521, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 118.89384076114172, "max_sentence1_length": 466, + "unique_sentence1": 1984, "min_sentence2_length": 6, "average_sentence2_length": 134.2463695543315, "max_sentence2_length": 492, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 505521 + "unique_sentence2": 1997 }, "orm_Ethi-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 553519, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 118.89384076114172, "max_sentence1_length": 466, + "unique_sentence1": 1984, "min_sentence2_length": 7, "average_sentence2_length": 158.2814221331998, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 553519 + "unique_sentence2": 1996 }, "orm_Ethi-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 495163, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 118.89384076114172, "max_sentence1_length": 466, + "unique_sentence1": 1984, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495163 + "unique_sentence2": 1996 }, "pan_Guru-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 494224, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 125.39909864797195, "max_sentence1_length": 383, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 494224 + "unique_sentence2": 1997 }, "pan_Guru-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 554270, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 125.39909864797195, "max_sentence1_length": 383, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 152.15222834251378, "max_sentence2_length": 609, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 554270 + "unique_sentence2": 1996 }, "pan_Guru-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 498142, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 125.39909864797195, "max_sentence1_length": 383, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 498142 + "unique_sentence2": 1997 }, "pan_Guru-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 525625, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 125.39909864797195, "max_sentence1_length": 383, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 137.80821231847773, "max_sentence2_length": 393, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525625 + "unique_sentence2": 1997 }, "pan_Guru-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 495353, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 125.39909864797195, "max_sentence1_length": 383, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 122.64947421131697, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495353 + "unique_sentence2": 1997 }, "pan_Guru-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 511997, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 125.39909864797195, "max_sentence1_length": 383, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 511997 + "unique_sentence2": 1996 }, "pan_Guru-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 515958, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 125.39909864797195, "max_sentence1_length": 383, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 132.96745117676514, "max_sentence2_length": 449, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515958 + "unique_sentence2": 1996 }, "pan_Guru-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 511309, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 125.39909864797195, "max_sentence1_length": 383, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 130.63945918878318, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 511309 + "unique_sentence2": 1995 }, "pan_Guru-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 498645, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 125.39909864797195, "max_sentence1_length": 383, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 124.29794692038057, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 498645 + "unique_sentence2": 1996 }, "pan_Guru-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 509163, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 125.39909864797195, "max_sentence1_length": 383, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 129.56484727090637, "max_sentence2_length": 441, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509163 + "unique_sentence2": 1996 }, "pan_Guru-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 470749, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 125.39909864797195, "max_sentence1_length": 383, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 110.32899349023535, "max_sentence2_length": 335, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 470749 + "unique_sentence2": 1996 }, "pan_Guru-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 560383, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 125.39909864797195, "max_sentence1_length": 383, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 560383 + "unique_sentence2": 1997 }, "pan_Guru-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 497949, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 125.39909864797195, "max_sentence1_length": 383, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 123.9494241362043, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 497949 + "unique_sentence2": 1996 }, "pan_Guru-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 498420, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 125.39909864797195, "max_sentence1_length": 383, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.18527791687531, "max_sentence2_length": 390, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 498420 + "unique_sentence2": 1996 }, "pol_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 509047, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509047 + "unique_sentence2": 1995 }, "pol_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 533956, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 128.2373560340511, "max_sentence2_length": 422, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 533956 + "unique_sentence2": 1996 }, "pol_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 521668, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 521668 + "unique_sentence2": 1997 }, "pol_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 532776, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 127.64646970455684, "max_sentence2_length": 434, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532776 + "unique_sentence2": 1996 }, "pol_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 547755, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 135.14722083124687, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547755 + "unique_sentence2": 1996 }, "pol_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 519184, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 120.84026039058588, "max_sentence2_length": 474, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 519184 + "unique_sentence2": 1997 }, "pol_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 573516, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 573516 + "unique_sentence2": 1996 }, "pol_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 576993, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 576993 + "unique_sentence2": 1996 }, "pol_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 525586, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525586 + "unique_sentence2": 1997 }, "pol_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 521205, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 521205 + "unique_sentence2": 1995 }, "pol_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 547666, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547666 + "unique_sentence2": 1996 }, "pol_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 570974, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 570974 + "unique_sentence2": 1996 }, "pol_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 478162, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 478162 + "unique_sentence2": 1996 }, "pol_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 539441, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 539441 + "unique_sentence2": 1996 }, "pol_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 533791, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 128.1547320981472, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 533791 + "unique_sentence2": 1997 }, "pol_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 556242, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 556242 + "unique_sentence2": 1997 }, "pol_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 564838, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564838 + "unique_sentence2": 1997 }, "pol_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 389492, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 389492 + "unique_sentence2": 1994 }, "pol_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 411271, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 411271 + "unique_sentence2": 1995 }, "pol_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 537263, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 537263 + "unique_sentence2": 1995 }, "pol_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 545757, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 134.1467200801202, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 545757 + "unique_sentence2": 1997 }, "pol_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 569781, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 569781 + "unique_sentence2": 1996 }, "pol_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 555081, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 555081 + "unique_sentence2": 1996 }, "pol_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 552759, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 552759 + "unique_sentence2": 1996 }, "pol_Latn-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 530835, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 126.67451176765148, "max_sentence2_length": 403, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530835 + "unique_sentence2": 1996 }, "pol_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 530762, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 126.63795693540311, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530762 + "unique_sentence2": 1996 }, "pol_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 566066, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 566066 + "unique_sentence2": 1996 }, "pol_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 530169, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 126.34101151727592, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530169 + "unique_sentence2": 1995 }, "pol_Latn-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 534007, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 128.26289434151226, "max_sentence2_length": 452, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 534007 + "unique_sentence2": 1996 }, "pol_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 550375, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 550375 + "unique_sentence2": 1997 }, "pol_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 529693, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 529693 + "unique_sentence2": 1996 }, "pol_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 587827, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 587827 + "unique_sentence2": 1997 }, "pol_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 543479, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 543479 + "unique_sentence2": 1997 }, "pol_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 540649, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 131.58888332498748, "max_sentence2_length": 440, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540649 + "unique_sentence2": 1996 }, "pol_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 548987, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 548987 + "unique_sentence2": 1996 }, "pol_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 369344, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 369344 + "unique_sentence2": 1996 }, "pol_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 535598, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 139.14171256885328, "max_sentence1_length": 468, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 535598 + "unique_sentence2": 1996 }, "por_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 508396, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508396 + "unique_sentence2": 1995 }, "por_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 521017, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 521017 + "unique_sentence2": 1997 }, "por_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 560175, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 141.6925388082123, "max_sentence2_length": 460, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 560175 + "unique_sentence2": 1997 }, "por_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 572865, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 572865 + "unique_sentence2": 1996 }, "por_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 576342, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 576342 + "unique_sentence2": 1996 }, "por_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 524935, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 524935 + "unique_sentence2": 1997 }, "por_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 520554, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520554 + "unique_sentence2": 1995 }, "por_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 547015, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547015 + "unique_sentence2": 1996 }, "por_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 570323, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 570323 + "unique_sentence2": 1996 }, "por_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 549201, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 136.19729594391586, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549201 + "unique_sentence2": 1996 }, "por_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 477511, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 477511 + "unique_sentence2": 1996 }, "por_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 538790, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 538790 + "unique_sentence2": 1996 }, "por_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 555591, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 555591 + "unique_sentence2": 1997 }, "por_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 564187, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564187 + "unique_sentence2": 1997 }, "por_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 566432, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 144.82573860791186, "max_sentence2_length": 623, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 566432 + "unique_sentence2": 1996 }, "por_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 388841, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 388841 + "unique_sentence2": 1994 }, "por_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 410620, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 410620 + "unique_sentence2": 1995 }, "por_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 536612, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536612 + "unique_sentence2": 1995 }, "por_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 554690, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 138.94591887831749, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 554690 + "unique_sentence2": 1996 }, "por_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 569130, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 569130 + "unique_sentence2": 1996 }, "por_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 555081, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 555081 + "unique_sentence2": 1996 }, "por_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 569700, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 146.4621932899349, "max_sentence2_length": 518, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 569700 + "unique_sentence2": 1997 }, "por_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 552108, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 552108 + "unique_sentence2": 1996 }, "por_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 565415, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 565415 + "unique_sentence2": 1996 }, "por_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 549724, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549724 + "unique_sentence2": 1997 }, "por_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 529042, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 529042 + "unique_sentence2": 1996 }, "por_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 587176, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 587176 + "unique_sentence2": 1997 }, "por_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 542828, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 542828 + "unique_sentence2": 1997 }, "por_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 548336, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 548336 + "unique_sentence2": 1996 }, "por_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 368693, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 368693 + "unique_sentence2": 1996 }, "por_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 534947, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 138.81572358537807, "max_sentence1_length": 497, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 534947 + "unique_sentence2": 1996 }, "prs_Arab-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 473717, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 121.45017526289435, "max_sentence1_length": 365, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 473717 + "unique_sentence2": 1995 }, "prs_Arab-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 494903, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 121.45017526289435, "max_sentence1_length": 365, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 126.37305958938407, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 494903 + "unique_sentence2": 1995 }, "prs_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 490256, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 121.45017526289435, "max_sentence1_length": 365, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 490256 + "unique_sentence2": 1997 }, "prs_Arab-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 485875, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 121.45017526289435, "max_sentence1_length": 365, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 485875 + "unique_sentence2": 1995 }, "prs_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 442832, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 121.45017526289435, "max_sentence1_length": 365, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 442832 + "unique_sentence2": 1996 }, "prs_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 488482, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 121.45017526289435, "max_sentence1_length": 365, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 123.15773660490736, "max_sentence2_length": 420, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 488482 + "unique_sentence2": 1996 }, "prs_Arab-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 456371, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 121.45017526289435, "max_sentence1_length": 365, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 107.07811717576365, "max_sentence2_length": 392, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 456371 + "unique_sentence2": 1993 }, "prs_Arab-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 485169, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 121.45017526289435, "max_sentence1_length": 365, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 121.49874812218327, "max_sentence2_length": 366, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 485169 + "unique_sentence2": 1996 }, "prs_Arab-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 457449, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 121.45017526289435, "max_sentence1_length": 365, + "unique_sentence1": 1997, "min_sentence2_length": 3, "average_sentence2_length": 107.6179268903355, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 457449 + "unique_sentence2": 1996 }, "prs_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 516683, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 121.45017526289435, "max_sentence1_length": 365, + "unique_sentence1": 1997, "min_sentence2_length": 3, "average_sentence2_length": 137.27941912869304, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 516683 + "unique_sentence2": 1995 }, "pus_Arab-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 473814, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 121.49874812218327, "max_sentence1_length": 366, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 473814 + "unique_sentence2": 1995 }, "pus_Arab-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 495000, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 121.49874812218327, "max_sentence1_length": 366, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 126.37305958938407, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495000 + "unique_sentence2": 1995 }, "pus_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 490353, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 121.49874812218327, "max_sentence1_length": 366, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 490353 + "unique_sentence2": 1997 }, "pus_Arab-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 485972, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 121.49874812218327, "max_sentence1_length": 366, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 485972 + "unique_sentence2": 1995 }, "pus_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 442929, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 121.49874812218327, "max_sentence1_length": 366, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 442929 + "unique_sentence2": 1996 }, "pus_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 488579, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 121.49874812218327, "max_sentence1_length": 366, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 123.15773660490736, "max_sentence2_length": 420, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 488579 + "unique_sentence2": 1996 }, "pus_Arab-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 456468, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 121.49874812218327, "max_sentence1_length": 366, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 107.07811717576365, "max_sentence2_length": 392, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 456468 + "unique_sentence2": 1993 }, "pus_Arab-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 485169, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 121.49874812218327, "max_sentence1_length": 366, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 121.45017526289435, "max_sentence2_length": 365, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 485169 + "unique_sentence2": 1997 }, "pus_Arab-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 457546, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 121.49874812218327, "max_sentence1_length": 366, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 107.6179268903355, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 457546 + "unique_sentence2": 1996 }, "pus_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 516780, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 121.49874812218327, "max_sentence1_length": 366, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 137.27941912869304, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 516780 + "unique_sentence2": 1995 }, "ron_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 575445, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 146.4621932899349, "max_sentence1_length": 518, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 141.6925388082123, "max_sentence2_length": 460, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 575445 + "unique_sentence2": 1997 }, "ron_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 540205, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 146.4621932899349, "max_sentence1_length": 518, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540205 + "unique_sentence2": 1997 }, "ron_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 585593, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 146.4621932899349, "max_sentence1_length": 518, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 585593 + "unique_sentence2": 1996 }, "ron_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 564471, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 146.4621932899349, "max_sentence1_length": 518, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 136.19729594391586, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564471 + "unique_sentence2": 1996 }, "ron_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 581702, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 146.4621932899349, "max_sentence1_length": 518, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 144.82573860791186, "max_sentence2_length": 623, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 581702 + "unique_sentence2": 1996 }, "ron_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 569960, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 146.4621932899349, "max_sentence1_length": 518, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 138.94591887831749, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 569960 + "unique_sentence2": 1996 }, "ron_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 569700, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 146.4621932899349, "max_sentence1_length": 518, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 569700 + "unique_sentence2": 1996 }, "ron_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 580685, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 146.4621932899349, "max_sentence1_length": 518, + "unique_sentence1": 1997, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 580685 + "unique_sentence2": 1996 }, "rus_Cyrl-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 506074, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506074 + "unique_sentence2": 1995 }, "rus_Cyrl-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 530983, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 128.2373560340511, "max_sentence2_length": 422, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530983 + "unique_sentence2": 1996 }, "rus_Cyrl-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 518695, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 518695 + "unique_sentence2": 1997 }, "rus_Cyrl-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 529803, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 127.64646970455684, "max_sentence2_length": 434, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 529803 + "unique_sentence2": 1996 }, "rus_Cyrl-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 544782, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 135.14722083124687, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 544782 + "unique_sentence2": 1996 }, "rus_Cyrl-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 516211, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 120.84026039058588, "max_sentence2_length": 474, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 516211 + "unique_sentence2": 1997 }, "rus_Cyrl-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 570543, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 570543 + "unique_sentence2": 1996 }, "rus_Cyrl-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 574020, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 574020 + "unique_sentence2": 1996 }, "rus_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 522613, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522613 + "unique_sentence2": 1997 }, "rus_Cyrl-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 518232, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 518232 + "unique_sentence2": 1995 }, "rus_Cyrl-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 544693, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 544693 + "unique_sentence2": 1996 }, "rus_Cyrl-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 568001, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 568001 + "unique_sentence2": 1996 }, "rus_Cyrl-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 475189, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 475189 + "unique_sentence2": 1996 }, "rus_Cyrl-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 536468, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536468 + "unique_sentence2": 1996 }, "rus_Cyrl-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 530818, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 128.1547320981472, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530818 + "unique_sentence2": 1997 }, "rus_Cyrl-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 553269, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 553269 + "unique_sentence2": 1997 }, "rus_Cyrl-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 561865, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561865 + "unique_sentence2": 1997 }, "rus_Cyrl-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 386519, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 386519 + "unique_sentence2": 1994 }, "rus_Cyrl-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 408298, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 408298 + "unique_sentence2": 1995 }, "rus_Cyrl-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 534290, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 534290 + "unique_sentence2": 1995 }, "rus_Cyrl-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 542784, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 134.1467200801202, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 542784 + "unique_sentence2": 1997 }, "rus_Cyrl-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 566808, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 566808 + "unique_sentence2": 1996 }, "rus_Cyrl-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 552759, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 552759 + "unique_sentence2": 1996 }, "rus_Cyrl-por_Latn": { + "num_samples": 1997, + "number_of_characters": 552108, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 552108 + "unique_sentence2": 1996 }, "rus_Cyrl-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 527862, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 126.67451176765148, "max_sentence2_length": 403, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 527862 + "unique_sentence2": 1996 }, "rus_Cyrl-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 527789, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 126.63795693540311, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 527789 + "unique_sentence2": 1996 }, "rus_Cyrl-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 563093, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 563093 + "unique_sentence2": 1996 }, "rus_Cyrl-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 527196, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 126.34101151727592, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 527196 + "unique_sentence2": 1995 }, "rus_Cyrl-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 531034, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 128.26289434151226, "max_sentence2_length": 452, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531034 + "unique_sentence2": 1996 }, "rus_Cyrl-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 547402, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547402 + "unique_sentence2": 1997 }, "rus_Cyrl-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 526720, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526720 + "unique_sentence2": 1996 }, "rus_Cyrl-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 584854, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 584854 + "unique_sentence2": 1997 }, "rus_Cyrl-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 540506, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540506 + "unique_sentence2": 1997 }, "rus_Cyrl-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 537676, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 131.58888332498748, "max_sentence2_length": 440, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 537676 + "unique_sentence2": 1996 }, "rus_Cyrl-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 546014, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 546014 + "unique_sentence2": 1996 }, "rus_Cyrl-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 366371, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 366371 + "unique_sentence2": 1996 }, "rus_Cyrl-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 532625, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 137.6529794692038, "max_sentence1_length": 419, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532625 + "unique_sentence2": 1996 }, "shi_Arab-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 446094, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 107.6179268903355, "max_sentence1_length": 378, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 446094 + "unique_sentence2": 1995 }, "shi_Arab-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 467280, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 107.6179268903355, "max_sentence1_length": 378, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 126.37305958938407, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 467280 + "unique_sentence2": 1995 }, "shi_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 462633, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 107.6179268903355, "max_sentence1_length": 378, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 462633 + "unique_sentence2": 1997 }, "shi_Arab-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 458252, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 107.6179268903355, "max_sentence1_length": 378, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 458252 + "unique_sentence2": 1995 }, "shi_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 415209, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 107.6179268903355, "max_sentence1_length": 378, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 415209 + "unique_sentence2": 1996 }, "shi_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 460859, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 107.6179268903355, "max_sentence1_length": 378, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 123.15773660490736, "max_sentence2_length": 420, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 460859 + "unique_sentence2": 1996 }, "shi_Arab-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 428748, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 107.6179268903355, "max_sentence1_length": 378, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 107.07811717576365, "max_sentence2_length": 392, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 428748 + "unique_sentence2": 1993 }, "shi_Arab-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 457449, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 107.6179268903355, "max_sentence1_length": 378, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 121.45017526289435, "max_sentence2_length": 365, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 457449 + "unique_sentence2": 1997 }, "shi_Arab-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 457546, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 107.6179268903355, "max_sentence1_length": 378, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 121.49874812218327, "max_sentence2_length": 366, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 457546 + "unique_sentence2": 1996 }, "shi_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 489060, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 107.6179268903355, "max_sentence1_length": 378, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 137.27941912869304, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 489060 + "unique_sentence2": 1995 }, "sin_Sinh-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 502543, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 129.56484727090637, "max_sentence1_length": 441, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 502543 + "unique_sentence2": 1997 }, "sin_Sinh-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 562589, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 129.56484727090637, "max_sentence1_length": 441, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 152.15222834251378, "max_sentence2_length": 609, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 562589 + "unique_sentence2": 1996 }, "sin_Sinh-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 506461, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 129.56484727090637, "max_sentence1_length": 441, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506461 + "unique_sentence2": 1997 }, "sin_Sinh-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 533944, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 129.56484727090637, "max_sentence1_length": 441, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 137.80821231847773, "max_sentence2_length": 393, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 533944 + "unique_sentence2": 1997 }, "sin_Sinh-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 503672, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 129.56484727090637, "max_sentence1_length": 441, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 122.64947421131697, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503672 + "unique_sentence2": 1997 }, "sin_Sinh-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 520316, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 129.56484727090637, "max_sentence1_length": 441, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520316 + "unique_sentence2": 1996 }, "sin_Sinh-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 524277, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 129.56484727090637, "max_sentence1_length": 441, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 132.96745117676514, "max_sentence2_length": 449, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 524277 + "unique_sentence2": 1996 }, "sin_Sinh-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 519628, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 129.56484727090637, "max_sentence1_length": 441, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 130.63945918878318, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 519628 + "unique_sentence2": 1995 }, "sin_Sinh-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 506964, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 129.56484727090637, "max_sentence1_length": 441, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 124.29794692038057, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506964 + "unique_sentence2": 1996 }, "sin_Sinh-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 509163, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 129.56484727090637, "max_sentence1_length": 441, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 125.39909864797195, "max_sentence2_length": 383, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509163 + "unique_sentence2": 1996 }, "sin_Sinh-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 479068, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 129.56484727090637, "max_sentence1_length": 441, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 110.32899349023535, "max_sentence2_length": 335, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 479068 + "unique_sentence2": 1996 }, "sin_Sinh-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 568702, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 129.56484727090637, "max_sentence1_length": 441, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 568702 + "unique_sentence2": 1997 }, "sin_Sinh-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 506268, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 129.56484727090637, "max_sentence1_length": 441, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 123.9494241362043, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506268 + "unique_sentence2": 1996 }, "sin_Sinh-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 506739, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 129.56484727090637, "max_sentence1_length": 441, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.18527791687531, "max_sentence2_length": 390, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506739 + "unique_sentence2": 1996 }, "slk_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 509059, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 126.67451176765148, "max_sentence1_length": 403, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 128.2373560340511, "max_sentence2_length": 422, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509059 + "unique_sentence2": 1996 }, "slk_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 507879, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 126.67451176765148, "max_sentence1_length": 403, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 127.64646970455684, "max_sentence2_length": 434, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 507879 + "unique_sentence2": 1996 }, "slk_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 522858, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 126.67451176765148, "max_sentence1_length": 403, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 135.14722083124687, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522858 + "unique_sentence2": 1996 }, "slk_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 494287, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 126.67451176765148, "max_sentence1_length": 403, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 120.84026039058588, "max_sentence2_length": 474, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 494287 + "unique_sentence2": 1997 }, "slk_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 500689, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 126.67451176765148, "max_sentence1_length": 403, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 500689 + "unique_sentence2": 1997 }, "slk_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 508894, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 126.67451176765148, "max_sentence1_length": 403, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 128.1547320981472, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508894 + "unique_sentence2": 1997 }, "slk_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 520860, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 126.67451176765148, "max_sentence1_length": 403, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 134.1467200801202, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520860 + "unique_sentence2": 1997 }, "slk_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 530835, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 126.67451176765148, "max_sentence1_length": 403, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530835 + "unique_sentence2": 1996 }, "slk_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 527862, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 126.67451176765148, "max_sentence1_length": 403, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 527862 + "unique_sentence2": 1996 }, "slk_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 505865, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 126.67451176765148, "max_sentence1_length": 403, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 126.63795693540311, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 505865 + "unique_sentence2": 1996 }, "slk_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 505272, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 126.67451176765148, "max_sentence1_length": 403, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 126.34101151727592, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 505272 + "unique_sentence2": 1995 }, "slk_Latn-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 509110, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 126.67451176765148, "max_sentence1_length": 403, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 128.26289434151226, "max_sentence2_length": 452, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509110 + "unique_sentence2": 1996 }, "slk_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515752, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 126.67451176765148, "max_sentence1_length": 403, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 131.58888332498748, "max_sentence2_length": 440, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515752 + "unique_sentence2": 1996 }, "slv_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 508986, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 126.63795693540311, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 128.2373560340511, "max_sentence2_length": 422, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508986 + "unique_sentence2": 1996 }, "slv_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 507806, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 126.63795693540311, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 127.64646970455684, "max_sentence2_length": 434, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 507806 + "unique_sentence2": 1996 }, "slv_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 522785, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 126.63795693540311, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 135.14722083124687, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522785 + "unique_sentence2": 1996 }, "slv_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 494214, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 126.63795693540311, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 120.84026039058588, "max_sentence2_length": 474, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 494214 + "unique_sentence2": 1997 }, "slv_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 500616, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 126.63795693540311, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 500616 + "unique_sentence2": 1997 }, "slv_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 508821, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 126.63795693540311, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 128.1547320981472, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508821 + "unique_sentence2": 1997 }, "slv_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 520787, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 126.63795693540311, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 134.1467200801202, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520787 + "unique_sentence2": 1997 }, "slv_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 530762, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 126.63795693540311, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530762 + "unique_sentence2": 1996 }, "slv_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 527789, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 126.63795693540311, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 527789 + "unique_sentence2": 1996 }, "slv_Latn-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 505865, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 126.63795693540311, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 126.67451176765148, "max_sentence2_length": 403, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 505865 + "unique_sentence2": 1996 }, "slv_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 505199, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 126.63795693540311, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 126.34101151727592, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 505199 + "unique_sentence2": 1995 }, "slv_Latn-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 509037, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 126.63795693540311, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 128.26289434151226, "max_sentence2_length": 452, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509037 + "unique_sentence2": 1996 }, "slv_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515679, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 126.63795693540311, "max_sentence1_length": 463, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 131.58888332498748, "max_sentence2_length": 440, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515679 + "unique_sentence2": 1996 }, "smo_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 525575, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 139.1362043064597, "max_sentence1_length": 431, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525575 + "unique_sentence2": 1997 }, "smo_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 578360, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 139.1362043064597, "max_sentence1_length": 431, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 150.478217325989, "max_sentence2_length": 448, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 578360 + "unique_sentence2": 1988 }, "smo_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 571275, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 139.1362043064597, "max_sentence1_length": 431, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 146.93039559339007, "max_sentence2_length": 554, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 571275 + "unique_sentence2": 1997 }, "smo_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 564827, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 139.1362043064597, "max_sentence1_length": 431, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564827 + "unique_sentence2": 1997 }, "smo_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 582007, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 139.1362043064597, "max_sentence1_length": 431, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 152.30445668502753, "max_sentence2_length": 540, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582007 + "unique_sentence2": 1996 }, "smo_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 598163, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 139.1362043064597, "max_sentence1_length": 431, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 160.39459188783175, "max_sentence2_length": 559, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 598163 + "unique_sentence2": 1994 }, "smo_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 551979, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 139.1362043064597, "max_sentence1_length": 431, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 137.26790185277918, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 551979 + "unique_sentence2": 1997 }, "smo_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 555038, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 139.1362043064597, "max_sentence1_length": 431, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 138.79969954932398, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 555038 + "unique_sentence2": 1997 }, "smo_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 587478, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 139.1362043064597, "max_sentence1_length": 431, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 155.04406609914872, "max_sentence2_length": 524, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 587478 + "unique_sentence2": 1997 }, "smo_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 591495, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 139.1362043064597, "max_sentence1_length": 431, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 157.05558337506258, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 591495 + "unique_sentence2": 1997 }, "sna_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 596822, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 149.38908362543816, "max_sentence1_length": 511, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 149.47020530796195, "max_sentence2_length": 465, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 596822 + "unique_sentence2": 1997 }, "sna_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 546050, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 149.38908362543816, "max_sentence1_length": 511, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 546050 + "unique_sentence2": 1997 }, "sna_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 537308, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 149.38908362543816, "max_sentence1_length": 511, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 119.6685027541312, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 537308 + "unique_sentence2": 1994 }, "sna_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 526810, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 149.38908362543816, "max_sentence1_length": 511, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 114.4116174261392, "max_sentence2_length": 376, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526810 + "unique_sentence2": 1996 }, "sna_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 602117, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 149.38908362543816, "max_sentence1_length": 511, + "unique_sentence1": 1995, "min_sentence2_length": 10, "average_sentence2_length": 152.12168252378567, "max_sentence2_length": 541, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 602117 + "unique_sentence2": 1996 }, "sna_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 596069, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 149.38908362543816, "max_sentence1_length": 511, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 149.09313970956435, "max_sentence2_length": 590, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 596069 + "unique_sentence2": 1997 }, "sna_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 582612, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 149.38908362543816, "max_sentence1_length": 511, + "unique_sentence1": 1995, "min_sentence2_length": 10, "average_sentence2_length": 142.35453179769655, "max_sentence2_length": 464, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582612 + "unique_sentence2": 1993 }, "sna_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 598086, + "unique_pairs": 1995, "min_sentence1_length": 6, "average_sentence1_length": 149.38908362543816, "max_sentence1_length": 511, + "unique_sentence1": 1995, "min_sentence2_length": 10, "average_sentence2_length": 150.10315473209815, "max_sentence2_length": 535, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 598086 + "unique_sentence2": 1993 }, "snd_Arab-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 464129, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 110.32899349023535, "max_sentence1_length": 335, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 464129 + "unique_sentence2": 1997 }, "snd_Arab-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 524175, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 110.32899349023535, "max_sentence1_length": 335, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 152.15222834251378, "max_sentence2_length": 609, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 524175 + "unique_sentence2": 1996 }, "snd_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 468047, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 110.32899349023535, "max_sentence1_length": 335, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 468047 + "unique_sentence2": 1997 }, "snd_Arab-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 495530, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 110.32899349023535, "max_sentence1_length": 335, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 137.80821231847773, "max_sentence2_length": 393, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495530 + "unique_sentence2": 1997 }, "snd_Arab-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 465258, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 110.32899349023535, "max_sentence1_length": 335, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 122.64947421131697, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 465258 + "unique_sentence2": 1997 }, "snd_Arab-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 481902, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 110.32899349023535, "max_sentence1_length": 335, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 481902 + "unique_sentence2": 1996 }, "snd_Arab-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 485863, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 110.32899349023535, "max_sentence1_length": 335, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 132.96745117676514, "max_sentence2_length": 449, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 485863 + "unique_sentence2": 1996 }, "snd_Arab-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 481214, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 110.32899349023535, "max_sentence1_length": 335, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 130.63945918878318, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 481214 + "unique_sentence2": 1995 }, "snd_Arab-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 468550, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 110.32899349023535, "max_sentence1_length": 335, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 124.29794692038057, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 468550 + "unique_sentence2": 1996 }, "snd_Arab-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 470749, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 110.32899349023535, "max_sentence1_length": 335, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 125.39909864797195, "max_sentence2_length": 383, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 470749 + "unique_sentence2": 1996 }, "snd_Arab-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 479068, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 110.32899349023535, "max_sentence1_length": 335, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 129.56484727090637, "max_sentence2_length": 441, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 479068 + "unique_sentence2": 1996 }, "snd_Arab-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 530288, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 110.32899349023535, "max_sentence1_length": 335, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530288 + "unique_sentence2": 1997 }, "snd_Arab-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 467854, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 110.32899349023535, "max_sentence1_length": 335, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 123.9494241362043, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 467854 + "unique_sentence2": 1996 }, "snd_Arab-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 468325, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 110.32899349023535, "max_sentence1_length": 335, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.18527791687531, "max_sentence2_length": 390, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 468325 + "unique_sentence2": 1996 }, "som_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 458799, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 145.8647971957937, "max_sentence1_length": 455, + "unique_sentence1": 1997, "min_sentence2_length": 1, "average_sentence2_length": 83.87931897846771, "max_sentence2_length": 290, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 458799 + "unique_sentence2": 1994 }, "som_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 539012, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 145.8647971957937, "max_sentence1_length": 455, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 539012 + "unique_sentence2": 1997 }, "som_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 561258, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 145.8647971957937, "max_sentence1_length": 455, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 135.185778668002, "max_sentence2_length": 483, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561258 + "unique_sentence2": 1997 }, "som_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 537393, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 145.8647971957937, "max_sentence1_length": 455, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 123.23535302954431, "max_sentence2_length": 469, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 537393 + "unique_sentence2": 1997 }, "som_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 582791, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 145.8647971957937, "max_sentence1_length": 455, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 145.96845267901853, "max_sentence2_length": 487, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582791 + "unique_sentence2": 1996 }, "som_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 528723, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 145.8647971957937, "max_sentence1_length": 455, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 118.89384076114172, "max_sentence2_length": 466, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 528723 + "unique_sentence2": 1984 }, "som_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 579434, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 145.8647971957937, "max_sentence1_length": 455, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 144.28743114672008, "max_sentence2_length": 510, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 579434 + "unique_sentence2": 1996 }, "som_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 563801, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 145.8647971957937, "max_sentence1_length": 455, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 563801 + "unique_sentence2": 1997 }, "som_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 456530, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 145.8647971957937, "max_sentence1_length": 455, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 82.743114672008, "max_sentence2_length": 272, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 456530 + "unique_sentence2": 1996 }, "som_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 625575, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 145.8647971957937, "max_sentence1_length": 455, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 167.39258888332498, "max_sentence2_length": 556, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 625575 + "unique_sentence2": 1997 }, "som_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 531095, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 145.8647971957937, "max_sentence1_length": 455, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 120.08162243365048, "max_sentence2_length": 405, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531095 + "unique_sentence2": 1990 }, "som_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 559382, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 145.8647971957937, "max_sentence1_length": 455, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 134.2463695543315, "max_sentence2_length": 492, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 559382 + "unique_sentence2": 1997 }, "som_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 607380, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 145.8647971957937, "max_sentence1_length": 455, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 158.2814221331998, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 607380 + "unique_sentence2": 1996 }, "som_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 549024, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 145.8647971957937, "max_sentence1_length": 455, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549024 + "unique_sentence2": 1996 }, "spa_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 519381, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 519381 + "unique_sentence2": 1995 }, "spa_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 532002, + "unique_pairs": 1997, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532002 + "unique_sentence2": 1997 }, "spa_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 571160, + "unique_pairs": 1997, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 141.6925388082123, "max_sentence2_length": 460, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 571160 + "unique_sentence2": 1997 }, "spa_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 583850, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 583850 + "unique_sentence2": 1996 }, "spa_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 587327, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 587327 + "unique_sentence2": 1996 }, "spa_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 535920, + "unique_pairs": 1997, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 535920 + "unique_sentence2": 1997 }, "spa_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 531539, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531539 + "unique_sentence2": 1995 }, "spa_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 558000, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 558000 + "unique_sentence2": 1996 }, "spa_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 581308, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 581308 + "unique_sentence2": 1996 }, "spa_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 560186, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 136.19729594391586, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 560186 + "unique_sentence2": 1996 }, "spa_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 488496, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 488496 + "unique_sentence2": 1996 }, "spa_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 549775, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549775 + "unique_sentence2": 1996 }, "spa_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 566576, + "unique_pairs": 1997, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 566576 + "unique_sentence2": 1997 }, "spa_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 575172, + "unique_pairs": 1997, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 575172 + "unique_sentence2": 1997 }, "spa_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 577417, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 144.82573860791186, "max_sentence2_length": 623, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 577417 + "unique_sentence2": 1996 }, "spa_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 399826, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 399826 + "unique_sentence2": 1994 }, "spa_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 421605, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 421605 + "unique_sentence2": 1995 }, "spa_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 547597, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547597 + "unique_sentence2": 1995 }, "spa_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 565675, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 138.94591887831749, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 565675 + "unique_sentence2": 1996 }, "spa_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 580115, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 580115 + "unique_sentence2": 1996 }, "spa_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 566066, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 566066 + "unique_sentence2": 1996 }, "spa_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 565415, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 565415 + "unique_sentence2": 1996 }, "spa_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 580685, + "unique_pairs": 1997, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 146.4621932899349, "max_sentence2_length": 518, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 580685 + "unique_sentence2": 1997 }, "spa_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 563093, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 563093 + "unique_sentence2": 1996 }, "spa_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 560709, + "unique_pairs": 1997, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 560709 + "unique_sentence2": 1997 }, "spa_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 540027, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540027 + "unique_sentence2": 1996 }, "spa_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 598161, + "unique_pairs": 1997, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 598161 + "unique_sentence2": 1997 }, "spa_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 553813, + "unique_pairs": 1997, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 553813 + "unique_sentence2": 1997 }, "spa_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 559321, + "unique_pairs": 1997, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 559321 + "unique_sentence2": 1996 }, "spa_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 379678, + "unique_pairs": 1996, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 379678 + "unique_sentence2": 1996 }, "spa_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 545932, + "unique_pairs": 1997, "min_sentence1_length": 1, "average_sentence1_length": 144.3164747120681, "max_sentence1_length": 504, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 545932 + "unique_sentence2": 1996 }, "sqi_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 582734, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 142.01652478718077, "max_sentence1_length": 461, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582734 + "unique_sentence2": 1996 }, "sqi_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 531327, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 142.01652478718077, "max_sentence1_length": 461, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531327 + "unique_sentence2": 1997 }, "sqi_Latn-hye_Armn": { + "num_samples": 1997, + "number_of_characters": 548322, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 142.01652478718077, "max_sentence1_length": 461, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 132.55633450175262, "max_sentence2_length": 441, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 548322 + "unique_sentence2": 1996 }, "sqi_Latn-kat_Geor": { + "num_samples": 1997, + "number_of_characters": 550199, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 142.01652478718077, "max_sentence1_length": 461, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 133.49624436654983, "max_sentence2_length": 503, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 550199 + "unique_sentence2": 1995 }, "srp_Cyrl-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 508393, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 126.34101151727592, "max_sentence1_length": 439, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 128.2373560340511, "max_sentence2_length": 422, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508393 + "unique_sentence2": 1996 }, "srp_Cyrl-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 507213, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 126.34101151727592, "max_sentence1_length": 439, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 127.64646970455684, "max_sentence2_length": 434, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 507213 + "unique_sentence2": 1996 }, "srp_Cyrl-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 522192, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 126.34101151727592, "max_sentence1_length": 439, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 135.14722083124687, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522192 + "unique_sentence2": 1996 }, "srp_Cyrl-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 493621, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 126.34101151727592, "max_sentence1_length": 439, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 120.84026039058588, "max_sentence2_length": 474, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 493621 + "unique_sentence2": 1997 }, "srp_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 500023, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 126.34101151727592, "max_sentence1_length": 439, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 500023 + "unique_sentence2": 1997 }, "srp_Cyrl-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 508228, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 126.34101151727592, "max_sentence1_length": 439, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 128.1547320981472, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508228 + "unique_sentence2": 1997 }, "srp_Cyrl-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 520194, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 126.34101151727592, "max_sentence1_length": 439, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 134.1467200801202, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520194 + "unique_sentence2": 1997 }, "srp_Cyrl-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 530169, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 126.34101151727592, "max_sentence1_length": 439, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530169 + "unique_sentence2": 1996 }, "srp_Cyrl-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 527196, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 126.34101151727592, "max_sentence1_length": 439, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 527196 + "unique_sentence2": 1996 }, "srp_Cyrl-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 505272, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 126.34101151727592, "max_sentence1_length": 439, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 126.67451176765148, "max_sentence2_length": 403, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 505272 + "unique_sentence2": 1996 }, "srp_Cyrl-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 505199, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 126.34101151727592, "max_sentence1_length": 439, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 126.63795693540311, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 505199 + "unique_sentence2": 1996 }, "srp_Cyrl-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 508444, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 126.34101151727592, "max_sentence1_length": 439, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 128.26289434151226, "max_sentence2_length": 452, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508444 + "unique_sentence2": 1996 }, "srp_Cyrl-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515086, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 126.34101151727592, "max_sentence1_length": 439, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 131.58888332498748, "max_sentence2_length": 440, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515086 + "unique_sentence2": 1996 }, "srp_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 512231, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 128.26289434151226, "max_sentence1_length": 452, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 128.2373560340511, "max_sentence2_length": 422, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 512231 + "unique_sentence2": 1996 }, "srp_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 511051, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 128.26289434151226, "max_sentence1_length": 452, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 127.64646970455684, "max_sentence2_length": 434, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 511051 + "unique_sentence2": 1996 }, "srp_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 526030, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 128.26289434151226, "max_sentence1_length": 452, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 135.14722083124687, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526030 + "unique_sentence2": 1996 }, "srp_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 497459, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 128.26289434151226, "max_sentence1_length": 452, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 120.84026039058588, "max_sentence2_length": 474, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 497459 + "unique_sentence2": 1997 }, "srp_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 503861, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 128.26289434151226, "max_sentence1_length": 452, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503861 + "unique_sentence2": 1997 }, "srp_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 512066, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 128.26289434151226, "max_sentence1_length": 452, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 128.1547320981472, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 512066 + "unique_sentence2": 1997 }, "srp_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 524032, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 128.26289434151226, "max_sentence1_length": 452, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 134.1467200801202, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 524032 + "unique_sentence2": 1997 }, "srp_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 534007, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 128.26289434151226, "max_sentence1_length": 452, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 534007 + "unique_sentence2": 1996 }, "srp_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 531034, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 128.26289434151226, "max_sentence1_length": 452, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531034 + "unique_sentence2": 1996 }, "srp_Latn-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 509110, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 128.26289434151226, "max_sentence1_length": 452, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 126.67451176765148, "max_sentence2_length": 403, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509110 + "unique_sentence2": 1996 }, "srp_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 509037, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 128.26289434151226, "max_sentence1_length": 452, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 126.63795693540311, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509037 + "unique_sentence2": 1996 }, "srp_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 508444, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 128.26289434151226, "max_sentence1_length": 452, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 126.34101151727592, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508444 + "unique_sentence2": 1995 }, "srp_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 518924, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 128.26289434151226, "max_sentence1_length": 452, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 131.58888332498748, "max_sentence2_length": 440, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 518924 + "unique_sentence2": 1996 }, "ssw_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 455649, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 144.28743114672008, "max_sentence1_length": 510, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 83.87931897846771, "max_sentence2_length": 290, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 455649 + "unique_sentence2": 1994 }, "ssw_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 535862, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 144.28743114672008, "max_sentence1_length": 510, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 535862 + "unique_sentence2": 1997 }, "ssw_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 558108, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 144.28743114672008, "max_sentence1_length": 510, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 135.185778668002, "max_sentence2_length": 483, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 558108 + "unique_sentence2": 1997 }, "ssw_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 534243, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 144.28743114672008, "max_sentence1_length": 510, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 123.23535302954431, "max_sentence2_length": 469, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 534243 + "unique_sentence2": 1997 }, "ssw_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 579641, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 144.28743114672008, "max_sentence1_length": 510, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 145.96845267901853, "max_sentence2_length": 487, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 579641 + "unique_sentence2": 1996 }, "ssw_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 525573, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 144.28743114672008, "max_sentence1_length": 510, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 118.89384076114172, "max_sentence2_length": 466, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525573 + "unique_sentence2": 1984 }, "ssw_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 579434, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 144.28743114672008, "max_sentence1_length": 510, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 145.8647971957937, "max_sentence2_length": 455, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 579434 + "unique_sentence2": 1997 }, "ssw_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 560651, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 144.28743114672008, "max_sentence1_length": 510, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 560651 + "unique_sentence2": 1997 }, "ssw_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 453380, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 144.28743114672008, "max_sentence1_length": 510, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 82.743114672008, "max_sentence2_length": 272, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 453380 + "unique_sentence2": 1996 }, "ssw_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 622425, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 144.28743114672008, "max_sentence1_length": 510, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 167.39258888332498, "max_sentence2_length": 556, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 622425 + "unique_sentence2": 1997 }, "ssw_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 527945, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 144.28743114672008, "max_sentence1_length": 510, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 120.08162243365048, "max_sentence2_length": 405, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 527945 + "unique_sentence2": 1990 }, "ssw_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 556232, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 144.28743114672008, "max_sentence1_length": 510, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 134.2463695543315, "max_sentence2_length": 492, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 556232 + "unique_sentence2": 1997 }, "ssw_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 604230, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 144.28743114672008, "max_sentence1_length": 510, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 158.2814221331998, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 604230 + "unique_sentence2": 1996 }, "ssw_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 545874, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 144.28743114672008, "max_sentence1_length": 510, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 545874 + "unique_sentence2": 1996 }, "swa_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 440016, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 1, "average_sentence2_length": 83.87931897846771, "max_sentence2_length": 290, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 440016 + "unique_sentence2": 1994 }, "swa_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 503690, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503690 + "unique_sentence2": 1995 }, "swa_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 516311, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 516311 + "unique_sentence2": 1997 }, "swa_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 568159, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 568159 + "unique_sentence2": 1996 }, "swa_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 571636, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 571636 + "unique_sentence2": 1996 }, "swa_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 520229, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520229 + "unique_sentence2": 1997 }, "swa_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 515848, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515848 + "unique_sentence2": 1995 }, "swa_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 542309, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 542309 + "unique_sentence2": 1996 }, "swa_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 565617, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 565617 + "unique_sentence2": 1996 }, "swa_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 542475, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 135.185778668002, "max_sentence2_length": 483, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 542475 + "unique_sentence2": 1997 }, "swa_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 472805, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 472805 + "unique_sentence2": 1996 }, "swa_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 534084, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 534084 + "unique_sentence2": 1996 }, "swa_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 550885, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 550885 + "unique_sentence2": 1997 }, "swa_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 518610, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 123.23535302954431, "max_sentence2_length": 469, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 518610 + "unique_sentence2": 1997 }, "swa_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 559481, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 559481 + "unique_sentence2": 1997 }, "swa_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 384135, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 384135 + "unique_sentence2": 1994 }, "swa_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 405914, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 405914 + "unique_sentence2": 1995 }, "swa_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 531906, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531906 + "unique_sentence2": 1995 }, "swa_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 564424, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564424 + "unique_sentence2": 1996 }, "swa_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 564008, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 145.96845267901853, "max_sentence2_length": 487, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564008 + "unique_sentence2": 1996 }, "swa_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 509940, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 118.89384076114172, "max_sentence2_length": 466, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509940 + "unique_sentence2": 1984 }, "swa_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 550375, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 550375 + "unique_sentence2": 1996 }, "swa_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 549724, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549724 + "unique_sentence2": 1996 }, "swa_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 547402, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547402 + "unique_sentence2": 1996 }, "swa_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 563801, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 145.8647971957937, "max_sentence2_length": 455, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 563801 + "unique_sentence2": 1997 }, "swa_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 560709, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 560709 + "unique_sentence2": 1996 }, "swa_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 560651, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 144.28743114672008, "max_sentence2_length": 510, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 560651 + "unique_sentence2": 1996 }, "swa_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 524336, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 524336 + "unique_sentence2": 1996 }, "swa_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 582470, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582470 + "unique_sentence2": 1997 }, "swa_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 437747, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 82.743114672008, "max_sentence2_length": 272, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 437747 + "unique_sentence2": 1996 }, "swa_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 606792, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 167.39258888332498, "max_sentence2_length": 556, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 606792 + "unique_sentence2": 1997 }, "swa_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 538122, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 538122 + "unique_sentence2": 1997 }, "swa_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 543630, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 543630 + "unique_sentence2": 1996 }, "swa_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 512312, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 120.08162243365048, "max_sentence2_length": 405, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 512312 + "unique_sentence2": 1990 }, "swa_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 540599, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 134.2463695543315, "max_sentence2_length": 492, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540599 + "unique_sentence2": 1997 }, "swa_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 588597, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 158.2814221331998, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 588597 + "unique_sentence2": 1996 }, "swa_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 363987, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 363987 + "unique_sentence2": 1996 }, "swa_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 530241, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 136.45918878317477, "max_sentence1_length": 430, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530241 + "unique_sentence2": 1996 }, "swe_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 520179, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 134.37756634952427, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520179 + "unique_sentence2": 1996 }, "swe_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 483008, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 483008 + "unique_sentence2": 1995 }, "swe_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 495629, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495629 + "unique_sentence2": 1997 }, "swe_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 503965, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 126.25838758137206, "max_sentence2_length": 522, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503965 + "unique_sentence2": 1995 }, "swe_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 547477, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547477 + "unique_sentence2": 1996 }, "swe_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 550954, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 550954 + "unique_sentence2": 1996 }, "swe_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 499547, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 499547 + "unique_sentence2": 1997 }, "swe_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 509630, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0951427140711, "max_sentence2_length": 433, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509630 + "unique_sentence2": 1997 }, "swe_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 495166, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495166 + "unique_sentence2": 1995 }, "swe_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 521627, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 521627 + "unique_sentence2": 1996 }, "swe_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 544935, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 544935 + "unique_sentence2": 1996 }, "swe_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 452123, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 452123 + "unique_sentence2": 1996 }, "swe_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 513402, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 513402 + "unique_sentence2": 1996 }, "swe_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 530203, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530203 + "unique_sentence2": 1997 }, "swe_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 538799, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 538799 + "unique_sentence2": 1997 }, "swe_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 514035, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 131.30095142714072, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 514035 + "unique_sentence2": 1996 }, "swe_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 363453, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 363453 + "unique_sentence2": 1994 }, "swe_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 385232, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 385232 + "unique_sentence2": 1995 }, "swe_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 511224, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 511224 + "unique_sentence2": 1995 }, "swe_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 532584, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 140.58938407611416, "max_sentence2_length": 543, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532584 + "unique_sentence2": 1996 }, "swe_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 543742, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 543742 + "unique_sentence2": 1996 }, "swe_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 500184, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.36504757135704, "max_sentence2_length": 417, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 500184 + "unique_sentence2": 1996 }, "swe_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 503271, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 125.91086629944917, "max_sentence2_length": 482, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503271 + "unique_sentence2": 1996 }, "swe_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 529693, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 529693 + "unique_sentence2": 1996 }, "swe_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 529042, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 529042 + "unique_sentence2": 1996 }, "swe_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 526720, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526720 + "unique_sentence2": 1996 }, "swe_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 540027, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540027 + "unique_sentence2": 1996 }, "swe_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 524336, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 524336 + "unique_sentence2": 1997 }, "swe_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 561788, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561788 + "unique_sentence2": 1997 }, "swe_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 517440, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 517440 + "unique_sentence2": 1997 }, "swe_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 522948, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522948 + "unique_sentence2": 1996 }, "swe_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 343305, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 343305 + "unique_sentence2": 1996 }, "swe_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 509559, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 126.10265398097145, "max_sentence1_length": 430, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509559 + "unique_sentence2": 1996 }, "tah_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 557343, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 155.04406609914872, "max_sentence1_length": 524, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 557343 + "unique_sentence2": 1997 }, "tah_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 610128, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 155.04406609914872, "max_sentence1_length": 524, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 150.478217325989, "max_sentence2_length": 448, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 610128 + "unique_sentence2": 1988 }, "tah_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 603043, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 155.04406609914872, "max_sentence1_length": 524, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 146.93039559339007, "max_sentence2_length": 554, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 603043 + "unique_sentence2": 1997 }, "tah_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 596595, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 155.04406609914872, "max_sentence1_length": 524, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 596595 + "unique_sentence2": 1997 }, "tah_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 613775, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 155.04406609914872, "max_sentence1_length": 524, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 152.30445668502753, "max_sentence2_length": 540, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 613775 + "unique_sentence2": 1996 }, "tah_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 629931, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 155.04406609914872, "max_sentence1_length": 524, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 160.39459188783175, "max_sentence2_length": 559, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 629931 + "unique_sentence2": 1994 }, "tah_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 583747, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 155.04406609914872, "max_sentence1_length": 524, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 137.26790185277918, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 583747 + "unique_sentence2": 1997 }, "tah_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 586806, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 155.04406609914872, "max_sentence1_length": 524, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 138.79969954932398, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 586806 + "unique_sentence2": 1997 }, "tah_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 587478, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 155.04406609914872, "max_sentence1_length": 524, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 139.1362043064597, "max_sentence2_length": 431, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 587478 + "unique_sentence2": 1996 }, "tah_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 623263, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 155.04406609914872, "max_sentence1_length": 524, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 157.05558337506258, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 623263 + "unique_sentence2": 1997 }, "tam_Taml-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 541142, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 541142 + "unique_sentence2": 1995 }, "tam_Taml-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 553763, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 553763 + "unique_sentence2": 1997 }, "tam_Taml-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 605611, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 605611 + "unique_sentence2": 1996 }, "tam_Taml-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 613809, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 152.15222834251378, "max_sentence2_length": 609, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 613809 + "unique_sentence2": 1996 }, "tam_Taml-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 609088, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 609088 + "unique_sentence2": 1996 }, "tam_Taml-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 557681, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 557681 + "unique_sentence2": 1997 }, "tam_Taml-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 585164, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 137.80821231847773, "max_sentence2_length": 393, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 585164 + "unique_sentence2": 1997 }, "tam_Taml-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 553300, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 553300 + "unique_sentence2": 1995 }, "tam_Taml-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 579761, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 579761 + "unique_sentence2": 1996 }, "tam_Taml-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 603069, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 603069 + "unique_sentence2": 1996 }, "tam_Taml-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 554892, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 122.64947421131697, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 554892 + "unique_sentence2": 1997 }, "tam_Taml-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 510257, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 510257 + "unique_sentence2": 1996 }, "tam_Taml-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 571536, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 571536 + "unique_sentence2": 1996 }, "tam_Taml-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 588337, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 588337 + "unique_sentence2": 1997 }, "tam_Taml-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 596933, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 596933 + "unique_sentence2": 1997 }, "tam_Taml-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 421587, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 421587 + "unique_sentence2": 1994 }, "tam_Taml-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 575497, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 132.96745117676514, "max_sentence2_length": 449, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 575497 + "unique_sentence2": 1996 }, "tam_Taml-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 443366, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 443366 + "unique_sentence2": 1995 }, "tam_Taml-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 569358, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 569358 + "unique_sentence2": 1995 }, "tam_Taml-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 570848, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 130.63945918878318, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 570848 + "unique_sentence2": 1995 }, "tam_Taml-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 558184, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 124.29794692038057, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 558184 + "unique_sentence2": 1996 }, "tam_Taml-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 601876, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 601876 + "unique_sentence2": 1996 }, "tam_Taml-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 560383, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 125.39909864797195, "max_sentence2_length": 383, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 560383 + "unique_sentence2": 1996 }, "tam_Taml-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 587827, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 587827 + "unique_sentence2": 1996 }, "tam_Taml-por_Latn": { + "num_samples": 1997, + "number_of_characters": 587176, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 587176 + "unique_sentence2": 1996 }, "tam_Taml-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 584854, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 584854 + "unique_sentence2": 1996 }, "tam_Taml-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 568702, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 129.56484727090637, "max_sentence2_length": 441, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 568702 + "unique_sentence2": 1996 }, "tam_Taml-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 530288, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 110.32899349023535, "max_sentence2_length": 335, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530288 + "unique_sentence2": 1996 }, "tam_Taml-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 598161, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 598161 + "unique_sentence2": 1996 }, "tam_Taml-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 582470, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582470 + "unique_sentence2": 1997 }, "tam_Taml-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 561788, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561788 + "unique_sentence2": 1996 }, "tam_Taml-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 557488, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 123.9494241362043, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 557488 + "unique_sentence2": 1996 }, "tam_Taml-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 575574, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 575574 + "unique_sentence2": 1997 }, "tam_Taml-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 557959, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.18527791687531, "max_sentence2_length": 390, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 557959 + "unique_sentence2": 1996 }, "tam_Taml-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 581082, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 581082 + "unique_sentence2": 1996 }, "tam_Taml-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 401439, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 401439 + "unique_sentence2": 1996 }, "tam_Taml-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 567693, + "unique_pairs": 1997, "min_sentence1_length": 11, "average_sentence1_length": 155.21331997996995, "max_sentence1_length": 581, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 567693 + "unique_sentence2": 1996 }, "tat_Cyrl-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 515560, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 123.14772158237356, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 135.0195292939409, "max_sentence2_length": 398, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515560 + "unique_sentence2": 1997 }, "tat_Cyrl-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 492252, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 123.14772158237356, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 123.34802203304957, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 492252 + "unique_sentence2": 1995 }, "tat_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 493646, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 123.14772158237356, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 493646 + "unique_sentence2": 1997 }, "tat_Cyrl-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 506202, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 123.14772158237356, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 130.33350025037555, "max_sentence2_length": 473, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506202 + "unique_sentence2": 1996 }, "tat_Cyrl-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 496790, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 123.14772158237356, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 125.62043064596895, "max_sentence2_length": 395, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 496790 + "unique_sentence2": 1996 }, "tat_Cyrl-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 531200, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 123.14772158237356, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 142.85127691537306, "max_sentence2_length": 576, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531200 + "unique_sentence2": 1996 }, "tat_Cyrl-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 511539, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 123.14772158237356, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 511539 + "unique_sentence2": 1997 }, "tat_Cyrl-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 556948, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 123.14772158237356, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 155.74461692538807, "max_sentence2_length": 592, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 556948 + "unique_sentence2": 1996 }, "tat_Cyrl-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 539621, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 123.14772158237356, "max_sentence1_length": 539, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 147.06810215322986, "max_sentence2_length": 470, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 539621 + "unique_sentence2": 1996 }, "tel_Telu-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 491329, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 123.9494241362043, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 491329 + "unique_sentence2": 1997 }, "tel_Telu-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 551375, + "unique_pairs": 1996, "min_sentence1_length": 10, "average_sentence1_length": 123.9494241362043, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 152.15222834251378, "max_sentence2_length": 609, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 551375 + "unique_sentence2": 1996 }, "tel_Telu-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 495247, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 123.9494241362043, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495247 + "unique_sentence2": 1997 }, "tel_Telu-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 522730, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 123.9494241362043, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 137.80821231847773, "max_sentence2_length": 393, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522730 + "unique_sentence2": 1997 }, "tel_Telu-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 492458, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 123.9494241362043, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 122.64947421131697, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 492458 + "unique_sentence2": 1997 }, "tel_Telu-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 509102, + "unique_pairs": 1996, "min_sentence1_length": 10, "average_sentence1_length": 123.9494241362043, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509102 + "unique_sentence2": 1996 }, "tel_Telu-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 513063, + "unique_pairs": 1996, "min_sentence1_length": 10, "average_sentence1_length": 123.9494241362043, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 132.96745117676514, "max_sentence2_length": 449, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 513063 + "unique_sentence2": 1996 }, "tel_Telu-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 508414, + "unique_pairs": 1996, "min_sentence1_length": 10, "average_sentence1_length": 123.9494241362043, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 130.63945918878318, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508414 + "unique_sentence2": 1995 }, "tel_Telu-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 495750, + "unique_pairs": 1996, "min_sentence1_length": 10, "average_sentence1_length": 123.9494241362043, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 124.29794692038057, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495750 + "unique_sentence2": 1996 }, "tel_Telu-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 497949, + "unique_pairs": 1996, "min_sentence1_length": 10, "average_sentence1_length": 123.9494241362043, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 125.39909864797195, "max_sentence2_length": 383, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 497949 + "unique_sentence2": 1996 }, "tel_Telu-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 506268, + "unique_pairs": 1996, "min_sentence1_length": 10, "average_sentence1_length": 123.9494241362043, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 129.56484727090637, "max_sentence2_length": 441, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506268 + "unique_sentence2": 1996 }, "tel_Telu-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 467854, + "unique_pairs": 1996, "min_sentence1_length": 10, "average_sentence1_length": 123.9494241362043, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 110.32899349023535, "max_sentence2_length": 335, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 467854 + "unique_sentence2": 1996 }, "tel_Telu-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 557488, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 123.9494241362043, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 557488 + "unique_sentence2": 1997 }, "tel_Telu-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 495525, + "unique_pairs": 1996, "min_sentence1_length": 10, "average_sentence1_length": 123.9494241362043, "max_sentence1_length": 412, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.18527791687531, "max_sentence2_length": 390, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495525 + "unique_sentence2": 1996 }, "tgk_Cyrl-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 505328, + "unique_pairs": 1995, "min_sentence1_length": 3, "average_sentence1_length": 137.27941912869304, "max_sentence1_length": 451, + "unique_sentence1": 1995, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 505328 + "unique_sentence2": 1995 }, "tgk_Cyrl-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 526514, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 137.27941912869304, "max_sentence1_length": 451, + "unique_sentence1": 1995, "min_sentence2_length": 5, "average_sentence2_length": 126.37305958938407, "max_sentence2_length": 399, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 526514 + "unique_sentence2": 1995 }, "tgk_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 521867, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 137.27941912869304, "max_sentence1_length": 451, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 521867 + "unique_sentence2": 1997 }, "tgk_Cyrl-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 517486, + "unique_pairs": 1995, "min_sentence1_length": 3, "average_sentence1_length": 137.27941912869304, "max_sentence1_length": 451, + "unique_sentence1": 1995, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 517486 + "unique_sentence2": 1995 }, "tgk_Cyrl-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 474443, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 137.27941912869304, "max_sentence1_length": 451, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 474443 + "unique_sentence2": 1996 }, "tgk_Cyrl-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 520093, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 137.27941912869304, "max_sentence1_length": 451, + "unique_sentence1": 1995, "min_sentence2_length": 7, "average_sentence2_length": 123.15773660490736, "max_sentence2_length": 420, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 520093 + "unique_sentence2": 1996 }, "tgk_Cyrl-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 487982, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 137.27941912869304, "max_sentence1_length": 451, + "unique_sentence1": 1995, "min_sentence2_length": 6, "average_sentence2_length": 107.07811717576365, "max_sentence2_length": 392, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 487982 + "unique_sentence2": 1993 }, "tgk_Cyrl-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 516683, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 137.27941912869304, "max_sentence1_length": 451, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 121.45017526289435, "max_sentence2_length": 365, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 516683 + "unique_sentence2": 1997 }, "tgk_Cyrl-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 516780, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 137.27941912869304, "max_sentence1_length": 451, + "unique_sentence1": 1995, "min_sentence2_length": 8, "average_sentence2_length": 121.49874812218327, "max_sentence2_length": 366, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 516780 + "unique_sentence2": 1996 }, "tgk_Cyrl-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 489060, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 137.27941912869304, "max_sentence1_length": 451, + "unique_sentence1": 1995, "min_sentence2_length": 3, "average_sentence2_length": 107.6179268903355, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 489060 + "unique_sentence2": 1996 }, "tha_Thai-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 538097, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 118.91236855282925, "max_sentence1_length": 439, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 150.54031046569855, "max_sentence2_length": 478, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 538097 + "unique_sentence2": 1993 }, "tha_Thai-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 480689, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 118.91236855282925, "max_sentence1_length": 439, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 121.79318978467701, "max_sentence2_length": 411, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 480689 + "unique_sentence2": 1992 }, "tha_Thai-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 485188, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 118.91236855282925, "max_sentence1_length": 439, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 485188 + "unique_sentence2": 1997 }, "tha_Thai-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 525959, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 118.91236855282925, "max_sentence1_length": 439, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 144.4621932899349, "max_sentence2_length": 517, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525959 + "unique_sentence2": 1996 }, "tha_Thai-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 504448, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 118.91236855282925, "max_sentence1_length": 439, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 133.69053580370556, "max_sentence2_length": 507, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 504448 + "unique_sentence2": 1997 }, "tha_Thai-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 496516, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 118.91236855282925, "max_sentence1_length": 439, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 129.7185778668002, "max_sentence2_length": 414, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 496516 + "unique_sentence2": 1997 }, "tha_Thai-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 549322, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 118.91236855282925, "max_sentence1_length": 439, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 156.16124186279418, "max_sentence2_length": 773, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549322 + "unique_sentence2": 1997 }, "tir_Ethi-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 332745, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 82.743114672008, "max_sentence1_length": 272, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 83.87931897846771, "max_sentence2_length": 290, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 332745 + "unique_sentence2": 1994 }, "tir_Ethi-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 412958, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 82.743114672008, "max_sentence1_length": 272, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 412958 + "unique_sentence2": 1997 }, "tir_Ethi-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 435204, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 82.743114672008, "max_sentence1_length": 272, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 135.185778668002, "max_sentence2_length": 483, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 435204 + "unique_sentence2": 1997 }, "tir_Ethi-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 411339, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 82.743114672008, "max_sentence1_length": 272, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 123.23535302954431, "max_sentence2_length": 469, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 411339 + "unique_sentence2": 1997 }, "tir_Ethi-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 456737, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 82.743114672008, "max_sentence1_length": 272, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 145.96845267901853, "max_sentence2_length": 487, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 456737 + "unique_sentence2": 1996 }, "tir_Ethi-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 402669, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 82.743114672008, "max_sentence1_length": 272, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 118.89384076114172, "max_sentence2_length": 466, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 402669 + "unique_sentence2": 1984 }, "tir_Ethi-som_Latn": { + "num_samples": 1997, + "number_of_characters": 456530, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 82.743114672008, "max_sentence1_length": 272, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 145.8647971957937, "max_sentence2_length": 455, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 456530 + "unique_sentence2": 1997 }, "tir_Ethi-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 453380, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 82.743114672008, "max_sentence1_length": 272, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 144.28743114672008, "max_sentence2_length": 510, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 453380 + "unique_sentence2": 1996 }, "tir_Ethi-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 437747, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 82.743114672008, "max_sentence1_length": 272, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 437747 + "unique_sentence2": 1997 }, "tir_Ethi-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 499521, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 82.743114672008, "max_sentence1_length": 272, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 167.39258888332498, "max_sentence2_length": 556, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 499521 + "unique_sentence2": 1997 }, "tir_Ethi-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 405041, + "unique_pairs": 1996, "min_sentence1_length": 5, "average_sentence1_length": 82.743114672008, "max_sentence1_length": 272, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 120.08162243365048, "max_sentence2_length": 405, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 405041 + "unique_sentence2": 1990 }, "tir_Ethi-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 433328, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 82.743114672008, "max_sentence1_length": 272, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 134.2463695543315, "max_sentence2_length": 492, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 433328 + "unique_sentence2": 1997 }, "tir_Ethi-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 481326, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 82.743114672008, "max_sentence1_length": 272, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 158.2814221331998, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 481326 + "unique_sentence2": 1996 }, "tir_Ethi-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 422970, + "unique_pairs": 1997, "min_sentence1_length": 5, "average_sentence1_length": 82.743114672008, "max_sentence1_length": 272, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 422970 + "unique_sentence2": 1996 }, "ton_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 561360, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 157.05558337506258, "max_sentence1_length": 468, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561360 + "unique_sentence2": 1997 }, "ton_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 614145, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 157.05558337506258, "max_sentence1_length": 468, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 150.478217325989, "max_sentence2_length": 448, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 614145 + "unique_sentence2": 1988 }, "ton_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 607060, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 157.05558337506258, "max_sentence1_length": 468, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 146.93039559339007, "max_sentence2_length": 554, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 607060 + "unique_sentence2": 1997 }, "ton_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 600612, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 157.05558337506258, "max_sentence1_length": 468, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 600612 + "unique_sentence2": 1997 }, "ton_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 617792, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 157.05558337506258, "max_sentence1_length": 468, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 152.30445668502753, "max_sentence2_length": 540, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 617792 + "unique_sentence2": 1996 }, "ton_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 633948, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 157.05558337506258, "max_sentence1_length": 468, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 160.39459188783175, "max_sentence2_length": 559, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 633948 + "unique_sentence2": 1994 }, "ton_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 587764, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 157.05558337506258, "max_sentence1_length": 468, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 137.26790185277918, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 587764 + "unique_sentence2": 1997 }, "ton_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 590823, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 157.05558337506258, "max_sentence1_length": 468, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 138.79969954932398, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 590823 + "unique_sentence2": 1997 }, "ton_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 591495, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 157.05558337506258, "max_sentence1_length": 468, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 139.1362043064597, "max_sentence2_length": 431, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 591495 + "unique_sentence2": 1996 }, "ton_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 623263, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 157.05558337506258, "max_sentence1_length": 468, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 155.04406609914872, "max_sentence2_length": 524, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 623263 + "unique_sentence2": 1997 }, "tsn_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 501790, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 167.39258888332498, "max_sentence1_length": 556, + "unique_sentence1": 1997, "min_sentence2_length": 1, "average_sentence2_length": 83.87931897846771, "max_sentence2_length": 290, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 501790 + "unique_sentence2": 1994 }, "tsn_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 582003, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 167.39258888332498, "max_sentence1_length": 556, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 582003 + "unique_sentence2": 1997 }, "tsn_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 604249, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 167.39258888332498, "max_sentence1_length": 556, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 135.185778668002, "max_sentence2_length": 483, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 604249 + "unique_sentence2": 1997 }, "tsn_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 580384, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 167.39258888332498, "max_sentence1_length": 556, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 123.23535302954431, "max_sentence2_length": 469, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 580384 + "unique_sentence2": 1997 }, "tsn_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 625782, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 167.39258888332498, "max_sentence1_length": 556, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 145.96845267901853, "max_sentence2_length": 487, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 625782 + "unique_sentence2": 1996 }, "tsn_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 571714, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 167.39258888332498, "max_sentence1_length": 556, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 118.89384076114172, "max_sentence2_length": 466, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 571714 + "unique_sentence2": 1984 }, "tsn_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 625575, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 167.39258888332498, "max_sentence1_length": 556, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 145.8647971957937, "max_sentence2_length": 455, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 625575 + "unique_sentence2": 1997 }, "tsn_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 622425, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 167.39258888332498, "max_sentence1_length": 556, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 144.28743114672008, "max_sentence2_length": 510, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 622425 + "unique_sentence2": 1996 }, "tsn_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 606792, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 167.39258888332498, "max_sentence1_length": 556, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 606792 + "unique_sentence2": 1997 }, "tsn_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 499521, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 167.39258888332498, "max_sentence1_length": 556, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 82.743114672008, "max_sentence2_length": 272, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 499521 + "unique_sentence2": 1996 }, "tsn_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 574086, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 167.39258888332498, "max_sentence1_length": 556, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 120.08162243365048, "max_sentence2_length": 405, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 574086 + "unique_sentence2": 1990 }, "tsn_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 602373, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 167.39258888332498, "max_sentence1_length": 556, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 134.2463695543315, "max_sentence2_length": 492, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 602373 + "unique_sentence2": 1997 }, "tsn_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 650371, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 167.39258888332498, "max_sentence1_length": 556, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 158.2814221331998, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 650371 + "unique_sentence2": 1996 }, "tsn_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 592015, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 167.39258888332498, "max_sentence1_length": 556, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 592015 + "unique_sentence2": 1996 }, "tuk_Latn-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 554908, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 142.85127691537306, "max_sentence1_length": 576, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 135.0195292939409, "max_sentence2_length": 398, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 554908 + "unique_sentence2": 1997 }, "tuk_Latn-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 531600, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 142.85127691537306, "max_sentence1_length": 576, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 123.34802203304957, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531600 + "unique_sentence2": 1995 }, "tuk_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 532994, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 142.85127691537306, "max_sentence1_length": 576, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532994 + "unique_sentence2": 1997 }, "tuk_Latn-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 545550, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 142.85127691537306, "max_sentence1_length": 576, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 130.33350025037555, "max_sentence2_length": 473, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 545550 + "unique_sentence2": 1996 }, "tuk_Latn-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 536138, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 142.85127691537306, "max_sentence1_length": 576, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 125.62043064596895, "max_sentence2_length": 395, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536138 + "unique_sentence2": 1996 }, "tuk_Latn-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 531200, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 142.85127691537306, "max_sentence1_length": 576, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 123.14772158237356, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531200 + "unique_sentence2": 1996 }, "tuk_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 550887, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 142.85127691537306, "max_sentence1_length": 576, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 550887 + "unique_sentence2": 1997 }, "tuk_Latn-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 596296, + "unique_pairs": 1997, "min_sentence1_length": 9, "average_sentence1_length": 142.85127691537306, "max_sentence1_length": 576, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 155.74461692538807, "max_sentence2_length": 592, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 596296 + "unique_sentence2": 1996 }, "tuk_Latn-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 578969, + "unique_pairs": 1996, "min_sentence1_length": 9, "average_sentence1_length": 142.85127691537306, "max_sentence1_length": 576, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 147.06810215322986, "max_sentence2_length": 470, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 578969 + "unique_sentence2": 1996 }, "tur_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 496794, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 496794 + "unique_sentence2": 1995 }, "tur_Latn-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 535247, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 135.0195292939409, "max_sentence2_length": 398, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 535247 + "unique_sentence2": 1997 }, "tur_Latn-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 511939, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 11, "average_sentence2_length": 123.34802203304957, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 511939 + "unique_sentence2": 1995 }, "tur_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 509415, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509415 + "unique_sentence2": 1997 }, "tur_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 561263, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561263 + "unique_sentence2": 1996 }, "tur_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 564740, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564740 + "unique_sentence2": 1996 }, "tur_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 513333, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 513333 + "unique_sentence2": 1997 }, "tur_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 508952, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508952 + "unique_sentence2": 1995 }, "tur_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 535413, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 535413 + "unique_sentence2": 1996 }, "tur_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 558721, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 558721 + "unique_sentence2": 1996 }, "tur_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 465909, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 465909 + "unique_sentence2": 1996 }, "tur_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 527188, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 527188 + "unique_sentence2": 1996 }, "tur_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 543989, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 543989 + "unique_sentence2": 1997 }, "tur_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 552585, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 552585 + "unique_sentence2": 1997 }, "tur_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 377239, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 377239 + "unique_sentence2": 1994 }, "tur_Latn-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 525889, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 130.33350025037555, "max_sentence2_length": 473, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525889 + "unique_sentence2": 1996 }, "tur_Latn-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 516477, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 125.62043064596895, "max_sentence2_length": 395, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 516477 + "unique_sentence2": 1996 }, "tur_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 399018, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 399018 + "unique_sentence2": 1995 }, "tur_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 525010, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525010 + "unique_sentence2": 1995 }, "tur_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 557528, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 557528 + "unique_sentence2": 1996 }, "tur_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 543479, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 543479 + "unique_sentence2": 1996 }, "tur_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 542828, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 542828 + "unique_sentence2": 1996 }, "tur_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 540506, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540506 + "unique_sentence2": 1996 }, "tur_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 553813, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 553813 + "unique_sentence2": 1996 }, "tur_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 538122, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 538122 + "unique_sentence2": 1997 }, "tur_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 517440, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 517440 + "unique_sentence2": 1996 }, "tur_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 575574, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 575574 + "unique_sentence2": 1997 }, "tur_Latn-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 511539, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 123.14772158237356, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 511539 + "unique_sentence2": 1996 }, "tur_Latn-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 550887, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 142.85127691537306, "max_sentence2_length": 576, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 550887 + "unique_sentence2": 1996 }, "tur_Latn-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 576635, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 155.74461692538807, "max_sentence2_length": 592, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 576635 + "unique_sentence2": 1996 }, "tur_Latn-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 559308, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 147.06810215322986, "max_sentence2_length": 470, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 559308 + "unique_sentence2": 1996 }, "tur_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 536734, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536734 + "unique_sentence2": 1996 }, "tur_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 357091, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 357091 + "unique_sentence2": 1996 }, "tur_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 523345, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 133.00600901352027, "max_sentence1_length": 504, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 523345 + "unique_sentence2": 1996 }, "uig_Arab-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 580656, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 155.74461692538807, "max_sentence1_length": 592, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 135.0195292939409, "max_sentence2_length": 398, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 580656 + "unique_sentence2": 1997 }, "uig_Arab-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 557348, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 155.74461692538807, "max_sentence1_length": 592, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 123.34802203304957, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 557348 + "unique_sentence2": 1995 }, "uig_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 558742, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 155.74461692538807, "max_sentence1_length": 592, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 558742 + "unique_sentence2": 1997 }, "uig_Arab-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 571298, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 155.74461692538807, "max_sentence1_length": 592, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 130.33350025037555, "max_sentence2_length": 473, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 571298 + "unique_sentence2": 1996 }, "uig_Arab-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 561886, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 155.74461692538807, "max_sentence1_length": 592, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 125.62043064596895, "max_sentence2_length": 395, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 561886 + "unique_sentence2": 1996 }, "uig_Arab-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 556948, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 155.74461692538807, "max_sentence1_length": 592, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 123.14772158237356, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 556948 + "unique_sentence2": 1996 }, "uig_Arab-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 596296, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 155.74461692538807, "max_sentence1_length": 592, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 142.85127691537306, "max_sentence2_length": 576, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 596296 + "unique_sentence2": 1996 }, "uig_Arab-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 576635, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 155.74461692538807, "max_sentence1_length": 592, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 576635 + "unique_sentence2": 1997 }, "uig_Arab-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 604717, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 155.74461692538807, "max_sentence1_length": 592, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 147.06810215322986, "max_sentence2_length": 470, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 604717 + "unique_sentence2": 1996 }, "ukr_Cyrl-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 518873, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 131.58888332498748, "max_sentence1_length": 440, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 128.2373560340511, "max_sentence2_length": 422, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 518873 + "unique_sentence2": 1996 }, "ukr_Cyrl-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 517693, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 131.58888332498748, "max_sentence1_length": 440, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 127.64646970455684, "max_sentence2_length": 434, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 517693 + "unique_sentence2": 1996 }, "ukr_Cyrl-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 532672, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 131.58888332498748, "max_sentence1_length": 440, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 135.14722083124687, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532672 + "unique_sentence2": 1996 }, "ukr_Cyrl-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 504101, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 131.58888332498748, "max_sentence1_length": 440, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 120.84026039058588, "max_sentence2_length": 474, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 504101 + "unique_sentence2": 1997 }, "ukr_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 510503, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 131.58888332498748, "max_sentence1_length": 440, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 510503 + "unique_sentence2": 1997 }, "ukr_Cyrl-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 518708, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 131.58888332498748, "max_sentence1_length": 440, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 128.1547320981472, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 518708 + "unique_sentence2": 1997 }, "ukr_Cyrl-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 530674, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 131.58888332498748, "max_sentence1_length": 440, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 134.1467200801202, "max_sentence2_length": 451, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530674 + "unique_sentence2": 1997 }, "ukr_Cyrl-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 540649, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 131.58888332498748, "max_sentence1_length": 440, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540649 + "unique_sentence2": 1996 }, "ukr_Cyrl-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 537676, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 131.58888332498748, "max_sentence1_length": 440, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 537676 + "unique_sentence2": 1996 }, "ukr_Cyrl-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 515752, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 131.58888332498748, "max_sentence1_length": 440, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 126.67451176765148, "max_sentence2_length": 403, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515752 + "unique_sentence2": 1996 }, "ukr_Cyrl-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 515679, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 131.58888332498748, "max_sentence1_length": 440, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 126.63795693540311, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515679 + "unique_sentence2": 1996 }, "ukr_Cyrl-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515086, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 131.58888332498748, "max_sentence1_length": 440, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 126.34101151727592, "max_sentence2_length": 439, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515086 + "unique_sentence2": 1995 }, "ukr_Cyrl-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 518924, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 131.58888332498748, "max_sentence1_length": 440, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 128.26289434151226, "max_sentence2_length": 452, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 518924 + "unique_sentence2": 1996 }, "urd_Arab-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 491800, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.18527791687531, "max_sentence1_length": 390, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 491800 + "unique_sentence2": 1997 }, "urd_Arab-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 551846, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 124.18527791687531, "max_sentence1_length": 390, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 152.15222834251378, "max_sentence2_length": 609, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 551846 + "unique_sentence2": 1996 }, "urd_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 495718, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.18527791687531, "max_sentence1_length": 390, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495718 + "unique_sentence2": 1997 }, "urd_Arab-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 523201, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.18527791687531, "max_sentence1_length": 390, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 137.80821231847773, "max_sentence2_length": 393, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 523201 + "unique_sentence2": 1997 }, "urd_Arab-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 492929, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.18527791687531, "max_sentence1_length": 390, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 122.64947421131697, "max_sentence2_length": 378, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 492929 + "unique_sentence2": 1997 }, "urd_Arab-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 509573, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 124.18527791687531, "max_sentence1_length": 390, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509573 + "unique_sentence2": 1996 }, "urd_Arab-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 513534, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 124.18527791687531, "max_sentence1_length": 390, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 132.96745117676514, "max_sentence2_length": 449, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 513534 + "unique_sentence2": 1996 }, "urd_Arab-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 508885, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 124.18527791687531, "max_sentence1_length": 390, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 130.63945918878318, "max_sentence2_length": 443, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 508885 + "unique_sentence2": 1995 }, "urd_Arab-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 496221, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 124.18527791687531, "max_sentence1_length": 390, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 124.29794692038057, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 496221 + "unique_sentence2": 1996 }, "urd_Arab-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 498420, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 124.18527791687531, "max_sentence1_length": 390, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 125.39909864797195, "max_sentence2_length": 383, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 498420 + "unique_sentence2": 1996 }, "urd_Arab-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 506739, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 124.18527791687531, "max_sentence1_length": 390, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 129.56484727090637, "max_sentence2_length": 441, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 506739 + "unique_sentence2": 1996 }, "urd_Arab-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 468325, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 124.18527791687531, "max_sentence1_length": 390, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 110.32899349023535, "max_sentence2_length": 335, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 468325 + "unique_sentence2": 1996 }, "urd_Arab-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 557959, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 124.18527791687531, "max_sentence1_length": 390, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 557959 + "unique_sentence2": 1997 }, "urd_Arab-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 495525, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 124.18527791687531, "max_sentence1_length": 390, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 123.9494241362043, "max_sentence2_length": 412, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495525 + "unique_sentence2": 1996 }, "uzb_Latn-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 563329, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 147.06810215322986, "max_sentence1_length": 470, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 135.0195292939409, "max_sentence2_length": 398, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 563329 + "unique_sentence2": 1997 }, "uzb_Latn-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 540021, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 147.06810215322986, "max_sentence1_length": 470, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 123.34802203304957, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540021 + "unique_sentence2": 1995 }, "uzb_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 541415, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 147.06810215322986, "max_sentence1_length": 470, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 541415 + "unique_sentence2": 1997 }, "uzb_Latn-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 553971, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 147.06810215322986, "max_sentence1_length": 470, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 130.33350025037555, "max_sentence2_length": 473, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 553971 + "unique_sentence2": 1996 }, "uzb_Latn-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 544559, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 147.06810215322986, "max_sentence1_length": 470, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 125.62043064596895, "max_sentence2_length": 395, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 544559 + "unique_sentence2": 1996 }, "uzb_Latn-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 539621, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 147.06810215322986, "max_sentence1_length": 470, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 123.14772158237356, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 539621 + "unique_sentence2": 1996 }, "uzb_Latn-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 578969, + "unique_pairs": 1996, "min_sentence1_length": 8, "average_sentence1_length": 147.06810215322986, "max_sentence1_length": 470, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 142.85127691537306, "max_sentence2_length": 576, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 578969 + "unique_sentence2": 1996 }, "uzb_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 559308, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 147.06810215322986, "max_sentence1_length": 470, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 559308 + "unique_sentence2": 1997 }, "uzb_Latn-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 604717, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 147.06810215322986, "max_sentence1_length": 470, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 155.74461692538807, "max_sentence2_length": 592, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 604717 + "unique_sentence2": 1996 }, "ven_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 598248, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 150.10315473209815, "max_sentence1_length": 535, + "unique_sentence1": 1993, "min_sentence2_length": 8, "average_sentence2_length": 149.47020530796195, "max_sentence2_length": 465, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 598248 + "unique_sentence2": 1997 }, "ven_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 547476, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 150.10315473209815, "max_sentence1_length": 535, + "unique_sentence1": 1993, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 547476 + "unique_sentence2": 1997 }, "ven_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 538734, + "unique_pairs": 1996, "min_sentence1_length": 10, "average_sentence1_length": 150.10315473209815, "max_sentence1_length": 535, + "unique_sentence1": 1993, "min_sentence2_length": 6, "average_sentence2_length": 119.6685027541312, "max_sentence2_length": 493, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 538734 + "unique_sentence2": 1994 }, "ven_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 528236, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 150.10315473209815, "max_sentence1_length": 535, + "unique_sentence1": 1993, "min_sentence2_length": 6, "average_sentence2_length": 114.4116174261392, "max_sentence2_length": 376, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 528236 + "unique_sentence2": 1996 }, "ven_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 603543, + "unique_pairs": 1996, "min_sentence1_length": 10, "average_sentence1_length": 150.10315473209815, "max_sentence1_length": 535, + "unique_sentence1": 1993, "min_sentence2_length": 10, "average_sentence2_length": 152.12168252378567, "max_sentence2_length": 541, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 603543 + "unique_sentence2": 1996 }, "ven_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 597495, + "unique_pairs": 1997, "min_sentence1_length": 10, "average_sentence1_length": 150.10315473209815, "max_sentence1_length": 535, + "unique_sentence1": 1993, "min_sentence2_length": 6, "average_sentence2_length": 149.09313970956435, "max_sentence2_length": 590, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 597495 + "unique_sentence2": 1997 }, "ven_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 584038, + "unique_pairs": 1996, "min_sentence1_length": 10, "average_sentence1_length": 150.10315473209815, "max_sentence1_length": 535, + "unique_sentence1": 1993, "min_sentence2_length": 10, "average_sentence2_length": 142.35453179769655, "max_sentence2_length": 464, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 584038 + "unique_sentence2": 1993 }, "ven_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 598086, + "unique_pairs": 1995, "min_sentence1_length": 10, "average_sentence1_length": 150.10315473209815, "max_sentence1_length": 535, + "unique_sentence1": 1993, "min_sentence2_length": 6, "average_sentence2_length": 149.38908362543816, "max_sentence2_length": 511, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 598086 + "unique_sentence2": 1995 }, "vie_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 502302, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 502302 + "unique_sentence2": 1995 }, "vie_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 514923, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 514923 + "unique_sentence2": 1997 }, "vie_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 566771, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 566771 + "unique_sentence2": 1996 }, "vie_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 570248, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 570248 + "unique_sentence2": 1996 }, "vie_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 518841, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 518841 + "unique_sentence2": 1997 }, "vie_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 514460, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 514460 + "unique_sentence2": 1995 }, "vie_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 540921, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540921 + "unique_sentence2": 1996 }, "vie_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 564229, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 564229 + "unique_sentence2": 1996 }, "vie_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 471417, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 471417 + "unique_sentence2": 1996 }, "vie_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 532696, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532696 + "unique_sentence2": 1996 }, "vie_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 549497, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549497 + "unique_sentence2": 1997 }, "vie_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 558093, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 558093 + "unique_sentence2": 1997 }, "vie_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 382747, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 382747 + "unique_sentence2": 1994 }, "vie_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 404526, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 404526 + "unique_sentence2": 1995 }, "vie_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 530518, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530518 + "unique_sentence2": 1995 }, "vie_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 563036, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 563036 + "unique_sentence2": 1996 }, "vie_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 548987, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 548987 + "unique_sentence2": 1996 }, "vie_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 548336, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 548336 + "unique_sentence2": 1996 }, "vie_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 546014, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 546014 + "unique_sentence2": 1996 }, "vie_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 559321, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 559321 + "unique_sentence2": 1996 }, "vie_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 543630, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 543630 + "unique_sentence2": 1997 }, "vie_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 522948, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 522948 + "unique_sentence2": 1996 }, "vie_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 581082, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 581082 + "unique_sentence2": 1997 }, "vie_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 536734, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536734 + "unique_sentence2": 1997 }, "vie_Latn-yue_Hant": { + "num_samples": 1997, + "number_of_characters": 350008, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 39.502754131196795, "max_sentence2_length": 133, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 350008 + "unique_sentence2": 1996 }, "vie_Latn-zho_Hans": { + "num_samples": 1997, + "number_of_characters": 356082, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 42.54431647471207, "max_sentence2_length": 263, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 356082 + "unique_sentence2": 1997 }, "vie_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 362599, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 362599 + "unique_sentence2": 1996 }, "vie_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 528853, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 135.764146219329, "max_sentence1_length": 437, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 528853 + "unique_sentence2": 1996 }, "wol_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 407310, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 120.08162243365048, "max_sentence1_length": 405, + "unique_sentence1": 1990, "min_sentence2_length": 1, "average_sentence2_length": 83.87931897846771, "max_sentence2_length": 290, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 407310 + "unique_sentence2": 1994 }, "wol_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 487523, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 120.08162243365048, "max_sentence1_length": 405, + "unique_sentence1": 1990, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 487523 + "unique_sentence2": 1997 }, "wol_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 509769, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 120.08162243365048, "max_sentence1_length": 405, + "unique_sentence1": 1990, "min_sentence2_length": 8, "average_sentence2_length": 135.185778668002, "max_sentence2_length": 483, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509769 + "unique_sentence2": 1997 }, "wol_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 485904, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 120.08162243365048, "max_sentence1_length": 405, + "unique_sentence1": 1990, "min_sentence2_length": 6, "average_sentence2_length": 123.23535302954431, "max_sentence2_length": 469, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 485904 + "unique_sentence2": 1997 }, "wol_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 531302, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 120.08162243365048, "max_sentence1_length": 405, + "unique_sentence1": 1990, "min_sentence2_length": 5, "average_sentence2_length": 145.96845267901853, "max_sentence2_length": 487, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531302 + "unique_sentence2": 1996 }, "wol_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 477234, + "unique_pairs": 1992, "min_sentence1_length": 6, "average_sentence1_length": 120.08162243365048, "max_sentence1_length": 405, + "unique_sentence1": 1990, "min_sentence2_length": 9, "average_sentence2_length": 118.89384076114172, "max_sentence2_length": 466, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 477234 + "unique_sentence2": 1984 }, "wol_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 531095, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 120.08162243365048, "max_sentence1_length": 405, + "unique_sentence1": 1990, "min_sentence2_length": 8, "average_sentence2_length": 145.8647971957937, "max_sentence2_length": 455, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 531095 + "unique_sentence2": 1997 }, "wol_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 527945, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 120.08162243365048, "max_sentence1_length": 405, + "unique_sentence1": 1990, "min_sentence2_length": 8, "average_sentence2_length": 144.28743114672008, "max_sentence2_length": 510, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 527945 + "unique_sentence2": 1996 }, "wol_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 512312, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 120.08162243365048, "max_sentence1_length": 405, + "unique_sentence1": 1990, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 512312 + "unique_sentence2": 1997 }, "wol_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 405041, + "unique_pairs": 1996, "min_sentence1_length": 6, "average_sentence1_length": 120.08162243365048, "max_sentence1_length": 405, + "unique_sentence1": 1990, "min_sentence2_length": 5, "average_sentence2_length": 82.743114672008, "max_sentence2_length": 272, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 405041 + "unique_sentence2": 1996 }, "wol_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 574086, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 120.08162243365048, "max_sentence1_length": 405, + "unique_sentence1": 1990, "min_sentence2_length": 6, "average_sentence2_length": 167.39258888332498, "max_sentence2_length": 556, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 574086 + "unique_sentence2": 1997 }, "wol_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 507893, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 120.08162243365048, "max_sentence1_length": 405, + "unique_sentence1": 1990, "min_sentence2_length": 6, "average_sentence2_length": 134.2463695543315, "max_sentence2_length": 492, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 507893 + "unique_sentence2": 1997 }, "wol_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 555891, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 120.08162243365048, "max_sentence1_length": 405, + "unique_sentence1": 1990, "min_sentence2_length": 7, "average_sentence2_length": 158.2814221331998, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 555891 + "unique_sentence2": 1996 }, "wol_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 497535, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 120.08162243365048, "max_sentence1_length": 405, + "unique_sentence1": 1990, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 497535 + "unique_sentence2": 1996 }, "xho_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 435597, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.2463695543315, "max_sentence1_length": 492, + "unique_sentence1": 1997, "min_sentence2_length": 1, "average_sentence2_length": 83.87931897846771, "max_sentence2_length": 290, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 435597 + "unique_sentence2": 1994 }, "xho_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 515810, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.2463695543315, "max_sentence1_length": 492, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 515810 + "unique_sentence2": 1997 }, "xho_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 538056, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.2463695543315, "max_sentence1_length": 492, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 135.185778668002, "max_sentence2_length": 483, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 538056 + "unique_sentence2": 1997 }, "xho_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 514191, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.2463695543315, "max_sentence1_length": 492, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 123.23535302954431, "max_sentence2_length": 469, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 514191 + "unique_sentence2": 1997 }, "xho_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 559589, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.2463695543315, "max_sentence1_length": 492, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 145.96845267901853, "max_sentence2_length": 487, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 559589 + "unique_sentence2": 1996 }, "xho_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 505521, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.2463695543315, "max_sentence1_length": 492, + "unique_sentence1": 1997, "min_sentence2_length": 9, "average_sentence2_length": 118.89384076114172, "max_sentence2_length": 466, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 505521 + "unique_sentence2": 1984 }, "xho_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 559382, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.2463695543315, "max_sentence1_length": 492, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 145.8647971957937, "max_sentence2_length": 455, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 559382 + "unique_sentence2": 1997 }, "xho_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 556232, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.2463695543315, "max_sentence1_length": 492, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 144.28743114672008, "max_sentence2_length": 510, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 556232 + "unique_sentence2": 1996 }, "xho_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 540599, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.2463695543315, "max_sentence1_length": 492, + "unique_sentence1": 1997, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 540599 + "unique_sentence2": 1997 }, "xho_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 433328, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.2463695543315, "max_sentence1_length": 492, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 82.743114672008, "max_sentence2_length": 272, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 433328 + "unique_sentence2": 1996 }, "xho_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 602373, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.2463695543315, "max_sentence1_length": 492, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 167.39258888332498, "max_sentence2_length": 556, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 602373 + "unique_sentence2": 1997 }, "xho_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 507893, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.2463695543315, "max_sentence1_length": 492, + "unique_sentence1": 1997, "min_sentence2_length": 6, "average_sentence2_length": 120.08162243365048, "max_sentence2_length": 405, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 507893 + "unique_sentence2": 1990 }, "xho_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 584178, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.2463695543315, "max_sentence1_length": 492, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 158.2814221331998, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 584178 + "unique_sentence2": 1996 }, "xho_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 525822, + "unique_pairs": 1997, "min_sentence1_length": 6, "average_sentence1_length": 134.2463695543315, "max_sentence1_length": 492, + "unique_sentence1": 1997, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525822 + "unique_sentence2": 1996 }, "yor_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 483595, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 158.2814221331998, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 83.87931897846771, "max_sentence2_length": 290, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 483595 + "unique_sentence2": 1994 }, "yor_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 563808, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 158.2814221331998, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 563808 + "unique_sentence2": 1997 }, "yor_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 586054, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 158.2814221331998, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 135.185778668002, "max_sentence2_length": 483, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 586054 + "unique_sentence2": 1997 }, "yor_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 562189, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 158.2814221331998, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 123.23535302954431, "max_sentence2_length": 469, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 562189 + "unique_sentence2": 1997 }, "yor_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 607587, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 158.2814221331998, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 145.96845267901853, "max_sentence2_length": 487, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 607587 + "unique_sentence2": 1996 }, "yor_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 553519, + "unique_pairs": 1996, "min_sentence1_length": 7, "average_sentence1_length": 158.2814221331998, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 118.89384076114172, "max_sentence2_length": 466, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 553519 + "unique_sentence2": 1984 }, "yor_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 607380, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 158.2814221331998, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 145.8647971957937, "max_sentence2_length": 455, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 607380 + "unique_sentence2": 1997 }, "yor_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 604230, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 158.2814221331998, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 144.28743114672008, "max_sentence2_length": 510, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 604230 + "unique_sentence2": 1996 }, "yor_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 588597, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 158.2814221331998, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 588597 + "unique_sentence2": 1997 }, "yor_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 481326, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 158.2814221331998, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 82.743114672008, "max_sentence2_length": 272, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 481326 + "unique_sentence2": 1996 }, "yor_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 650371, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 158.2814221331998, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 167.39258888332498, "max_sentence2_length": 556, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 650371 + "unique_sentence2": 1997 }, "yor_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 555891, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 158.2814221331998, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 120.08162243365048, "max_sentence2_length": 405, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 555891 + "unique_sentence2": 1990 }, "yor_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 584178, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 158.2814221331998, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 134.2463695543315, "max_sentence2_length": 492, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 584178 + "unique_sentence2": 1997 }, "yor_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 573820, + "unique_pairs": 1997, "min_sentence1_length": 7, "average_sentence1_length": 158.2814221331998, "max_sentence1_length": 582, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 573820 + "unique_sentence2": 1996 }, "yue_Hant-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 326607, + "unique_pairs": 1997, "min_sentence1_length": 4, "average_sentence1_length": 39.502754131196795, "max_sentence1_length": 133, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 326607 + "unique_sentence2": 1997 }, "yue_Hant-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 190513, + "unique_pairs": 1996, "min_sentence1_length": 4, "average_sentence1_length": 39.502754131196795, "max_sentence1_length": 133, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 190513 + "unique_sentence2": 1994 }, "yue_Hant-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 212292, + "unique_pairs": 1996, "min_sentence1_length": 4, "average_sentence1_length": 39.502754131196795, "max_sentence1_length": 133, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 212292 + "unique_sentence2": 1995 }, "yue_Hant-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 350008, + "unique_pairs": 1997, "min_sentence1_length": 4, "average_sentence1_length": 39.502754131196795, "max_sentence1_length": 133, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 350008 + "unique_sentence2": 1996 }, "yue_Hant-zho_Hans": { + "num_samples": 1997, + "number_of_characters": 163848, + "unique_pairs": 1997, "min_sentence1_length": 4, "average_sentence1_length": 39.502754131196795, "max_sentence1_length": 133, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 42.54431647471207, "max_sentence2_length": 263, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 163848 + "unique_sentence2": 1997 }, "yue_Hant-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 170365, + "unique_pairs": 1996, "min_sentence1_length": 4, "average_sentence1_length": 39.502754131196795, "max_sentence1_length": 133, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 170365 + "unique_sentence2": 1996 }, "zho_Hans-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 332681, + "unique_pairs": 1997, "min_sentence1_length": 4, "average_sentence1_length": 42.54431647471207, "max_sentence1_length": 263, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 332681 + "unique_sentence2": 1997 }, "zho_Hans-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 196587, + "unique_pairs": 1997, "min_sentence1_length": 4, "average_sentence1_length": 42.54431647471207, "max_sentence1_length": 263, + "unique_sentence1": 1997, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 196587 + "unique_sentence2": 1994 }, "zho_Hans-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 218366, + "unique_pairs": 1997, "min_sentence1_length": 4, "average_sentence1_length": 42.54431647471207, "max_sentence1_length": 263, + "unique_sentence1": 1997, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 218366 + "unique_sentence2": 1995 }, "zho_Hans-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 356082, + "unique_pairs": 1997, "min_sentence1_length": 4, "average_sentence1_length": 42.54431647471207, "max_sentence1_length": 263, + "unique_sentence1": 1997, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 356082 + "unique_sentence2": 1996 }, "zho_Hans-yue_Hant": { + "num_samples": 1997, + "number_of_characters": 163848, + "unique_pairs": 1997, "min_sentence1_length": 4, "average_sentence1_length": 42.54431647471207, "max_sentence1_length": 263, + "unique_sentence1": 1997, "min_sentence2_length": 4, "average_sentence2_length": 39.502754131196795, "max_sentence2_length": 133, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 163848 + "unique_sentence2": 1996 }, "zho_Hans-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 176439, + "unique_pairs": 1997, "min_sentence1_length": 4, "average_sentence1_length": 42.54431647471207, "max_sentence1_length": 263, + "unique_sentence1": 1997, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 176439 + "unique_sentence2": 1996 }, "zho_Hant-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 322659, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 322659 + "unique_sentence2": 1995 }, "zho_Hant-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 335280, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 335280 + "unique_sentence2": 1997 }, "zho_Hant-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 387128, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 387128 + "unique_sentence2": 1996 }, "zho_Hant-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 390605, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 390605 + "unique_sentence2": 1996 }, "zho_Hant-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 339198, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 339198 + "unique_sentence2": 1997 }, "zho_Hant-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 334817, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 334817 + "unique_sentence2": 1995 }, "zho_Hant-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 361278, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 361278 + "unique_sentence2": 1996 }, "zho_Hant-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 384586, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 384586 + "unique_sentence2": 1996 }, "zho_Hant-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 291774, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 291774 + "unique_sentence2": 1996 }, "zho_Hant-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 353053, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 353053 + "unique_sentence2": 1996 }, "zho_Hant-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 369854, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 369854 + "unique_sentence2": 1997 }, "zho_Hant-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 378450, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 378450 + "unique_sentence2": 1997 }, "zho_Hant-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 203104, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 203104 + "unique_sentence2": 1994 }, "zho_Hant-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 224883, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 224883 + "unique_sentence2": 1995 }, "zho_Hant-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 350875, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 350875 + "unique_sentence2": 1995 }, "zho_Hant-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 383393, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 383393 + "unique_sentence2": 1996 }, "zho_Hant-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 369344, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 369344 + "unique_sentence2": 1996 }, "zho_Hant-por_Latn": { + "num_samples": 1997, + "number_of_characters": 368693, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 368693 + "unique_sentence2": 1996 }, "zho_Hant-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 366371, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 366371 + "unique_sentence2": 1996 }, "zho_Hant-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 379678, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 379678 + "unique_sentence2": 1996 }, "zho_Hant-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 363987, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 363987 + "unique_sentence2": 1997 }, "zho_Hant-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 343305, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 343305 + "unique_sentence2": 1996 }, "zho_Hant-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 401439, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 401439 + "unique_sentence2": 1997 }, "zho_Hant-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 357091, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 357091 + "unique_sentence2": 1997 }, "zho_Hant-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 362599, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 362599 + "unique_sentence2": 1996 }, "zho_Hant-yue_Hant": { + "num_samples": 1997, + "number_of_characters": 170365, + "unique_pairs": 1996, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 39.502754131196795, "max_sentence2_length": 133, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 170365 + "unique_sentence2": 1996 }, "zho_Hant-zho_Hans": { + "num_samples": 1997, + "number_of_characters": 176439, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 42.54431647471207, "max_sentence2_length": 263, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 176439 + "unique_sentence2": 1997 }, "zho_Hant-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 349210, + "unique_pairs": 1997, "min_sentence1_length": 3, "average_sentence1_length": 45.80771156735103, "max_sentence1_length": 200, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 129.0595893840761, "max_sentence2_length": 494, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 349210 + "unique_sentence2": 1996 }, "zul_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 425239, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 83.87931897846771, "max_sentence2_length": 290, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 425239 + "unique_sentence2": 1994 }, "zul_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 488913, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 115.76414621932899, "max_sentence2_length": 362, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 488913 + "unique_sentence2": 1995 }, "zul_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 501534, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 122.08412618928392, "max_sentence2_length": 402, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 501534 + "unique_sentence2": 1997 }, "zul_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 553382, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 148.04707060590886, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 553382 + "unique_sentence2": 1996 }, "zul_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 556859, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 149.78818227341011, "max_sentence2_length": 584, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 556859 + "unique_sentence2": 1996 }, "zul_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 505452, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 124.04606910365548, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 505452 + "unique_sentence2": 1997 }, "zul_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 501071, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 121.85227841762644, "max_sentence2_length": 389, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 501071 + "unique_sentence2": 1995 }, "zul_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 527532, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.10265398097147, "max_sentence2_length": 463, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 527532 + "unique_sentence2": 1996 }, "zul_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 550840, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.7741612418628, "max_sentence2_length": 512, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 550840 + "unique_sentence2": 1996 }, "zul_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 527698, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 135.185778668002, "max_sentence2_length": 483, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 527698 + "unique_sentence2": 1997 }, "zul_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 458028, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 100.29844767150726, "max_sentence2_length": 375, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 458028 + "unique_sentence2": 1996 }, "zul_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 519307, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 130.9839759639459, "max_sentence2_length": 394, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 519307 + "unique_sentence2": 1996 }, "zul_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 536108, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 139.3970956434652, "max_sentence2_length": 508, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 536108 + "unique_sentence2": 1997 }, "zul_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 503833, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 123.23535302954431, "max_sentence2_length": 469, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 503833 + "unique_sentence2": 1997 }, "zul_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 544704, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 143.70155232849274, "max_sentence2_length": 486, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 544704 + "unique_sentence2": 1997 }, "zul_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 369358, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 4, "average_sentence2_length": 55.89684526790185, "max_sentence2_length": 189, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 369358 + "unique_sentence2": 1994 }, "zul_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 391137, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 66.80270405608412, "max_sentence2_length": 217, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 391137 + "unique_sentence2": 1995 }, "zul_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 517129, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 129.893340010015, "max_sentence2_length": 446, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 517129 + "unique_sentence2": 1995 }, "zul_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 549647, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 146.1767651477216, "max_sentence2_length": 539, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549647 + "unique_sentence2": 1996 }, "zul_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 549231, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 145.96845267901853, "max_sentence2_length": 487, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549231 + "unique_sentence2": 1996 }, "zul_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 495163, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 118.89384076114172, "max_sentence2_length": 466, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 495163 + "unique_sentence2": 1984 }, "zul_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 535598, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 9, "average_sentence2_length": 139.14171256885328, "max_sentence2_length": 468, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 535598 + "unique_sentence2": 1996 }, "zul_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 534947, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 138.81572358537807, "max_sentence2_length": 497, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 534947 + "unique_sentence2": 1996 }, "zul_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 532625, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 137.6529794692038, "max_sentence2_length": 419, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 532625 + "unique_sentence2": 1996 }, "zul_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 549024, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 145.8647971957937, "max_sentence2_length": 455, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 549024 + "unique_sentence2": 1997 }, "zul_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 545932, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 1, "average_sentence2_length": 144.3164747120681, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 545932 + "unique_sentence2": 1996 }, "zul_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 545874, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 144.28743114672008, "max_sentence2_length": 510, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 545874 + "unique_sentence2": 1996 }, "zul_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 530241, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 10, "average_sentence2_length": 136.45918878317477, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 530241 + "unique_sentence2": 1997 }, "zul_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 509559, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 8, "average_sentence2_length": 126.10265398097145, "max_sentence2_length": 430, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 509559 + "unique_sentence2": 1996 }, "zul_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 567693, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 11, "average_sentence2_length": 155.21331997996995, "max_sentence2_length": 581, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 567693 + "unique_sentence2": 1997 }, "zul_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 422970, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 5, "average_sentence2_length": 82.743114672008, "max_sentence2_length": 272, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 422970 + "unique_sentence2": 1996 }, "zul_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 592015, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 167.39258888332498, "max_sentence2_length": 556, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 592015 + "unique_sentence2": 1997 }, "zul_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 523345, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 133.00600901352027, "max_sentence2_length": 504, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 523345 + "unique_sentence2": 1997 }, "zul_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 528853, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 135.764146219329, "max_sentence2_length": 437, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 528853 + "unique_sentence2": 1996 }, "zul_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 497535, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 120.08162243365048, "max_sentence2_length": 405, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 497535 + "unique_sentence2": 1990 }, "zul_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 525822, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 6, "average_sentence2_length": 134.2463695543315, "max_sentence2_length": 492, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 525822 + "unique_sentence2": 1997 }, "zul_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 573820, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 7, "average_sentence2_length": 158.2814221331998, "max_sentence2_length": 582, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 573820 + "unique_sentence2": 1996 }, "zul_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 349210, + "unique_pairs": 1997, "min_sentence1_length": 8, "average_sentence1_length": 129.0595893840761, "max_sentence1_length": 494, + "unique_sentence1": 1996, "min_sentence2_length": 3, "average_sentence2_length": 45.80771156735103, "max_sentence2_length": 200, - "num_samples": 1997, - "num_samples_sentence2": 1997, - "number_of_characters": 349210 + "unique_sentence2": 1996 } } } diff --git a/mteb/descriptive_stats/BitextMining/NollySentiBitextMining.json b/mteb/descriptive_stats/BitextMining/NollySentiBitextMining.json index 144963c94..754f13c76 100644 --- a/mteb/descriptive_stats/BitextMining/NollySentiBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/NollySentiBitextMining.json @@ -1,58 +1,68 @@ { "train": { + "num_samples": 1640, + "number_of_characters": 445805, + "unique_pairs": 1632, "min_sentence1_length": 3, "average_sentence1_length": 136.3170731707317, "max_sentence1_length": 1698, + "unique_sentence1": 405, "min_sentence2_length": 3, "average_sentence2_length": 135.515243902439, "max_sentence2_length": 1728, - "num_samples": 1640, - "num_samples_sentence2": 1640, - "number_of_characters": 445805, + "unique_sentence2": 1631, "hf_subset_descriptive_stats": { "en-ha": { + "num_samples": 410, + "number_of_characters": 115348, + "unique_pairs": 407, "min_sentence1_length": 3, "average_sentence1_length": 136.3170731707317, "max_sentence1_length": 1698, + "unique_sentence1": 405, "min_sentence2_length": 4, "average_sentence2_length": 145.01951219512196, "max_sentence2_length": 1728, - "num_samples": 410, - "num_samples_sentence2": 410, - "number_of_characters": 115348 + "unique_sentence2": 407 }, "en-ig": { + "num_samples": 410, + "number_of_characters": 107173, + "unique_pairs": 409, "min_sentence1_length": 3, "average_sentence1_length": 136.3170731707317, "max_sentence1_length": 1698, + "unique_sentence1": 405, "min_sentence2_length": 5, "average_sentence2_length": 125.08048780487805, "max_sentence2_length": 1137, - "num_samples": 410, - "num_samples_sentence2": 410, - "number_of_characters": 107173 + "unique_sentence2": 408 }, "en-pcm": { + "num_samples": 410, + "number_of_characters": 109955, + "unique_pairs": 408, "min_sentence1_length": 3, "average_sentence1_length": 136.3170731707317, "max_sentence1_length": 1698, + "unique_sentence1": 405, "min_sentence2_length": 3, "average_sentence2_length": 131.8658536585366, "max_sentence2_length": 1552, - "num_samples": 410, - "num_samples_sentence2": 410, - "number_of_characters": 109955 + "unique_sentence2": 408 }, "en-yo": { + "num_samples": 410, + "number_of_characters": 113329, + "unique_pairs": 409, "min_sentence1_length": 3, "average_sentence1_length": 136.3170731707317, "max_sentence1_length": 1698, + "unique_sentence1": 405, "min_sentence2_length": 6, "average_sentence2_length": 140.0951219512195, "max_sentence2_length": 1338, - "num_samples": 410, - "num_samples_sentence2": 410, - "number_of_characters": 113329 + "unique_sentence2": 409 } } } diff --git a/mteb/descriptive_stats/BitextMining/NorwegianCourtsBitextMining.json b/mteb/descriptive_stats/BitextMining/NorwegianCourtsBitextMining.json index de150505b..96403e4c8 100644 --- a/mteb/descriptive_stats/BitextMining/NorwegianCourtsBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/NorwegianCourtsBitextMining.json @@ -1,13 +1,15 @@ { "test": { + "num_samples": 228, + "number_of_characters": 37441, + "unique_pairs": 228, "min_sentence1_length": 13, "average_sentence1_length": 82.19736842105263, "max_sentence1_length": 272, + "unique_sentence1": 227, "min_sentence2_length": 10, "average_sentence2_length": 82.01754385964912, "max_sentence2_length": 269, - "num_samples": 228, - "num_samples_sentence2": 228, - "number_of_characters": 37441 + "unique_sentence2": 226 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/TbilisiCityHallBitextMining.json b/mteb/descriptive_stats/BitextMining/TbilisiCityHallBitextMining.json index 655cc7d2e..12f400372 100644 --- a/mteb/descriptive_stats/BitextMining/TbilisiCityHallBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/TbilisiCityHallBitextMining.json @@ -1,36 +1,42 @@ { "test": { + "num_samples": 3640, + "number_of_characters": 572146, + "unique_pairs": 3640, "min_sentence1_length": 13, "average_sentence1_length": 78.59148351648352, "max_sentence1_length": 203, + "unique_sentence1": 3636, "min_sentence2_length": 13, "average_sentence2_length": 78.59148351648352, "max_sentence2_length": 203, - "num_samples": 3640, - "num_samples_sentence2": 3640, - "number_of_characters": 572146, + "unique_sentence2": 3636, "hf_subset_descriptive_stats": { "kat_Geor-eng_Latn": { + "num_samples": 1820, + "number_of_characters": 286073, + "unique_pairs": 1820, "min_sentence1_length": 30, "average_sentence1_length": 76.06593406593407, "max_sentence1_length": 189, + "unique_sentence1": 1820, "min_sentence2_length": 13, "average_sentence2_length": 81.11703296703297, "max_sentence2_length": 203, - "num_samples": 1820, - "num_samples_sentence2": 1820, - "number_of_characters": 286073 + "unique_sentence2": 1816 }, "eng_Latn-kat_Geor": { + "num_samples": 1820, + "number_of_characters": 286073, + "unique_pairs": 1820, "min_sentence1_length": 13, "average_sentence1_length": 81.11703296703297, "max_sentence1_length": 203, + "unique_sentence1": 1816, "min_sentence2_length": 30, "average_sentence2_length": 76.06593406593407, "max_sentence2_length": 189, - "num_samples": 1820, - "num_samples_sentence2": 1820, - "number_of_characters": 286073 + "unique_sentence2": 1820 } } } diff --git a/mteb/descriptive_stats/BitextMining/VieMedEVBitextMining.json b/mteb/descriptive_stats/BitextMining/VieMedEVBitextMining.json index 77bab2b30..2d97df573 100644 --- a/mteb/descriptive_stats/BitextMining/VieMedEVBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/VieMedEVBitextMining.json @@ -1,13 +1,15 @@ { "test": { + "num_samples": 2048, + "number_of_characters": 575910, + "unique_pairs": 2048, "min_sentence1_length": 11, "average_sentence1_length": 139.22802734375, "max_sentence1_length": 1291, + "unique_sentence1": 2048, "min_sentence2_length": 11, "average_sentence2_length": 141.97802734375, "max_sentence2_length": 1217, - "num_samples": 2048, - "num_samples_sentence2": 2048, - "number_of_characters": 575910 + "unique_sentence2": 2047 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LanguageClassification.json b/mteb/descriptive_stats/Classification/LanguageClassification.json index 0142cc3e1..6622d23be 100644 --- a/mteb/descriptive_stats/Classification/LanguageClassification.json +++ b/mteb/descriptive_stats/Classification/LanguageClassification.json @@ -2,6 +2,7 @@ "test": { "num_samples": 2048, "number_of_characters": 224352, + "num_texts_in_train": 31, "min_text_length": 14, "average_text_length": 109.546875, "max_text_length": 1270, @@ -69,5 +70,77 @@ "count": 103 } } + }, + "train": { + "num_samples": 70000, + "number_of_characters": 7760299, + "num_texts_in_train": null, + "min_text_length": 2, + "average_text_length": 110.86141428571429, + "max_text_length": 2422, + "unique_text": 68978, + "unique_labels": 20, + "labels": { + "12": { + "count": 3500 + }, + "1": { + "count": 3500 + }, + "19": { + "count": 3500 + }, + "15": { + "count": 3500 + }, + "13": { + "count": 3500 + }, + "11": { + "count": 3500 + }, + "17": { + "count": 3500 + }, + "14": { + "count": 3500 + }, + "16": { + "count": 3500 + }, + "5": { + "count": 3500 + }, + "0": { + "count": 3500 + }, + "8": { + "count": 3500 + }, + "7": { + "count": 3500 + }, + "2": { + "count": 3500 + }, + "3": { + "count": 3500 + }, + "10": { + "count": 3500 + }, + "6": { + "count": 3500 + }, + "18": { + "count": 3500 + }, + "4": { + "count": 3500 + }, + "9": { + "count": 3500 + } + } } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SlovakHateSpeechClassification.json b/mteb/descriptive_stats/Classification/SlovakHateSpeechClassification.json index 67b65bdc7..63fcfd3e5 100644 --- a/mteb/descriptive_stats/Classification/SlovakHateSpeechClassification.json +++ b/mteb/descriptive_stats/Classification/SlovakHateSpeechClassification.json @@ -2,9 +2,11 @@ "test": { "num_samples": 1319, "number_of_characters": 122279, + "num_texts_in_train": 46, "min_text_length": 8, "average_text_length": 92.70583775587566, "max_text_length": 1584, + "unique_text": 1315, "unique_labels": 2, "labels": { "1": { @@ -14,5 +16,23 @@ "count": 959 } } + }, + "train": { + "num_samples": 11870, + "number_of_characters": 1130860, + "num_texts_in_train": null, + "min_text_length": 7, + "average_text_length": 95.27042965459141, + "max_text_length": 2112, + "unique_text": 11655, + "unique_labels": 2, + "labels": { + "1": { + "count": 3245 + }, + "0": { + "count": 8625 + } + } } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.json b/mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.json index 062713177..2d9a0a01b 100644 --- a/mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.json +++ b/mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.json @@ -5,6 +5,7 @@ "min_text_length": 5000, "average_text_length": 7500.0, "max_text_length": 10000, + "unique_texts": 41555, "min_labels_per_text": 1, "average_labels_per_text": 7500.0, "max_labels_per_text": 14251, diff --git a/mteb/descriptive_stats/Clustering/MedrxivClusteringP2P.v2.json b/mteb/descriptive_stats/Clustering/MedrxivClusteringP2P.v2.json new file mode 100644 index 000000000..0370d5147 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/MedrxivClusteringP2P.v2.json @@ -0,0 +1,168 @@ +{ + "test": { + "num_samples": 37500, + "number_of_characters": 74294927, + "min_text_length": 148, + "average_text_length": 1981.1980533333333, + "max_text_length": 38759, + "min_labels_per_text": 6, + "average_labels_per_text": 1.0, + "max_labels_per_text": 8830, + "unique_labels": 51, + "labels": { + "epidemiology": { + "count": 6656 + }, + "public and global health": { + "count": 3595 + }, + "oncology": { + "count": 845 + }, + "allergy and immunology": { + "count": 464 + }, + "orthopedics": { + "count": 104 + }, + "health informatics": { + "count": 1107 + }, + "occupational and environmental health": { + "count": 415 + }, + "infectious diseases": { + "count": 8830 + }, + "genetic and genomic medicine": { + "count": 1918 + }, + "health policy": { + "count": 527 + }, + "gastroenterology": { + "count": 343 + }, + "radiology and imaging": { + "count": 541 + }, + "pain medicine": { + "count": 121 + }, + "neurology": { + "count": 1773 + }, + "primary care research": { + "count": 232 + }, + "rheumatology": { + "count": 189 + }, + "endocrinology": { + "count": 419 + }, + "hematology": { + "count": 202 + }, + "addiction medicine": { + "count": 178 + }, + "pediatrics": { + "count": 589 + }, + "cardiovascular medicine": { + "count": 855 + }, + "obstetrics and gynecology": { + "count": 373 + }, + "health systems and quality improvement": { + "count": 491 + }, + "nephrology": { + "count": 241 + }, + "respiratory medicine": { + "count": 482 + }, + "geriatric medicine": { + "count": 169 + }, + "dentistry and oral medicine": { + "count": 159 + }, + "psychiatry and clinical psychology": { + "count": 1781 + }, + "nutrition": { + "count": 240 + }, + "intensive care and critical care medicine": { + "count": 368 + }, + "rehabilitation medicine and physical therapy": { + "count": 322 + }, + "otolaryngology": { + "count": 166 + }, + "nursing": { + "count": 93 + }, + "transplantation": { + "count": 118 + }, + "health economics": { + "count": 327 + }, + "sports medicine": { + "count": 180 + }, + "hiv aids": { + "count": 363 + }, + "dermatology": { + "count": 98 + }, + "pathology": { + "count": 223 + }, + "emergency medicine": { + "count": 191 + }, + "pharmacology and therapeutics": { + "count": 221 + }, + "ophthalmology": { + "count": 220 + }, + "medical ethics": { + "count": 46 + }, + "palliative medicine": { + "count": 45 + }, + "sexual and reproductive health": { + "count": 156 + }, + "medical education": { + "count": 203 + }, + "surgery": { + "count": 162 + }, + "urology": { + "count": 65 + }, + "anesthesia": { + "count": 72 + }, + "toxicology": { + "count": 16 + }, + "forensic medicine": { + "count": 6 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/MedrxivClusteringS2S.v2.json b/mteb/descriptive_stats/Clustering/MedrxivClusteringS2S.v2.json new file mode 100644 index 000000000..7b55ddd4d --- /dev/null +++ b/mteb/descriptive_stats/Clustering/MedrxivClusteringS2S.v2.json @@ -0,0 +1,168 @@ +{ + "test": { + "num_samples": 37500, + "number_of_characters": 4301276, + "min_text_length": 18, + "average_text_length": 114.70069333333333, + "max_text_length": 339, + "min_labels_per_text": 6, + "average_labels_per_text": 1.0, + "max_labels_per_text": 8830, + "unique_labels": 51, + "labels": { + "epidemiology": { + "count": 6656 + }, + "public and global health": { + "count": 3595 + }, + "oncology": { + "count": 845 + }, + "allergy and immunology": { + "count": 464 + }, + "orthopedics": { + "count": 104 + }, + "health informatics": { + "count": 1107 + }, + "occupational and environmental health": { + "count": 415 + }, + "infectious diseases": { + "count": 8830 + }, + "genetic and genomic medicine": { + "count": 1918 + }, + "health policy": { + "count": 527 + }, + "gastroenterology": { + "count": 343 + }, + "radiology and imaging": { + "count": 541 + }, + "pain medicine": { + "count": 121 + }, + "neurology": { + "count": 1773 + }, + "primary care research": { + "count": 232 + }, + "rheumatology": { + "count": 189 + }, + "endocrinology": { + "count": 419 + }, + "hematology": { + "count": 202 + }, + "addiction medicine": { + "count": 178 + }, + "pediatrics": { + "count": 589 + }, + "cardiovascular medicine": { + "count": 855 + }, + "obstetrics and gynecology": { + "count": 373 + }, + "health systems and quality improvement": { + "count": 491 + }, + "nephrology": { + "count": 241 + }, + "respiratory medicine": { + "count": 482 + }, + "geriatric medicine": { + "count": 169 + }, + "dentistry and oral medicine": { + "count": 159 + }, + "psychiatry and clinical psychology": { + "count": 1781 + }, + "nutrition": { + "count": 240 + }, + "intensive care and critical care medicine": { + "count": 368 + }, + "rehabilitation medicine and physical therapy": { + "count": 322 + }, + "otolaryngology": { + "count": 166 + }, + "nursing": { + "count": 93 + }, + "transplantation": { + "count": 118 + }, + "health economics": { + "count": 327 + }, + "sports medicine": { + "count": 180 + }, + "hiv aids": { + "count": 363 + }, + "dermatology": { + "count": 98 + }, + "pathology": { + "count": 223 + }, + "emergency medicine": { + "count": 191 + }, + "pharmacology and therapeutics": { + "count": 221 + }, + "ophthalmology": { + "count": 220 + }, + "medical ethics": { + "count": 46 + }, + "palliative medicine": { + "count": 45 + }, + "sexual and reproductive health": { + "count": 156 + }, + "medical education": { + "count": 203 + }, + "surgery": { + "count": 162 + }, + "urology": { + "count": 65 + }, + "anesthesia": { + "count": 72 + }, + "toxicology": { + "count": 16 + }, + "forensic medicine": { + "count": 6 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/RedditClusteringP2P.v2.json b/mteb/descriptive_stats/Clustering/RedditClusteringP2P.v2.json new file mode 100644 index 000000000..ba997dbef --- /dev/null +++ b/mteb/descriptive_stats/Clustering/RedditClusteringP2P.v2.json @@ -0,0 +1,1335 @@ +{ + "test": { + "num_samples": 459389, + "number_of_characters": 334286895, + "min_text_length": 79, + "average_text_length": 727.6771864367671, + "max_text_length": 4359, + "min_labels_per_text": 2, + "average_labels_per_text": 1.0, + "max_labels_per_text": 77908, + "unique_labels": 440, + "labels": { + "FortNiteBR": { + "count": 436 + }, + "buildapc": { + "count": 8484 + }, + "offmychest": { + "count": 570 + }, + "nus": { + "count": 45 + }, + "relationship_advice": { + "count": 16651 + }, + "premed": { + "count": 201 + }, + "dogecoin": { + "count": 8108 + }, + "GamingLaptops": { + "count": 183 + }, + "asktransgender": { + "count": 326 + }, + "MachineLearning": { + "count": 61 + }, + "puppy101": { + "count": 1597 + }, + "GunAccessoriesForSale": { + "count": 2619 + }, + "Random_Acts_Of_Amazon": { + "count": 1115 + }, + "Catholicism": { + "count": 183 + }, + "MonsterHunter": { + "count": 218 + }, + "tipofmypenis": { + "count": 87 + }, + "samsung": { + "count": 69 + }, + "PersonalFinanceCanada": { + "count": 341 + }, + "Dyson_Sphere_Program": { + "count": 55 + }, + "bleach": { + "count": 41 + }, + "AmItheAsshole": { + "count": 3730 + }, + "WallStreetbetsELITE": { + "count": 328 + }, + "GlobalPowers": { + "count": 35 + }, + "ABraThatFits": { + "count": 159 + }, + "PokemonGoFriends": { + "count": 1165 + }, + "NoMansSkyTheGame": { + "count": 259 + }, + "masseffect": { + "count": 233 + }, + "dating_advice": { + "count": 559 + }, + "yoga": { + "count": 50 + }, + "depression": { + "count": 515 + }, + "COVID19positive": { + "count": 180 + }, + "generationology": { + "count": 37 + }, + "feedthebeast": { + "count": 192 + }, + "EliteDangerous": { + "count": 270 + }, + "alcoholicsanonymous": { + "count": 93 + }, + "GoRVing": { + "count": 35 + }, + "thedivision": { + "count": 111 + }, + "breakingmom": { + "count": 105 + }, + "AskAnAmerican": { + "count": 80 + }, + "HypnoFair": { + "count": 5 + }, + "JustUnsubbed": { + "count": 13 + }, + "socialanxiety": { + "count": 123 + }, + "dirtykikpals": { + "count": 202 + }, + "askTO": { + "count": 126 + }, + "AskCulinary": { + "count": 108 + }, + "Bogleheads": { + "count": 71 + }, + "dragonquest": { + "count": 45 + }, + "NoContract": { + "count": 30 + }, + "gorillaz": { + "count": 14 + }, + "MondoGore": { + "count": 8 + }, + "comicswap": { + "count": 56 + }, + "VirtualYoutubers": { + "count": 92 + }, + "Gta5Modding": { + "count": 28 + }, + "obs": { + "count": 61 + }, + "vcu": { + "count": 9 + }, + "KingkillerChronicle": { + "count": 17 + }, + "AmongUs": { + "count": 41 + }, + "wireshark": { + "count": 3 + }, + "Dodocodes": { + "count": 46 + }, + "Aliexpress": { + "count": 40 + }, + "LearnerDriverUK": { + "count": 12 + }, + "PanicAttack": { + "count": 23 + }, + "KassadinMains": { + "count": 10 + }, + "islam": { + "count": 93 + }, + "chronotrigger": { + "count": 4 + }, + "skincareexchange": { + "count": 13 + }, + "PokemonHome": { + "count": 21 + }, + "survivinginfidelity": { + "count": 71 + }, + "igcse": { + "count": 21 + }, + "C25K": { + "count": 21 + }, + "aorus": { + "count": 2 + }, + "idleon": { + "count": 19 + }, + "photography": { + "count": 22 + }, + "cryptocoins": { + "count": 7 + }, + "CanaryWharfBets": { + "count": 7 + }, + "KillingEve": { + "count": 7 + }, + "GameBuilderGarage": { + "count": 16 + }, + "SauceSharingCommunity": { + "count": 7 + }, + "turo": { + "count": 9 + }, + "foodscience": { + "count": 14 + }, + "HIMYM": { + "count": 20 + }, + "HauntingOfHillHouse": { + "count": 4 + }, + "GoodNotes": { + "count": 8 + }, + "RedditWritesSeinfeld": { + "count": 6 + }, + "AirReps": { + "count": 2 + }, + "ADHD": { + "count": 3811 + }, + "BuddyCrossing": { + "count": 446 + }, + "libraryofruina": { + "count": 98 + }, + "SluttyConfessions": { + "count": 2787 + }, + "tipofmytongue": { + "count": 7145 + }, + "fleshlight": { + "count": 128 + }, + "amcstock": { + "count": 13910 + }, + "teenagers": { + "count": 77908 + }, + "suggestmeabook": { + "count": 1540 + }, + "dirtypenpals": { + "count": 5587 + }, + "MinecraftServer": { + "count": 177 + }, + "CreditCards": { + "count": 669 + }, + "Guitar": { + "count": 10952 + }, + "rpg": { + "count": 529 + }, + "NoFap": { + "count": 14853 + }, + "lfg": { + "count": 1093 + }, + "MarsWallStreet": { + "count": 935 + }, + "SummonSign": { + "count": 931 + }, + "AssassinsCreedValhala": { + "count": 295 + }, + "hoi4": { + "count": 432 + }, + "Coins4Sale": { + "count": 260 + }, + "xbox": { + "count": 459 + }, + "TooAfraidToAsk": { + "count": 7404 + }, + "NBA2k": { + "count": 553 + }, + "KGBTR": { + "count": 943 + }, + "roblox": { + "count": 220 + }, + "salesforce": { + "count": 214 + }, + "TwoXChromosomes": { + "count": 1736 + }, + "mechmarket": { + "count": 4863 + }, + "Gaming_Headsets": { + "count": 103 + }, + "pittsburgh": { + "count": 189 + }, + "CryptoMars": { + "count": 1606 + }, + "FridayNightFunkin": { + "count": 378 + }, + "vaginismus": { + "count": 122 + }, + "transpositive": { + "count": 10 + }, + "comicbooks": { + "count": 274 + }, + "BDSMcommunity": { + "count": 185 + }, + "aliens": { + "count": 201 + }, + "Scotch": { + "count": 64 + }, + "KikRoleplay": { + "count": 141 + }, + "Kayaking": { + "count": 91 + }, + "196": { + "count": 47 + }, + "digimon": { + "count": 140 + }, + "Evernote": { + "count": 42 + }, + "logh": { + "count": 22 + }, + "arlington": { + "count": 15 + }, + "Adopted": { + "count": 8 + }, + "DissonautUniverse": { + "count": 4 + }, + "Midsommar": { + "count": 12 + }, + "SofiawithanF": { + "count": 83 + }, + "xmpp": { + "count": 6 + }, + "ZombsRoyale": { + "count": 16 + }, + "accesscontrol": { + "count": 8 + }, + "WetlanderHumor": { + "count": 2 + }, + "PoonamPandeyFanatics": { + "count": 2 + }, + "screenplaychallenge": { + "count": 2 + }, + "scatstories": { + "count": 2 + }, + "techsupport": { + "count": 290 + }, + "whatcarshouldIbuy": { + "count": 79 + }, + "Stormlight_Archive": { + "count": 15 + }, + "deadbydaylight": { + "count": 126 + }, + "bicycling": { + "count": 27 + }, + "oculus": { + "count": 64 + }, + "Cartalk": { + "count": 33 + }, + "Sims4": { + "count": 43 + }, + "NoFeeAC": { + "count": 95 + }, + "Crypto_com": { + "count": 37 + }, + "ITCareerQuestions": { + "count": 259 + }, + "aromantic": { + "count": 18 + }, + "Revu": { + "count": 3 + }, + "exalted": { + "count": 2 + }, + "HilariaBaldwin": { + "count": 20 + }, + "Testosterone": { + "count": 35 + }, + "Screenwriting": { + "count": 170 + }, + "LifeProTips": { + "count": 49 + }, + "steinsgate": { + "count": 13 + }, + "Baystreetbets": { + "count": 10 + }, + "AskGirls": { + "count": 7 + }, + "idlechampions": { + "count": 7 + }, + "facebook": { + "count": 17 + }, + "tf2trade": { + "count": 4 + }, + "mfdoom": { + "count": 3 + }, + "FiddlesticksMains": { + "count": 2 + }, + "HFY": { + "count": 10 + }, + "FiestaST": { + "count": 2 + }, + "whatsthatbook": { + "count": 994 + }, + "GearsOfWar": { + "count": 879 + }, + "KazuhaMains": { + "count": 175 + }, + "RepTime": { + "count": 211 + }, + "AstroGaming": { + "count": 141 + }, + "metalgearsolid": { + "count": 152 + }, + "qBittorrent": { + "count": 39 + }, + "ELLIPAL_Official": { + "count": 24 + }, + "raisedbynarcissists": { + "count": 4895 + }, + "unpopularopinion": { + "count": 14901 + }, + "ACTrade": { + "count": 5679 + }, + "askcarsales": { + "count": 1339 + }, + "AskVet": { + "count": 1357 + }, + "whowouldwin": { + "count": 4493 + }, + "playstation": { + "count": 1362 + }, + "anime": { + "count": 6531 + }, + "GME": { + "count": 12577 + }, + "DotA2": { + "count": 2004 + }, + "cryptostreetbets": { + "count": 2241 + }, + "MonsterHunterWorld": { + "count": 698 + }, + "Market76": { + "count": 14274 + }, + "DnD": { + "count": 5092 + }, + "leagueoflegends": { + "count": 3683 + }, + "doordash_drivers": { + "count": 1626 + }, + "theta_network": { + "count": 489 + }, + "exmuslim": { + "count": 1369 + }, + "gonewildaudio": { + "count": 2998 + }, + "conspiracy": { + "count": 3587 + }, + "heroesofthestorm": { + "count": 535 + }, + "FanFiction": { + "count": 2782 + }, + "Doom": { + "count": 1251 + }, + "texas": { + "count": 269 + }, + "Vent": { + "count": 1738 + }, + "selfimprovement": { + "count": 1284 + }, + "youtubers": { + "count": 706 + }, + "askseddit": { + "count": 237 + }, + "boardgames": { + "count": 1237 + }, + "bravelydefault": { + "count": 347 + }, + "ConquerorsBlade": { + "count": 238 + }, + "ChronicPain": { + "count": 527 + }, + "teenagersnew": { + "count": 256 + }, + "brasil": { + "count": 1092 + }, + "MatthiasSubmissions": { + "count": 921 + }, + "MarylandUnemployment": { + "count": 314 + }, + "SaltLakeCity": { + "count": 411 + }, + "BokunoheroFanfiction": { + "count": 155 + }, + "BenignExistence": { + "count": 125 + }, + "GayYoungOldDating": { + "count": 156 + }, + "Bible": { + "count": 202 + }, + "haskell": { + "count": 154 + }, + "seduction": { + "count": 400 + }, + "fantasywriters": { + "count": 262 + }, + "HiveOS": { + "count": 100 + }, + "PerkByDaylight": { + "count": 15 + }, + "Hedgehog": { + "count": 73 + }, + "xmen": { + "count": 263 + }, + "HyperRP": { + "count": 122 + }, + "emotestories": { + "count": 3 + }, + "tutanota": { + "count": 135 + }, + "CultoftheFranklin": { + "count": 46 + }, + "langrisser": { + "count": 62 + }, + "CozyGrove": { + "count": 61 + }, + "Sverigesforsvarsmakt": { + "count": 12 + }, + "silverbugbets": { + "count": 21 + }, + "WreckingBallMains": { + "count": 5 + }, + "capitalism_in_decay": { + "count": 8 + }, + "paintdotnet": { + "count": 11 + }, + "u_mawadom118": { + "count": 4 + }, + "xboxfindfriends": { + "count": 2 + }, + "CPTSD": { + "count": 540 + }, + "destiny2": { + "count": 318 + }, + "Wallstreetsilver": { + "count": 1013 + }, + "DestinyTheGame": { + "count": 1107 + }, + "blackopscoldwar": { + "count": 400 + }, + "InstacartShoppers": { + "count": 202 + }, + "RocketLeagueExchange": { + "count": 832 + }, + "apexlegends": { + "count": 3265 + }, + "kansascity": { + "count": 53 + }, + "namenerds": { + "count": 235 + }, + "help": { + "count": 152 + }, + "Kengan_Ashura": { + "count": 132 + }, + "thetagang": { + "count": 165 + }, + "GameSale": { + "count": 262 + }, + "Reduction": { + "count": 109 + }, + "sex": { + "count": 906 + }, + "bostonr4r": { + "count": 75 + }, + "LegendsOfRuneterra": { + "count": 231 + }, + "overlord": { + "count": 48 + }, + "madisonwi": { + "count": 53 + }, + "steelseries": { + "count": 79 + }, + "ClashOfClansRecruit": { + "count": 214 + }, + "CharacterRant": { + "count": 55 + }, + "AirForce": { + "count": 94 + }, + "sexstories": { + "count": 92 + }, + "NameThatSong": { + "count": 162 + }, + "depressed": { + "count": 74 + }, + "ibs": { + "count": 150 + }, + "40kLore": { + "count": 269 + }, + "podcasts": { + "count": 88 + }, + "miraculousladybug": { + "count": 150 + }, + "ask": { + "count": 224 + }, + "EverMerge": { + "count": 31 + }, + "TMJ": { + "count": 54 + }, + "BitLifeApp": { + "count": 39 + }, + "FireEmblemHeroes": { + "count": 100 + }, + "software": { + "count": 62 + }, + "ShieldAndroidTV": { + "count": 70 + }, + "GriefSupport": { + "count": 125 + }, + "onewheel": { + "count": 37 + }, + "MensRights": { + "count": 80 + }, + "nhl": { + "count": 22 + }, + "ClashOfClans": { + "count": 107 + }, + "ps3homebrew": { + "count": 33 + }, + "LightNovels": { + "count": 77 + }, + "redsox": { + "count": 34 + }, + "CryptoMarkets": { + "count": 44 + }, + "ugly": { + "count": 47 + }, + "GCXRep": { + "count": 12 + }, + "cscareerquestionsEU": { + "count": 65 + }, + "MindHunter": { + "count": 6 + }, + "starcraft2coop": { + "count": 15 + }, + "nanocurrency": { + "count": 1421 + }, + "ModelCars": { + "count": 8 + }, + "UKJobs": { + "count": 30 + }, + "Netherlands": { + "count": 44 + }, + "clonewars": { + "count": 8 + }, + "Julia": { + "count": 11 + }, + "Prolactinoma": { + "count": 9 + }, + "sofi": { + "count": 11 + }, + "royalfamily": { + "count": 6 + }, + "ConnecticutR4R": { + "count": 8 + }, + "weather": { + "count": 5 + }, + "oneui": { + "count": 7 + }, + "KTM": { + "count": 5 + }, + "Aerials": { + "count": 3 + }, + "seoul": { + "count": 2 + }, + "exjw": { + "count": 3281 + }, + "ModernMagic": { + "count": 699 + }, + "Paladins": { + "count": 1242 + }, + "kdramarecommends": { + "count": 1611 + }, + "hitbtc": { + "count": 330 + }, + "endocrinology": { + "count": 75 + }, + "Bath": { + "count": 43 + }, + "NassauCountyHookups": { + "count": 5 + }, + "feminineboys": { + "count": 1248 + }, + "dreamsmp": { + "count": 2018 + }, + "SquaredCircle": { + "count": 2255 + }, + "Minecraft": { + "count": 8753 + }, + "spirituality": { + "count": 1809 + }, + "Eldenring": { + "count": 1471 + }, + "Sat": { + "count": 1172 + }, + "bonnaroo": { + "count": 194 + }, + "gardening": { + "count": 1892 + }, + "Unemployment": { + "count": 6185 + }, + "mac": { + "count": 1847 + }, + "Bestbuy": { + "count": 437 + }, + "quittingkratom": { + "count": 1081 + }, + "lawschooladmissions": { + "count": 3436 + }, + "NiceHash": { + "count": 2135 + }, + "McMaster": { + "count": 815 + }, + "covidlonghaulers": { + "count": 1299 + }, + "stalker": { + "count": 758 + }, + "MLBTheShow": { + "count": 2721 + }, + "FortniteCompetitive": { + "count": 998 + }, + "dpdr": { + "count": 514 + }, + "appliancerepair": { + "count": 720 + }, + "thomasthetankengine": { + "count": 207 + }, + "delhi": { + "count": 217 + }, + "Huel": { + "count": 300 + }, + "leafs": { + "count": 203 + }, + "HotWheels": { + "count": 170 + }, + "90dayfianceuncensored": { + "count": 550 + }, + "Throwers": { + "count": 142 + }, + "Wavyhair": { + "count": 270 + }, + "CryptoHorde": { + "count": 128 + }, + "ShuumatsuNoValkyrie": { + "count": 453 + }, + "TeensMeetTeens": { + "count": 432 + }, + "dbrand": { + "count": 108 + }, + "SLFmeetups": { + "count": 18 + }, + "1200isplentyketo": { + "count": 48 + }, + "passive_income": { + "count": 211 + }, + "BroadCity": { + "count": 16 + }, + "RevenantMain": { + "count": 71 + }, + "extrarfl": { + "count": 25 + }, + "AgonGame": { + "count": 5 + }, + "FitnessDE": { + "count": 3 + }, + "gaming": { + "count": 1277 + }, + "livesound": { + "count": 91 + }, + "IBO": { + "count": 1896 + }, + "EscapefromTarkov": { + "count": 1300 + }, + "amex": { + "count": 145 + }, + "DMAcademy": { + "count": 1411 + }, + "VinylCollectors": { + "count": 556 + }, + "cardano": { + "count": 716 + }, + "brave_browser": { + "count": 159 + }, + "dating": { + "count": 952 + }, + "OculusQuest": { + "count": 942 + }, + "Superstonk": { + "count": 3089 + }, + "MtF": { + "count": 957 + }, + "findaleague": { + "count": 207 + }, + "Nioh": { + "count": 398 + }, + "IRS": { + "count": 715 + }, + "transgendercirclejerk": { + "count": 353 + }, + "learnmath": { + "count": 489 + }, + "piano": { + "count": 263 + }, + "LeagueConnect": { + "count": 216 + }, + "eu4": { + "count": 561 + }, + "Wordpress": { + "count": 345 + }, + "RoleplayingForReddit": { + "count": 31 + }, + "LOONA": { + "count": 89 + }, + "newtothenavy": { + "count": 167 + }, + "HaircareScience": { + "count": 118 + }, + "appletv": { + "count": 167 + }, + "sissypersonals": { + "count": 102 + }, + "raleigh": { + "count": 168 + }, + "realonlyfansreviews": { + "count": 21 + }, + "AskGames": { + "count": 49 + }, + "PokemonTCG": { + "count": 325 + }, + "controlgame": { + "count": 109 + }, + "GoogleDataStudio": { + "count": 16 + }, + "WhiteWolfRPG": { + "count": 139 + }, + "MECoOp": { + "count": 31 + }, + "snuffrp": { + "count": 46 + }, + "lockpicking": { + "count": 103 + }, + "wicked_edge": { + "count": 105 + }, + "BMW": { + "count": 99 + }, + "choiceofgames": { + "count": 24 + }, + "hisdarkmaterials": { + "count": 12 + }, + "SakuraGakuin": { + "count": 24 + }, + "detrans": { + "count": 55 + }, + "Smallville": { + "count": 37 + }, + "kingofqueens": { + "count": 7 + }, + "JamesHoffmann": { + "count": 22 + }, + "stashinvest": { + "count": 16 + }, + "ABA": { + "count": 79 + }, + "ladybusiness": { + "count": 10 + }, + "gamegrumps": { + "count": 32 + }, + "GodEater": { + "count": 21 + }, + "tomorrow": { + "count": 39 + }, + "Tomorrowland": { + "count": 9 + }, + "BlackCountryNewRoad": { + "count": 5 + }, + "STAYC": { + "count": 3 + }, + "SatoshiStreetBets": { + "count": 3828 + }, + "AskLosAngeles": { + "count": 1036 + }, + "buildapcforme": { + "count": 1689 + }, + "ApplyingToCollege": { + "count": 10675 + }, + "watercooling": { + "count": 1209 + }, + "BreakUps": { + "count": 4914 + }, + "FIFA": { + "count": 3811 + }, + "emacs": { + "count": 712 + }, + "trakstocks": { + "count": 691 + }, + "Shittyaskflying": { + "count": 147 + }, + "AmazonFC": { + "count": 1178 + }, + "stocks": { + "count": 4610 + }, + "BangaloreMains": { + "count": 26 + }, + "pokemon": { + "count": 3953 + }, + "religion": { + "count": 684 + }, + "cuboulder": { + "count": 269 + }, + "self": { + "count": 1688 + }, + "tarot": { + "count": 912 + }, + "turtles": { + "count": 49 + }, + "TheMagnusArchives": { + "count": 300 + }, + "Superhero_Ideas": { + "count": 34 + }, + "NTU": { + "count": 308 + }, + "touhou": { + "count": 623 + }, + "JoJolion": { + "count": 50 + }, + "lasers": { + "count": 27 + }, + "popperpigs": { + "count": 67 + }, + "aggretsuko": { + "count": 20 + }, + "Library": { + "count": 5 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/TwentyNewsgroupsClustering.v2.json b/mteb/descriptive_stats/Clustering/TwentyNewsgroupsClustering.v2.json new file mode 100644 index 000000000..77be5a3b7 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/TwentyNewsgroupsClustering.v2.json @@ -0,0 +1,75 @@ +{ + "test": { + "num_samples": 59545, + "number_of_characters": 1907719, + "min_text_length": 11, + "average_text_length": 32.03827357460744, + "max_text_length": 120, + "min_labels_per_text": 2082, + "average_labels_per_text": 1.0, + "max_labels_per_text": 3236, + "unique_labels": 20, + "labels": { + "12": { + "count": 3137 + }, + "6": { + "count": 3070 + }, + "0": { + "count": 2613 + }, + "2": { + "count": 3155 + }, + "10": { + "count": 3220 + }, + "17": { + "count": 2986 + }, + "14": { + "count": 3106 + }, + "13": { + "count": 3055 + }, + "1": { + "count": 3056 + }, + "16": { + "count": 2911 + }, + "9": { + "count": 2984 + }, + "3": { + "count": 3070 + }, + "15": { + "count": 3090 + }, + "7": { + "count": 3036 + }, + "5": { + "count": 3124 + }, + "11": { + "count": 3236 + }, + "18": { + "count": 2483 + }, + "8": { + "count": 3090 + }, + "19": { + "count": 2082 + }, + "4": { + "count": 3041 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/WikiClusteringP2P.json b/mteb/descriptive_stats/Clustering/WikiClusteringP2P.json index 700dbeed0..4c1f30309 100644 --- a/mteb/descriptive_stats/Clustering/WikiClusteringP2P.json +++ b/mteb/descriptive_stats/Clustering/WikiClusteringP2P.json @@ -5,6 +5,7 @@ "min_text_length": 512, "average_text_length": 512.0, "max_text_length": 512, + "unique_texts": 49704, "min_labels_per_text": 1, "average_labels_per_text": 512.0, "max_labels_per_text": 3986, @@ -864,6 +865,7 @@ "min_text_length": 512, "average_text_length": 512.0, "max_text_length": 512, + "unique_texts": 3860, "min_labels_per_text": 6, "average_labels_per_text": 512.0, "max_labels_per_text": 1492, @@ -928,6 +930,7 @@ "min_text_length": 512, "average_text_length": 512.0, "max_text_length": 512, + "unique_texts": 4596, "min_labels_per_text": 20, "average_labels_per_text": 512.0, "max_labels_per_text": 1844, @@ -965,6 +968,7 @@ "min_text_length": 512, "average_text_length": 512.0, "max_text_length": 512, + "unique_texts": 4782, "min_labels_per_text": 21, "average_labels_per_text": 512.0, "max_labels_per_text": 1559, @@ -1041,6 +1045,7 @@ "min_text_length": 512, "average_text_length": 512.0, "max_text_length": 512, + "unique_texts": 4725, "min_labels_per_text": 35, "average_labels_per_text": 512.0, "max_labels_per_text": 911, @@ -1114,6 +1119,7 @@ "min_text_length": 512, "average_text_length": 512.0, "max_text_length": 512, + "unique_texts": 4474, "min_labels_per_text": 110, "average_labels_per_text": 512.0, "max_labels_per_text": 2486, @@ -1142,6 +1148,7 @@ "min_text_length": 512, "average_text_length": 512.0, "max_text_length": 512, + "unique_texts": 2717, "min_labels_per_text": 2, "average_labels_per_text": 512.0, "max_labels_per_text": 1334, @@ -1239,6 +1246,7 @@ "min_text_length": 512, "average_text_length": 512.0, "max_text_length": 512, + "unique_texts": 2258, "min_labels_per_text": 1, "average_labels_per_text": 512.0, "max_labels_per_text": 1405, @@ -1354,6 +1362,7 @@ "min_text_length": 512, "average_text_length": 512.0, "max_text_length": 512, + "unique_texts": 3365, "min_labels_per_text": 5, "average_labels_per_text": 512.0, "max_labels_per_text": 1078, @@ -1484,6 +1493,7 @@ "min_text_length": 512, "average_text_length": 512.0, "max_text_length": 512, + "unique_texts": 4540, "min_labels_per_text": 13, "average_labels_per_text": 512.0, "max_labels_per_text": 878, @@ -1545,6 +1555,7 @@ "min_text_length": 512, "average_text_length": 512.0, "max_text_length": 512, + "unique_texts": 3881, "min_labels_per_text": 1, "average_labels_per_text": 512.0, "max_labels_per_text": 3986, @@ -1606,6 +1617,7 @@ "min_text_length": 512, "average_text_length": 512.0, "max_text_length": 512, + "unique_texts": 1887, "min_labels_per_text": 2, "average_labels_per_text": 512.0, "max_labels_per_text": 1634, @@ -1700,6 +1712,7 @@ "min_text_length": 512, "average_text_length": 512.0, "max_text_length": 512, + "unique_texts": 2605, "min_labels_per_text": 3, "average_labels_per_text": 512.0, "max_labels_per_text": 1081, @@ -1782,6 +1795,7 @@ "min_text_length": 512, "average_text_length": 512.0, "max_text_length": 512, + "unique_texts": 3741, "min_labels_per_text": 2, "average_labels_per_text": 512.0, "max_labels_per_text": 1109, @@ -1903,6 +1917,7 @@ "min_text_length": 512, "average_text_length": 512.0, "max_text_length": 512, + "unique_texts": 2317, "min_labels_per_text": 2, "average_labels_per_text": 512.0, "max_labels_per_text": 3653, diff --git a/mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json b/mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json index a05f101ff..897b23d7c 100644 --- a/mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json +++ b/mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json @@ -7,15 +7,19 @@ "min_document_length": 7, "average_document_length": 2233.0329664807277, "max_document_length": 2959, + "unique_docs": 19143, "min_query_length": 55, "average_query_length": 109.75, "max_query_length": 278, + "unique_queries": 20, "min_instruction_length": 102, "average_instruction_length": 295.55, "max_instruction_length": 811, + "unique_instructions": 20, "min_changed_instruction_length": 151, "average_changed_instruction_length": 355.2, "max_changed_instruction_length": 837, + "unique_changed_instructions": 20, "min_average_relevant_docs_per_query": 4, "average_relevant_docs_per_query": 32.7, "max_average_relevant_docs_per_query": 55, diff --git a/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json b/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json index cc8313a80..d5d91adf5 100644 --- a/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json +++ b/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json @@ -1,13 +1,15 @@ { "test": { + "num_samples": 1882, + "number_of_characters": 171649, + "number_texts_in_train": 7, "min_text_length": 6, "average_text_length": 91.20563230605738, "max_text_length": 220, - "number_of_characters": 171649, + "unique_texts": 1875, "min_labels_per_text": 0, "average_label_per_text": 0.620616365568544, "max_labels_per_text": 2, - "num_samples": 1882, "unique_labels": 6, "labels": { "None": { @@ -29,5 +31,38 @@ "count": 125 } } + }, + "train": { + "num_samples": 7528, + "number_of_characters": 697322, + "number_texts_in_train": null, + "min_text_length": 5, + "average_text_length": 92.63044633368757, + "max_text_length": 280, + "unique_texts": 7500, + "min_labels_per_text": 0, + "average_label_per_text": 0.6101222104144527, + "max_labels_per_text": 3, + "unique_labels": 6, + "labels": { + "None": { + "count": 3043 + }, + "2": { + "count": 607 + }, + "0": { + "count": 1569 + }, + "3": { + "count": 589 + }, + "1": { + "count": 1417 + }, + "4": { + "count": 411 + } + } } } \ No newline at end of file diff --git a/mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json b/mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json deleted file mode 100644 index 37fe86909..000000000 --- a/mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json +++ /dev/null @@ -1,1828 +0,0 @@ -{ - "test": { - "min_text_length": 563, - "average_text_length": 12014.408930434782, - "max_text_length": 1458188, - "number_of_characters": 1381657027, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 115000, - "unique_labels": 21, - "labels": { - "18": { - "count": 50784 - }, - "15": { - "count": 30981 - }, - "5": { - "count": 24978 - }, - "6": { - "count": 45080 - }, - "3": { - "count": 63687 - }, - "17": { - "count": 37743 - }, - "1": { - "count": 15019 - }, - "20": { - "count": 14030 - }, - "0": { - "count": 17802 - }, - "2": { - "count": 22402 - }, - "19": { - "count": 10212 - }, - "9": { - "count": 3772 - }, - "4": { - "count": 9062 - }, - "10": { - "count": 7705 - }, - "11": { - "count": 12213 - }, - "7": { - "count": 14306 - }, - "12": { - "count": 11799 - }, - "8": { - "count": 13800 - }, - "13": { - "count": 2346 - }, - "14": { - "count": 4255 - }, - "16": { - "count": 1311 - } - }, - "hf_subset_descriptive_stats": { - "en": { - "min_text_length": 700, - "average_text_length": 11720.2926, - "max_text_length": 1269363, - "number_of_characters": 58601463, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "de": { - "min_text_length": 688, - "average_text_length": 12865.4162, - "max_text_length": 1361562, - "number_of_characters": 64327081, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "fr": { - "min_text_length": 676, - "average_text_length": 13081.1098, - "max_text_length": 1440461, - "number_of_characters": 65405549, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "it": { - "min_text_length": 696, - "average_text_length": 12763.4786, - "max_text_length": 1404333, - "number_of_characters": 63817393, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "es": { - "min_text_length": 683, - "average_text_length": 13080.29, - "max_text_length": 1458188, - "number_of_characters": 65401450, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "pl": { - "min_text_length": 697, - "average_text_length": 12282.5926, - "max_text_length": 1381409, - "number_of_characters": 61412963, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "ro": { - "min_text_length": 645, - "average_text_length": 12836.9322, - "max_text_length": 1450509, - "number_of_characters": 64184661, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "nl": { - "min_text_length": 721, - "average_text_length": 12857.9742, - "max_text_length": 1442428, - "number_of_characters": 64289871, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "el": { - "min_text_length": 695, - "average_text_length": 12998.143, - "max_text_length": 1436873, - "number_of_characters": 64990715, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "hu": { - "min_text_length": 635, - "average_text_length": 12424.641, - "max_text_length": 1405731, - "number_of_characters": 62123205, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "pt": { - "min_text_length": 662, - "average_text_length": 12482.4616, - "max_text_length": 1400357, - "number_of_characters": 62412308, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "cs": { - "min_text_length": 563, - "average_text_length": 10783.4676, - "max_text_length": 1183634, - "number_of_characters": 53917338, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "sv": { - "min_text_length": 660, - "average_text_length": 11612.4774, - "max_text_length": 1257482, - "number_of_characters": 58062387, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "bg": { - "min_text_length": 661, - "average_text_length": 12235.4268, - "max_text_length": 1309869, - "number_of_characters": 61177134, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "da": { - "min_text_length": 680, - "average_text_length": 11773.958, - "max_text_length": 1297978, - "number_of_characters": 58869790, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "fi": { - "min_text_length": 707, - "average_text_length": 12087.6862, - "max_text_length": 1330363, - "number_of_characters": 60438431, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "sk": { - "min_text_length": 595, - "average_text_length": 11130.814, - "max_text_length": 1229063, - "number_of_characters": 55654070, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "lt": { - "min_text_length": 597, - "average_text_length": 11245.3566, - "max_text_length": 1274867, - "number_of_characters": 56226783, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "hr": { - "min_text_length": 610, - "average_text_length": 11022.142, - "max_text_length": 1252581, - "number_of_characters": 55110710, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "sl": { - "min_text_length": 573, - "average_text_length": 10620.0594, - "max_text_length": 1208117, - "number_of_characters": 53100297, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "et": { - "min_text_length": 599, - "average_text_length": 10898.4312, - "max_text_length": 1370495, - "number_of_characters": 54492156, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "lv": { - "min_text_length": 614, - "average_text_length": 10938.5102, - "max_text_length": 1230284, - "number_of_characters": 54692551, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "mt": { - "min_text_length": 703, - "average_text_length": 12589.7442, - "max_text_length": 1403346, - "number_of_characters": 62948721, - "min_labels_per_text": 1, - "average_label_per_text": 3.5938, - "max_labels_per_text": 9, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - } - } - } -} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/PawsXPairClassification.json b/mteb/descriptive_stats/PairClassification/PawsXPairClassification.json index 35e88be06..849724bdb 100644 --- a/mteb/descriptive_stats/PairClassification/PawsXPairClassification.json +++ b/mteb/descriptive_stats/PairClassification/PawsXPairClassification.json @@ -5,9 +5,11 @@ "min_sentence1_length": 2, "avg_sentence1_length": 91.17892857142857, "max_sentence1_length": 268, + "unique_sentence1": 13404, "min_sentence2_length": 2, "avg_sentence2_length": 91.10121428571429, "max_sentence2_length": 247, + "unique_sentence2": 13462, "unique_labels": 2, "labels": { "1": { @@ -24,9 +26,11 @@ "min_sentence1_length": 2, "avg_sentence1_length": 119.7815, "max_sentence1_length": 268, + "unique_sentence1": 1934, "min_sentence2_length": 2, "avg_sentence2_length": 119.2355, "max_sentence2_length": 235, + "unique_sentence2": 1938, "unique_labels": 2, "labels": { "1": { @@ -43,9 +47,11 @@ "min_sentence1_length": 25, "avg_sentence1_length": 113.7575, "max_sentence1_length": 209, + "unique_sentence1": 1761, "min_sentence2_length": 25, "avg_sentence2_length": 113.4235, "max_sentence2_length": 209, + "unique_sentence2": 1800, "unique_labels": 2, "labels": { "1": { @@ -62,9 +68,11 @@ "min_sentence1_length": 2, "avg_sentence1_length": 117.815, "max_sentence1_length": 226, + "unique_sentence1": 1955, "min_sentence2_length": 22, "avg_sentence2_length": 117.798, "max_sentence2_length": 233, + "unique_sentence2": 1959, "unique_labels": 2, "labels": { "1": { @@ -81,9 +89,11 @@ "min_sentence1_length": 2, "avg_sentence1_length": 120.028, "max_sentence1_length": 238, + "unique_sentence1": 1954, "min_sentence2_length": 2, "avg_sentence2_length": 119.9885, "max_sentence2_length": 247, + "unique_sentence2": 1953, "unique_labels": 2, "labels": { "1": { @@ -100,9 +110,11 @@ "min_sentence1_length": 2, "avg_sentence1_length": 58.678, "max_sentence1_length": 192, + "unique_sentence1": 1944, "min_sentence2_length": 2, "avg_sentence2_length": 58.875, "max_sentence2_length": 198, + "unique_sentence2": 1941, "unique_labels": 2, "labels": { "1": { @@ -119,9 +131,11 @@ "min_sentence1_length": 2, "avg_sentence1_length": 64.9605, "max_sentence1_length": 153, + "unique_sentence1": 1954, "min_sentence2_length": 2, "avg_sentence2_length": 65.114, "max_sentence2_length": 159, + "unique_sentence2": 1969, "unique_labels": 2, "labels": { "1": { @@ -138,9 +152,11 @@ "min_sentence1_length": 2, "avg_sentence1_length": 43.232, "max_sentence1_length": 120, + "unique_sentence1": 1909, "min_sentence2_length": 2, "avg_sentence2_length": 43.274, "max_sentence2_length": 113, + "unique_sentence2": 1909, "unique_labels": 2, "labels": { "1": { @@ -159,9 +175,11 @@ "min_sentence1_length": 2, "avg_sentence1_length": 90.12585714285714, "max_sentence1_length": 248, + "unique_sentence1": 13357, "min_sentence2_length": 2, "avg_sentence2_length": 90.2045, "max_sentence2_length": 275, + "unique_sentence2": 13397, "unique_labels": 2, "labels": { "1": { @@ -178,9 +196,11 @@ "min_sentence1_length": 2, "avg_sentence1_length": 116.82, "max_sentence1_length": 248, + "unique_sentence1": 1914, "min_sentence2_length": 2, "avg_sentence2_length": 117.0015, "max_sentence2_length": 275, + "unique_sentence2": 1920, "unique_labels": 2, "labels": { "1": { @@ -197,9 +217,11 @@ "min_sentence1_length": 25, "avg_sentence1_length": 113.1075, "max_sentence1_length": 213, + "unique_sentence1": 1758, "min_sentence2_length": 25, "avg_sentence2_length": 112.858, "max_sentence2_length": 213, + "unique_sentence2": 1771, "unique_labels": 2, "labels": { "1": { @@ -216,9 +238,11 @@ "min_sentence1_length": 2, "avg_sentence1_length": 116.3285, "max_sentence1_length": 240, + "unique_sentence1": 1938, "min_sentence2_length": 2, "avg_sentence2_length": 116.7275, "max_sentence2_length": 241, + "unique_sentence2": 1941, "unique_labels": 2, "labels": { "1": { @@ -235,9 +259,11 @@ "min_sentence1_length": 2, "avg_sentence1_length": 119.5045, "max_sentence1_length": 233, + "unique_sentence1": 1933, "min_sentence2_length": 2, "avg_sentence2_length": 119.7505, "max_sentence2_length": 246, + "unique_sentence2": 1939, "unique_labels": 2, "labels": { "1": { @@ -254,9 +280,11 @@ "min_sentence1_length": 2, "avg_sentence1_length": 57.5105, "max_sentence1_length": 126, + "unique_sentence1": 1957, "min_sentence2_length": 2, "avg_sentence2_length": 57.317, "max_sentence2_length": 121, + "unique_sentence2": 1969, "unique_labels": 2, "labels": { "1": { @@ -273,9 +301,11 @@ "min_sentence1_length": 2, "avg_sentence1_length": 65.162, "max_sentence1_length": 178, + "unique_sentence1": 1963, "min_sentence2_length": 2, "avg_sentence2_length": 65.5155, "max_sentence2_length": 174, + "unique_sentence2": 1968, "unique_labels": 2, "labels": { "1": { @@ -292,9 +322,11 @@ "min_sentence1_length": 2, "avg_sentence1_length": 42.448, "max_sentence1_length": 101, + "unique_sentence1": 1899, "min_sentence2_length": 2, "avg_sentence2_length": 42.2615, "max_sentence2_length": 120, + "unique_sentence2": 1895, "unique_labels": 2, "labels": { "1": { diff --git a/mteb/descriptive_stats/PairClassification/TwitterURLCorpus.json b/mteb/descriptive_stats/PairClassification/TwitterURLCorpus.json index 06edb67b3..473a765dd 100644 --- a/mteb/descriptive_stats/PairClassification/TwitterURLCorpus.json +++ b/mteb/descriptive_stats/PairClassification/TwitterURLCorpus.json @@ -5,9 +5,11 @@ "min_sentence1_length": 24, "avg_sentence1_length": 79.48919160166103, "max_sentence1_length": 126, + "unique_sentence1": 4329, "min_sentence2_length": 6, "avg_sentence2_length": 88.5540419916948, "max_sentence2_length": 608, + "unique_sentence2": 41304, "unique_labels": 2, "labels": { "0": { diff --git a/mteb/descriptive_stats/PairClassification/XNLI.json b/mteb/descriptive_stats/PairClassification/XNLI.json index 62ce2048a..867fafdc8 100644 --- a/mteb/descriptive_stats/PairClassification/XNLI.json +++ b/mteb/descriptive_stats/PairClassification/XNLI.json @@ -5,9 +5,11 @@ "min_sentence1_length": 3, "avg_sentence1_length": 103.23793825222397, "max_sentence1_length": 401, + "unique_sentence1": 15328, "min_sentence2_length": 2, "avg_sentence2_length": 48.88895866038723, "max_sentence2_length": 187, + "unique_sentence2": 19104, "unique_labels": 2, "labels": { "0": { @@ -24,9 +26,11 @@ "min_sentence1_length": 11, "avg_sentence1_length": 89.57362637362637, "max_sentence1_length": 242, + "unique_sentence1": 1095, "min_sentence2_length": 8, "avg_sentence2_length": 41.99487179487179, "max_sentence2_length": 115, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -43,9 +47,11 @@ "min_sentence1_length": 14, "avg_sentence1_length": 110.01611721611722, "max_sentence1_length": 303, + "unique_sentence1": 1095, "min_sentence2_length": 8, "avg_sentence2_length": 51.62930402930403, "max_sentence2_length": 150, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -62,9 +68,11 @@ "min_sentence1_length": 3, "avg_sentence1_length": 119.92600732600732, "max_sentence1_length": 301, + "unique_sentence1": 1095, "min_sentence2_length": 9, "avg_sentence2_length": 56.794871794871796, "max_sentence2_length": 187, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -81,9 +89,11 @@ "min_sentence1_length": 13, "avg_sentence1_length": 119.05421245421246, "max_sentence1_length": 344, + "unique_sentence1": 1095, "min_sentence2_length": 13, "avg_sentence2_length": 56.93260073260073, "max_sentence2_length": 172, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -100,9 +110,11 @@ "min_sentence1_length": 19, "avg_sentence1_length": 105.67032967032966, "max_sentence1_length": 268, + "unique_sentence1": 1095, "min_sentence2_length": 9, "avg_sentence2_length": 49.8043956043956, "max_sentence2_length": 137, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -119,9 +131,11 @@ "min_sentence1_length": 11, "avg_sentence1_length": 115.43296703296703, "max_sentence1_length": 385, + "unique_sentence1": 1094, "min_sentence2_length": 8, "avg_sentence2_length": 54.68205128205128, "max_sentence2_length": 163, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -138,9 +152,11 @@ "min_sentence1_length": 9, "avg_sentence1_length": 121.0967032967033, "max_sentence1_length": 327, + "unique_sentence1": 1095, "min_sentence2_length": 10, "avg_sentence2_length": 58.58021978021978, "max_sentence2_length": 169, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -157,9 +173,11 @@ "min_sentence1_length": 16, "avg_sentence1_length": 104.63443223443224, "max_sentence1_length": 401, + "unique_sentence1": 1095, "min_sentence2_length": 9, "avg_sentence2_length": 50.17289377289377, "max_sentence2_length": 162, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -176,9 +194,11 @@ "min_sentence1_length": 11, "avg_sentence1_length": 110.76923076923077, "max_sentence1_length": 306, + "unique_sentence1": 1095, "min_sentence2_length": 8, "avg_sentence2_length": 52.452014652014654, "max_sentence2_length": 167, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -195,9 +215,11 @@ "min_sentence1_length": 10, "avg_sentence1_length": 104.43956043956044, "max_sentence1_length": 266, + "unique_sentence1": 1094, "min_sentence2_length": 2, "avg_sentence2_length": 49.48205128205128, "max_sentence2_length": 146, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -214,9 +236,11 @@ "min_sentence1_length": 12, "avg_sentence1_length": 96.6923076923077, "max_sentence1_length": 262, + "unique_sentence1": 1095, "min_sentence2_length": 6, "avg_sentence2_length": 44.544322344322346, "max_sentence2_length": 129, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -233,9 +257,11 @@ "min_sentence1_length": 15, "avg_sentence1_length": 103.67765567765568, "max_sentence1_length": 255, + "unique_sentence1": 1095, "min_sentence2_length": 6, "avg_sentence2_length": 49.18534798534799, "max_sentence2_length": 140, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -252,9 +278,11 @@ "min_sentence1_length": 14, "avg_sentence1_length": 111.31208791208792, "max_sentence1_length": 265, + "unique_sentence1": 1095, "min_sentence2_length": 9, "avg_sentence2_length": 52.46007326007326, "max_sentence2_length": 143, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -271,9 +299,11 @@ "min_sentence1_length": 4, "avg_sentence1_length": 33.03589743589744, "max_sentence1_length": 112, + "unique_sentence1": 1095, "min_sentence2_length": 3, "avg_sentence2_length": 15.73040293040293, "max_sentence2_length": 59, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -292,9 +322,11 @@ "min_sentence1_length": 5, "avg_sentence1_length": 103.20790162218734, "max_sentence1_length": 323, + "unique_sentence1": 11171, "min_sentence2_length": 3, "avg_sentence2_length": 49.01909994767138, "max_sentence2_length": 172, + "unique_sentence2": 19101, "unique_labels": 2, "labels": { "0": { @@ -311,9 +343,11 @@ "min_sentence1_length": 13, "avg_sentence1_length": 88.31868131868131, "max_sentence1_length": 214, + "unique_sentence1": 798, "min_sentence2_length": 6, "avg_sentence2_length": 41.61172161172161, "max_sentence2_length": 137, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -330,9 +364,11 @@ "min_sentence1_length": 16, "avg_sentence1_length": 109.196336996337, "max_sentence1_length": 316, + "unique_sentence1": 798, "min_sentence2_length": 10, "avg_sentence2_length": 51.967032967032964, "max_sentence2_length": 151, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -349,9 +385,11 @@ "min_sentence1_length": 20, "avg_sentence1_length": 119.81172161172161, "max_sentence1_length": 298, + "unique_sentence1": 798, "min_sentence2_length": 12, "avg_sentence2_length": 57.36923076923077, "max_sentence2_length": 162, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -368,9 +406,11 @@ "min_sentence1_length": 16, "avg_sentence1_length": 119.87545787545787, "max_sentence1_length": 302, + "unique_sentence1": 798, "min_sentence2_length": 6, "avg_sentence2_length": 56.88278388278388, "max_sentence2_length": 171, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -387,9 +427,11 @@ "min_sentence1_length": 20, "avg_sentence1_length": 105.71648351648352, "max_sentence1_length": 271, + "unique_sentence1": 798, "min_sentence2_length": 8, "avg_sentence2_length": 49.87619047619047, "max_sentence2_length": 139, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -406,9 +448,11 @@ "min_sentence1_length": 14, "avg_sentence1_length": 115.17289377289377, "max_sentence1_length": 265, + "unique_sentence1": 798, "min_sentence2_length": 7, "avg_sentence2_length": 55.120879120879124, "max_sentence2_length": 148, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -425,9 +469,11 @@ "min_sentence1_length": 19, "avg_sentence1_length": 121.75897435897436, "max_sentence1_length": 323, + "unique_sentence1": 798, "min_sentence2_length": 11, "avg_sentence2_length": 59.08864468864469, "max_sentence2_length": 172, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -444,9 +490,11 @@ "min_sentence1_length": 18, "avg_sentence1_length": 105.06446886446886, "max_sentence1_length": 277, + "unique_sentence1": 798, "min_sentence2_length": 7, "avg_sentence2_length": 50.44395604395604, "max_sentence2_length": 152, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -463,9 +511,11 @@ "min_sentence1_length": 15, "avg_sentence1_length": 109.74725274725274, "max_sentence1_length": 310, + "unique_sentence1": 798, "min_sentence2_length": 8, "avg_sentence2_length": 52.26886446886447, "max_sentence2_length": 140, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -482,9 +532,11 @@ "min_sentence1_length": 13, "avg_sentence1_length": 104.32234432234432, "max_sentence1_length": 264, + "unique_sentence1": 798, "min_sentence2_length": 8, "avg_sentence2_length": 49.87692307692308, "max_sentence2_length": 153, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -501,9 +553,11 @@ "min_sentence1_length": 7, "avg_sentence1_length": 97.28498168498169, "max_sentence1_length": 255, + "unique_sentence1": 798, "min_sentence2_length": 3, "avg_sentence2_length": 43.843223443223444, "max_sentence2_length": 140, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -520,9 +574,11 @@ "min_sentence1_length": 15, "avg_sentence1_length": 102.96630036630036, "max_sentence1_length": 269, + "unique_sentence1": 798, "min_sentence2_length": 10, "avg_sentence2_length": 49.63809523809524, "max_sentence2_length": 139, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -539,9 +595,11 @@ "min_sentence1_length": 18, "avg_sentence1_length": 112.26373626373626, "max_sentence1_length": 323, + "unique_sentence1": 798, "min_sentence2_length": 9, "avg_sentence2_length": 52.432967032967035, "max_sentence2_length": 159, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -558,9 +616,11 @@ "min_sentence1_length": 5, "avg_sentence1_length": 33.41098901098901, "max_sentence1_length": 135, + "unique_sentence1": 798, "min_sentence2_length": 3, "avg_sentence2_length": 15.846886446886447, "max_sentence2_length": 66, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { diff --git a/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json b/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json index deee90477..a0ced7def 100644 --- a/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json +++ b/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json @@ -7,9 +7,14 @@ "min_query_length": 17, "avg_query_length": 50.205333333333336, "max_query_length": 148, + "unique_query": 374, "min_positive_length": 15, "avg_positive_length": 52.54013303769401, "max_positive_length": 152, - "avg_negative_length": 52.69189704480458 + "unique_positive": 2165, + "min_negative_length": 15, + "avg_negative_length": 52.69189704480458, + "max_negative_length": 148, + "unique_negative": 5002 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/ESCIReranking.json b/mteb/descriptive_stats/Reranking/ESCIReranking.json index e17fa884e..9c9556be9 100644 --- a/mteb/descriptive_stats/Reranking/ESCIReranking.json +++ b/mteb/descriptive_stats/Reranking/ESCIReranking.json @@ -7,10 +7,15 @@ "min_query_length": 1, "avg_query_length": 19.691890046098685, "max_query_length": 151, + "unique_query": 29269, "min_positive_length": 1, "avg_positive_length": 803.9230995961918, "max_positive_length": 8640, + "unique_positive": 217712, + "min_negative_length": 1, "avg_negative_length": 808.501458121397, + "max_negative_length": 4441, + "unique_negative": 39551, "hf_subset_descriptive_stats": { "us": { "num_samples": 21296, @@ -20,10 +25,15 @@ "min_query_length": 1, "avg_query_length": 21.440833959429, "max_query_length": 151, + "unique_query": 21296, "min_positive_length": 1, "avg_positive_length": 868.3698006600661, "max_positive_length": 5545, - "avg_negative_length": 864.4493578918431 + "unique_positive": 150734, + "min_negative_length": 1, + "avg_negative_length": 864.4493578918431, + "max_negative_length": 3779, + "unique_negative": 23073 }, "es": { "num_samples": 3703, @@ -33,10 +43,15 @@ "min_query_length": 3, "avg_query_length": 20.681609505806104, "max_query_length": 59, + "unique_query": 3703, "min_positive_length": 1, "avg_positive_length": 980.9613142418818, "max_positive_length": 8640, - "avg_negative_length": 1023.2159481488756 + "unique_positive": 32921, + "min_negative_length": 1, + "avg_negative_length": 1023.2159481488756, + "max_negative_length": 4441, + "unique_negative": 9285 }, "jp": { "num_samples": 4286, @@ -46,10 +61,15 @@ "min_query_length": 1, "avg_query_length": 10.146756882874476, "max_query_length": 60, + "unique_query": 4286, "min_positive_length": 1, "avg_positive_length": 358.35792317905475, "max_positive_length": 3488, - "avg_negative_length": 388.075445337059 + "unique_positive": 35165, + "min_negative_length": 1, + "avg_negative_length": 388.075445337059, + "max_negative_length": 3940, + "unique_negative": 7289 } } } diff --git a/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json b/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json index c14c0a0f8..0506ff39e 100644 --- a/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json +++ b/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json @@ -7,10 +7,15 @@ "min_query_length": 7, "avg_query_length": 59.091208333333334, "max_query_length": 180, + "unique_query": 23997, "min_positive_length": 100, "avg_positive_length": 385.45120833333334, "max_positive_length": 3515, + "unique_positive": 23993, + "min_negative_length": 100, "avg_negative_length": 381.23913541666667, + "max_negative_length": 9461, + "unique_negative": 191783, "hf_subset_descriptive_stats": { "bg": { "num_samples": 1500, @@ -20,10 +25,15 @@ "min_query_length": 18, "avg_query_length": 60.82666666666667, "max_query_length": 166, + "unique_query": 1500, "min_positive_length": 100, "avg_positive_length": 375.88866666666667, "max_positive_length": 2241, - "avg_negative_length": 374.18691666666666 + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 374.18691666666666, + "max_negative_length": 4869, + "unique_negative": 11996 }, "bn": { "num_samples": 1500, @@ -33,10 +43,15 @@ "min_query_length": 7, "avg_query_length": 47.266666666666666, "max_query_length": 123, + "unique_query": 1500, "min_positive_length": 100, "avg_positive_length": 394.5946666666667, "max_positive_length": 2338, - "avg_negative_length": 393.98241666666667 + "unique_positive": 1499, + "min_negative_length": 100, + "avg_negative_length": 393.98241666666667, + "max_negative_length": 5104, + "unique_negative": 11996 }, "cs": { "num_samples": 1500, @@ -46,10 +61,15 @@ "min_query_length": 17, "avg_query_length": 56.272, "max_query_length": 137, + "unique_query": 1500, "min_positive_length": 100, "avg_positive_length": 383.8446666666667, "max_positive_length": 2300, - "avg_negative_length": 368.2504166666667 + "unique_positive": 1499, + "min_negative_length": 100, + "avg_negative_length": 368.2504166666667, + "max_negative_length": 3487, + "unique_negative": 11982 }, "da": { "num_samples": 1500, @@ -59,10 +79,15 @@ "min_query_length": 17, "avg_query_length": 56.75066666666667, "max_query_length": 137, + "unique_query": 1499, "min_positive_length": 100, "avg_positive_length": 351.6813333333333, "max_positive_length": 2159, - "avg_negative_length": 344.457 + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 344.457, + "max_negative_length": 2563, + "unique_negative": 11972 }, "de": { "num_samples": 1500, @@ -72,10 +97,15 @@ "min_query_length": 20, "avg_query_length": 70.004, "max_query_length": 180, + "unique_query": 1499, "min_positive_length": 100, "avg_positive_length": 391.5366666666667, "max_positive_length": 2674, - "avg_negative_length": 399.27341666666666 + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 399.27341666666666, + "max_negative_length": 3083, + "unique_negative": 12000 }, "en": { "num_samples": 1500, @@ -85,10 +115,15 @@ "min_query_length": 18, "avg_query_length": 68.372, "max_query_length": 162, + "unique_query": 1500, "min_positive_length": 100, "avg_positive_length": 451.72866666666664, "max_positive_length": 3515, - "avg_negative_length": 453.14441666666664 + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 453.14441666666664, + "max_negative_length": 3662, + "unique_negative": 12000 }, "fa": { "num_samples": 1500, @@ -98,10 +133,15 @@ "min_query_length": 12, "avg_query_length": 48.66733333333333, "max_query_length": 119, + "unique_query": 1500, "min_positive_length": 100, "avg_positive_length": 347.704, "max_positive_length": 2571, - "avg_negative_length": 344.8385 + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 344.8385, + "max_negative_length": 4707, + "unique_negative": 11978 }, "fi": { "num_samples": 1500, @@ -111,10 +151,15 @@ "min_query_length": 14, "avg_query_length": 55.343333333333334, "max_query_length": 132, + "unique_query": 1500, "min_positive_length": 100, "avg_positive_length": 394.7126666666667, "max_positive_length": 2129, - "avg_negative_length": 377.83733333333333 + "unique_positive": 1498, + "min_negative_length": 100, + "avg_negative_length": 377.83733333333333, + "max_negative_length": 2574, + "unique_negative": 11972 }, "hi": { "num_samples": 1500, @@ -124,10 +169,15 @@ "min_query_length": 13, "avg_query_length": 50.77733333333333, "max_query_length": 125, + "unique_query": 1499, "min_positive_length": 100, "avg_positive_length": 420.3786666666667, "max_positive_length": 2361, - "avg_negative_length": 409.51875 + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 409.51875, + "max_negative_length": 5912, + "unique_negative": 11996 }, "it": { "num_samples": 1500, @@ -137,10 +187,15 @@ "min_query_length": 23, "avg_query_length": 70.05466666666666, "max_query_length": 156, + "unique_query": 1500, "min_positive_length": 100, "avg_positive_length": 396.97333333333336, "max_positive_length": 2082, - "avg_negative_length": 393.3295 + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 393.3295, + "max_negative_length": 9461, + "unique_negative": 11993 }, "nl": { "num_samples": 1500, @@ -150,10 +205,15 @@ "min_query_length": 18, "avg_query_length": 65.34466666666667, "max_query_length": 136, + "unique_query": 1500, "min_positive_length": 100, "avg_positive_length": 380.79133333333334, "max_positive_length": 1864, - "avg_negative_length": 375.02933333333334 + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 375.02933333333334, + "max_negative_length": 3641, + "unique_negative": 11985 }, "pt": { "num_samples": 1500, @@ -163,10 +223,15 @@ "min_query_length": 18, "avg_query_length": 65.11933333333333, "max_query_length": 176, + "unique_query": 1500, "min_positive_length": 100, "avg_positive_length": 404.01933333333335, "max_positive_length": 3057, - "avg_negative_length": 397.554 + "unique_positive": 1499, + "min_negative_length": 100, + "avg_negative_length": 397.554, + "max_negative_length": 2877, + "unique_negative": 11991 }, "ro": { "num_samples": 1500, @@ -176,10 +241,15 @@ "min_query_length": 14, "avg_query_length": 61.973333333333336, "max_query_length": 169, + "unique_query": 1500, "min_positive_length": 100, "avg_positive_length": 346.70866666666666, "max_positive_length": 1917, - "avg_negative_length": 348.5908333333333 + "unique_positive": 1499, + "min_negative_length": 100, + "avg_negative_length": 348.5908333333333, + "max_negative_length": 4213, + "unique_negative": 11971 }, "sr": { "num_samples": 1500, @@ -189,10 +259,15 @@ "min_query_length": 15, "avg_query_length": 55.669333333333334, "max_query_length": 146, + "unique_query": 1500, "min_positive_length": 100, "avg_positive_length": 386.34933333333333, "max_positive_length": 2421, - "avg_negative_length": 384.0586666666667 + "unique_positive": 1499, + "min_negative_length": 100, + "avg_negative_length": 384.0586666666667, + "max_negative_length": 3668, + "unique_negative": 11974 }, "no": { "num_samples": 1500, @@ -202,10 +277,15 @@ "min_query_length": 14, "avg_query_length": 55.288, "max_query_length": 129, + "unique_query": 1500, "min_positive_length": 100, "avg_positive_length": 367.72, "max_positive_length": 1450, - "avg_negative_length": 366.8395 + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 366.8395, + "max_negative_length": 2841, + "unique_negative": 11996 }, "sv": { "num_samples": 1500, @@ -215,10 +295,15 @@ "min_query_length": 17, "avg_query_length": 57.73, "max_query_length": 133, + "unique_query": 1500, "min_positive_length": 100, "avg_positive_length": 372.58733333333333, "max_positive_length": 2493, - "avg_negative_length": 368.93516666666665 + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 368.93516666666665, + "max_negative_length": 3680, + "unique_negative": 11999 } } } diff --git a/mteb/descriptive_stats/Retrieval/AppsRetrieval.json b/mteb/descriptive_stats/Retrieval/AppsRetrieval.json index a1c1011bf..caaab2453 100644 --- a/mteb/descriptive_stats/Retrieval/AppsRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/AppsRetrieval.json @@ -7,11 +7,14 @@ "min_document_length": 152, "average_document_length": 717.2737022247576, "max_document_length": 5742, + "unique_documents": 8765, "min_query_length": 6, "average_query_length": 1340.9604249667996, "max_query_length": 289049, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 3765, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 3765 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json b/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json index 858d560f9..78c8a7e12 100644 --- a/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json @@ -7,12 +7,15 @@ "min_document_length": 4, "average_document_length": 137.38034094872688, "max_document_length": 237, + "unique_documents": 183488, "min_query_length": 2, "average_query_length": 1.0845149507355678, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0000413738481817, + "unique_queries": 338378, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0000413738481817, - "max_relevant_docs_per_query": 1.0000413738481817, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 183488, "hf_subset_descriptive_stats": { "acm_Arab-acm_Arab": { "number_of_characters": 51232, @@ -22,12 +25,15 @@ "min_document_length": 13, "average_document_length": 102.98360655737704, "max_document_length": 129, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "acm_Arab-eng_Latn": { "number_of_characters": 70589, @@ -37,12 +43,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-acm_Arab": { "number_of_characters": 51232, @@ -52,12 +61,15 @@ "min_document_length": 13, "average_document_length": 102.98360655737704, "max_document_length": 129, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "afr_Latn-afr_Latn": { "number_of_characters": 71217, @@ -67,12 +79,15 @@ "min_document_length": 17, "average_document_length": 143.93647540983608, "max_document_length": 159, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "afr_Latn-eng_Latn": { "number_of_characters": 70589, @@ -82,12 +97,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-afr_Latn": { "number_of_characters": 71217, @@ -97,12 +115,15 @@ "min_document_length": 17, "average_document_length": 143.93647540983608, "max_document_length": 159, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "als_Latn-als_Latn": { "number_of_characters": 69498, @@ -112,12 +133,15 @@ "min_document_length": 18, "average_document_length": 140.4139344262295, "max_document_length": 175, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "als_Latn-eng_Latn": { "number_of_characters": 70589, @@ -127,12 +151,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-als_Latn": { "number_of_characters": 69498, @@ -142,12 +169,15 @@ "min_document_length": 18, "average_document_length": 140.4139344262295, "max_document_length": 175, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "amh_Ethi-amh_Ethi": { "number_of_characters": 45221, @@ -157,12 +187,15 @@ "min_document_length": 13, "average_document_length": 90.66598360655738, "max_document_length": 100, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "amh_Ethi-eng_Latn": { "number_of_characters": 70589, @@ -172,12 +205,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-amh_Ethi": { "number_of_characters": 45221, @@ -187,12 +223,15 @@ "min_document_length": 13, "average_document_length": 90.66598360655738, "max_document_length": 100, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "apc_Arab-apc_Arab": { "number_of_characters": 51248, @@ -202,12 +241,15 @@ "min_document_length": 16, "average_document_length": 103.01639344262296, "max_document_length": 134, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "apc_Arab-eng_Latn": { "number_of_characters": 70589, @@ -217,12 +259,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-apc_Arab": { "number_of_characters": 51248, @@ -232,12 +277,15 @@ "min_document_length": 16, "average_document_length": 103.01639344262296, "max_document_length": 134, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arb_Arab-arb_Arab": { "number_of_characters": 53671, @@ -247,12 +295,15 @@ "min_document_length": 14, "average_document_length": 107.98155737704919, "max_document_length": 134, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arb_Arab-eng_Latn": { "number_of_characters": 70589, @@ -262,12 +313,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-arb_Arab": { "number_of_characters": 53671, @@ -277,12 +331,15 @@ "min_document_length": 14, "average_document_length": 107.98155737704919, "max_document_length": 134, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arb_Latn-arb_Latn": { "number_of_characters": 61298, @@ -292,12 +349,15 @@ "min_document_length": 12, "average_document_length": 123.61065573770492, "max_document_length": 160, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arb_Latn-eng_Latn": { "number_of_characters": 70589, @@ -307,12 +367,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-arb_Latn": { "number_of_characters": 61298, @@ -322,12 +385,15 @@ "min_document_length": 12, "average_document_length": 123.61065573770492, "max_document_length": 160, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ars_Arab-ars_Arab": { "number_of_characters": 51765, @@ -337,12 +403,15 @@ "min_document_length": 14, "average_document_length": 104.07581967213115, "max_document_length": 119, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ars_Arab-eng_Latn": { "number_of_characters": 70589, @@ -352,12 +421,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ars_Arab": { "number_of_characters": 51765, @@ -367,12 +439,15 @@ "min_document_length": 14, "average_document_length": 104.07581967213115, "max_document_length": 119, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ary_Arab-ary_Arab": { "number_of_characters": 60261, @@ -382,12 +457,15 @@ "min_document_length": 13, "average_document_length": 121.48565573770492, "max_document_length": 138, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.086859688195991, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 898, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ary_Arab-eng_Latn": { "number_of_characters": 70589, @@ -397,12 +475,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ary_Arab": { "number_of_characters": 60261, @@ -412,12 +493,15 @@ "min_document_length": 13, "average_document_length": 121.48565573770492, "max_document_length": 138, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.086859688195991, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 898, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arz_Arab-arz_Arab": { "number_of_characters": 52403, @@ -427,12 +511,15 @@ "min_document_length": 12, "average_document_length": 105.38319672131148, "max_document_length": 115, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arz_Arab-eng_Latn": { "number_of_characters": 70589, @@ -442,12 +529,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-arz_Arab": { "number_of_characters": 52403, @@ -457,12 +547,15 @@ "min_document_length": 12, "average_document_length": 105.38319672131148, "max_document_length": 115, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "asm_Beng-asm_Beng": { "number_of_characters": 62410, @@ -472,12 +565,15 @@ "min_document_length": 4, "average_document_length": 125.88934426229508, "max_document_length": 158, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "asm_Beng-eng_Latn": { "number_of_characters": 70589, @@ -487,12 +583,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-asm_Beng": { "number_of_characters": 62410, @@ -502,12 +601,15 @@ "min_document_length": 4, "average_document_length": 125.88934426229508, "max_document_length": 158, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "azj_Latn-azj_Latn": { "number_of_characters": 67137, @@ -517,12 +619,15 @@ "min_document_length": 12, "average_document_length": 135.57581967213116, "max_document_length": 156, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "azj_Latn-eng_Latn": { "number_of_characters": 70589, @@ -532,12 +637,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-azj_Latn": { "number_of_characters": 67137, @@ -547,12 +655,15 @@ "min_document_length": 12, "average_document_length": 135.57581967213116, "max_document_length": 156, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "bam_Latn-bam_Latn": { "number_of_characters": 66084, @@ -562,12 +673,15 @@ "min_document_length": 13, "average_document_length": 133.41803278688525, "max_document_length": 166, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "bam_Latn-eng_Latn": { "number_of_characters": 70589, @@ -577,12 +691,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-bam_Latn": { "number_of_characters": 66084, @@ -592,12 +709,15 @@ "min_document_length": 13, "average_document_length": 133.41803278688525, "max_document_length": 166, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ben_Beng-ben_Beng": { "number_of_characters": 63512, @@ -607,12 +727,15 @@ "min_document_length": 9, "average_document_length": 128.14754098360655, "max_document_length": 175, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ben_Beng-eng_Latn": { "number_of_characters": 70589, @@ -622,12 +745,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ben_Beng": { "number_of_characters": 63512, @@ -637,12 +763,15 @@ "min_document_length": 9, "average_document_length": 128.14754098360655, "max_document_length": 175, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ben_Latn-ben_Latn": { "number_of_characters": 68285, @@ -652,12 +781,15 @@ "min_document_length": 9, "average_document_length": 137.92827868852459, "max_document_length": 185, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ben_Latn-eng_Latn": { "number_of_characters": 70589, @@ -667,12 +799,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ben_Latn": { "number_of_characters": 68285, @@ -682,12 +817,15 @@ "min_document_length": 9, "average_document_length": 137.92827868852459, "max_document_length": 185, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "bod_Tibt-bod_Tibt": { "number_of_characters": 79188, @@ -697,12 +835,15 @@ "min_document_length": 18, "average_document_length": 160.2704918032787, "max_document_length": 213, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "bod_Tibt-eng_Latn": { "number_of_characters": 70589, @@ -712,12 +853,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-bod_Tibt": { "number_of_characters": 79188, @@ -727,12 +871,15 @@ "min_document_length": 18, "average_document_length": 160.2704918032787, "max_document_length": 213, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "bul_Cyrl-bul_Cyrl": { "number_of_characters": 66577, @@ -742,12 +889,15 @@ "min_document_length": 14, "average_document_length": 134.42827868852459, "max_document_length": 177, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "bul_Cyrl-eng_Latn": { "number_of_characters": 70589, @@ -757,12 +907,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-bul_Cyrl": { "number_of_characters": 66577, @@ -772,12 +925,15 @@ "min_document_length": 14, "average_document_length": 134.42827868852459, "max_document_length": 177, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "cat_Latn-cat_Latn": { "number_of_characters": 68842, @@ -787,12 +943,15 @@ "min_document_length": 15, "average_document_length": 139.06967213114754, "max_document_length": 163, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "cat_Latn-eng_Latn": { "number_of_characters": 70589, @@ -802,12 +961,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-cat_Latn": { "number_of_characters": 68842, @@ -817,12 +979,15 @@ "min_document_length": 15, "average_document_length": 139.06967213114754, "max_document_length": 163, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ceb_Latn-ceb_Latn": { "number_of_characters": 74053, @@ -832,12 +997,15 @@ "min_document_length": 15, "average_document_length": 149.74795081967213, "max_document_length": 184, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ceb_Latn-eng_Latn": { "number_of_characters": 70589, @@ -847,12 +1015,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ceb_Latn": { "number_of_characters": 74053, @@ -862,12 +1033,15 @@ "min_document_length": 15, "average_document_length": 149.74795081967213, "max_document_length": 184, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ces_Latn-ces_Latn": { "number_of_characters": 61936, @@ -877,12 +1051,15 @@ "min_document_length": 12, "average_document_length": 124.91803278688525, "max_document_length": 139, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ces_Latn-eng_Latn": { "number_of_characters": 70589, @@ -892,12 +1069,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ces_Latn": { "number_of_characters": 61936, @@ -907,12 +1087,15 @@ "min_document_length": 12, "average_document_length": 124.91803278688525, "max_document_length": 139, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ckb_Arab-ckb_Arab": { "number_of_characters": 64917, @@ -922,12 +1105,15 @@ "min_document_length": 11, "average_document_length": 131.0266393442623, "max_document_length": 178, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ckb_Arab-eng_Latn": { "number_of_characters": 70589, @@ -937,12 +1123,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ckb_Arab": { "number_of_characters": 64917, @@ -952,12 +1141,15 @@ "min_document_length": 11, "average_document_length": 131.0266393442623, "max_document_length": 178, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "dan_Latn-dan_Latn": { "number_of_characters": 66648, @@ -967,12 +1159,15 @@ "min_document_length": 14, "average_document_length": 134.5737704918033, "max_document_length": 159, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "dan_Latn-eng_Latn": { "number_of_characters": 70589, @@ -982,12 +1177,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-dan_Latn": { "number_of_characters": 66648, @@ -997,12 +1195,15 @@ "min_document_length": 14, "average_document_length": 134.5737704918033, "max_document_length": 159, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "deu_Latn-deu_Latn": { "number_of_characters": 68768, @@ -1012,12 +1213,15 @@ "min_document_length": 18, "average_document_length": 138.91803278688525, "max_document_length": 182, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "deu_Latn-eng_Latn": { "number_of_characters": 70589, @@ -1027,12 +1231,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-deu_Latn": { "number_of_characters": 68768, @@ -1042,12 +1249,15 @@ "min_document_length": 18, "average_document_length": 138.91803278688525, "max_document_length": 182, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ell_Grek-ell_Grek": { "number_of_characters": 79210, @@ -1057,12 +1267,15 @@ "min_document_length": 18, "average_document_length": 160.3155737704918, "max_document_length": 212, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ell_Grek-eng_Latn": { "number_of_characters": 70589, @@ -1072,12 +1285,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ell_Grek": { "number_of_characters": 79210, @@ -1087,12 +1303,15 @@ "min_document_length": 18, "average_document_length": 160.3155737704918, "max_document_length": 212, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-eng_Latn": { "number_of_characters": 70589, @@ -1102,12 +1321,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "est_Latn-est_Latn": { "number_of_characters": 61779, @@ -1117,12 +1339,15 @@ "min_document_length": 13, "average_document_length": 124.59631147540983, "max_document_length": 164, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "est_Latn-eng_Latn": { "number_of_characters": 70589, @@ -1132,12 +1357,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-est_Latn": { "number_of_characters": 61779, @@ -1147,12 +1375,15 @@ "min_document_length": 13, "average_document_length": 124.59631147540983, "max_document_length": 164, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eus_Latn-eus_Latn": { "number_of_characters": 67979, @@ -1162,12 +1393,15 @@ "min_document_length": 13, "average_document_length": 137.3012295081967, "max_document_length": 169, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eus_Latn-eng_Latn": { "number_of_characters": 70589, @@ -1177,12 +1411,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-eus_Latn": { "number_of_characters": 67979, @@ -1192,12 +1429,15 @@ "min_document_length": 13, "average_document_length": 137.3012295081967, "max_document_length": 169, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "fin_Latn-fin_Latn": { "number_of_characters": 66234, @@ -1207,12 +1447,15 @@ "min_document_length": 14, "average_document_length": 133.72540983606558, "max_document_length": 161, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "fin_Latn-eng_Latn": { "number_of_characters": 70589, @@ -1222,12 +1465,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-fin_Latn": { "number_of_characters": 66234, @@ -1237,12 +1483,15 @@ "min_document_length": 14, "average_document_length": 133.72540983606558, "max_document_length": 161, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "fra_Latn-fra_Latn": { "number_of_characters": 82464, @@ -1252,12 +1501,15 @@ "min_document_length": 19, "average_document_length": 166.98360655737704, "max_document_length": 204, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "fra_Latn-eng_Latn": { "number_of_characters": 70589, @@ -1267,12 +1519,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-fra_Latn": { "number_of_characters": 82464, @@ -1282,12 +1537,15 @@ "min_document_length": 19, "average_document_length": 166.98360655737704, "max_document_length": 204, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "fuv_Latn-fuv_Latn": { "number_of_characters": 53555, @@ -1297,12 +1555,15 @@ "min_document_length": 18, "average_document_length": 107.7438524590164, "max_document_length": 122, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "fuv_Latn-eng_Latn": { "number_of_characters": 70589, @@ -1312,12 +1573,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-fuv_Latn": { "number_of_characters": 53555, @@ -1327,12 +1591,15 @@ "min_document_length": 18, "average_document_length": 107.7438524590164, "max_document_length": 122, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "gaz_Latn-gaz_Latn": { "number_of_characters": 78315, @@ -1342,12 +1609,15 @@ "min_document_length": 16, "average_document_length": 158.48155737704917, "max_document_length": 191, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "gaz_Latn-eng_Latn": { "number_of_characters": 70589, @@ -1357,12 +1627,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-gaz_Latn": { "number_of_characters": 78315, @@ -1372,12 +1645,15 @@ "min_document_length": 16, "average_document_length": 158.48155737704917, "max_document_length": 191, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "grn_Latn-grn_Latn": { "number_of_characters": 68572, @@ -1387,12 +1663,15 @@ "min_document_length": 17, "average_document_length": 138.51639344262296, "max_document_length": 161, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "grn_Latn-eng_Latn": { "number_of_characters": 70589, @@ -1402,12 +1681,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-grn_Latn": { "number_of_characters": 68572, @@ -1417,12 +1699,15 @@ "min_document_length": 17, "average_document_length": 138.51639344262296, "max_document_length": 161, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "guj_Gujr-guj_Gujr": { "number_of_characters": 57007, @@ -1432,12 +1717,15 @@ "min_document_length": 13, "average_document_length": 114.81762295081967, "max_document_length": 138, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "guj_Gujr-eng_Latn": { "number_of_characters": 70589, @@ -1447,12 +1735,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-guj_Gujr": { "number_of_characters": 57007, @@ -1462,12 +1753,15 @@ "min_document_length": 13, "average_document_length": 114.81762295081967, "max_document_length": 138, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hat_Latn-hat_Latn": { "number_of_characters": 64558, @@ -1477,12 +1771,15 @@ "min_document_length": 17, "average_document_length": 130.29098360655738, "max_document_length": 179, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hat_Latn-eng_Latn": { "number_of_characters": 70589, @@ -1492,12 +1789,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hat_Latn": { "number_of_characters": 64558, @@ -1507,12 +1807,15 @@ "min_document_length": 17, "average_document_length": 130.29098360655738, "max_document_length": 179, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hau_Latn-hau_Latn": { "number_of_characters": 78240, @@ -1522,12 +1825,15 @@ "min_document_length": 14, "average_document_length": 158.327868852459, "max_document_length": 183, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hau_Latn-eng_Latn": { "number_of_characters": 70589, @@ -1537,12 +1843,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hau_Latn": { "number_of_characters": 78240, @@ -1552,12 +1861,15 @@ "min_document_length": 14, "average_document_length": 158.327868852459, "max_document_length": 183, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "heb_Hebr-heb_Hebr": { "number_of_characters": 50598, @@ -1567,12 +1879,15 @@ "min_document_length": 12, "average_document_length": 101.68442622950819, "max_document_length": 134, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "heb_Hebr-eng_Latn": { "number_of_characters": 70589, @@ -1582,12 +1897,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-heb_Hebr": { "number_of_characters": 50598, @@ -1597,12 +1915,15 @@ "min_document_length": 12, "average_document_length": 101.68442622950819, "max_document_length": 134, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hin_Deva-hin_Deva": { "number_of_characters": 66332, @@ -1612,12 +1933,15 @@ "min_document_length": 14, "average_document_length": 133.9262295081967, "max_document_length": 165, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hin_Deva-eng_Latn": { "number_of_characters": 70589, @@ -1627,12 +1951,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hin_Deva": { "number_of_characters": 66332, @@ -1642,12 +1969,15 @@ "min_document_length": 14, "average_document_length": 133.9262295081967, "max_document_length": 165, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hin_Latn-hin_Latn": { "number_of_characters": 68307, @@ -1657,12 +1987,15 @@ "min_document_length": 14, "average_document_length": 137.9733606557377, "max_document_length": 170, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hin_Latn-eng_Latn": { "number_of_characters": 70589, @@ -1672,12 +2005,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hin_Latn": { "number_of_characters": 68307, @@ -1687,12 +2023,15 @@ "min_document_length": 14, "average_document_length": 137.9733606557377, "max_document_length": 170, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hrv_Latn-hrv_Latn": { "number_of_characters": 62928, @@ -1702,12 +2041,15 @@ "min_document_length": 13, "average_document_length": 126.95081967213115, "max_document_length": 175, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hrv_Latn-eng_Latn": { "number_of_characters": 70589, @@ -1717,12 +2059,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hrv_Latn": { "number_of_characters": 62928, @@ -1732,12 +2077,15 @@ "min_document_length": 13, "average_document_length": 126.95081967213115, "max_document_length": 175, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hun_Latn-hun_Latn": { "number_of_characters": 67941, @@ -1747,12 +2095,15 @@ "min_document_length": 15, "average_document_length": 137.2233606557377, "max_document_length": 176, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hun_Latn-eng_Latn": { "number_of_characters": 70589, @@ -1762,12 +2113,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hun_Latn": { "number_of_characters": 67941, @@ -1777,12 +2131,15 @@ "min_document_length": 15, "average_document_length": 137.2233606557377, "max_document_length": 176, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hye_Armn-hye_Armn": { "number_of_characters": 68859, @@ -1792,12 +2149,15 @@ "min_document_length": 15, "average_document_length": 139.1045081967213, "max_document_length": 193, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hye_Armn-eng_Latn": { "number_of_characters": 70589, @@ -1807,12 +2167,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hye_Armn": { "number_of_characters": 68859, @@ -1822,12 +2185,15 @@ "min_document_length": 15, "average_document_length": 139.1045081967213, "max_document_length": 193, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ibo_Latn-ibo_Latn": { "number_of_characters": 66167, @@ -1837,12 +2203,15 @@ "min_document_length": 19, "average_document_length": 133.58811475409837, "max_document_length": 156, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.085650723025584, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0011123470522802, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, - "max_relevant_docs_per_query": 1.0011123470522802 + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "ibo_Latn-eng_Latn": { "number_of_characters": 70589, @@ -1852,12 +2221,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ibo_Latn": { "number_of_characters": 66167, @@ -1867,12 +2239,15 @@ "min_document_length": 19, "average_document_length": 133.58811475409837, "max_document_length": 156, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.085650723025584, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0011123470522802, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, - "max_relevant_docs_per_query": 1.0011123470522802 + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "ilo_Latn-ilo_Latn": { "number_of_characters": 78161, @@ -1882,12 +2257,15 @@ "min_document_length": 14, "average_document_length": 158.16598360655738, "max_document_length": 187, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ilo_Latn-eng_Latn": { "number_of_characters": 70589, @@ -1897,12 +2275,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ilo_Latn": { "number_of_characters": 78161, @@ -1912,12 +2293,15 @@ "min_document_length": 14, "average_document_length": 158.16598360655738, "max_document_length": 187, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ind_Latn-ind_Latn": { "number_of_characters": 74871, @@ -1927,12 +2311,15 @@ "min_document_length": 19, "average_document_length": 151.42418032786884, "max_document_length": 207, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ind_Latn-eng_Latn": { "number_of_characters": 70589, @@ -1942,12 +2329,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ind_Latn": { "number_of_characters": 74871, @@ -1957,12 +2347,15 @@ "min_document_length": 19, "average_document_length": 151.42418032786884, "max_document_length": 207, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "isl_Latn-isl_Latn": { "number_of_characters": 70522, @@ -1972,12 +2365,15 @@ "min_document_length": 14, "average_document_length": 142.5122950819672, "max_document_length": 170, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "isl_Latn-eng_Latn": { "number_of_characters": 70589, @@ -1987,12 +2383,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-isl_Latn": { "number_of_characters": 70522, @@ -2002,12 +2401,15 @@ "min_document_length": 14, "average_document_length": 142.5122950819672, "max_document_length": 170, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ita_Latn-ita_Latn": { "number_of_characters": 76124, @@ -2017,12 +2419,15 @@ "min_document_length": 18, "average_document_length": 153.99180327868854, "max_document_length": 185, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ita_Latn-eng_Latn": { "number_of_characters": 70589, @@ -2032,12 +2437,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ita_Latn": { "number_of_characters": 76124, @@ -2047,12 +2455,15 @@ "min_document_length": 18, "average_document_length": 153.99180327868854, "max_document_length": 185, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "jav_Latn-jav_Latn": { "number_of_characters": 71722, @@ -2062,12 +2473,15 @@ "min_document_length": 14, "average_document_length": 144.97131147540983, "max_document_length": 174, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "jav_Latn-eng_Latn": { "number_of_characters": 70589, @@ -2077,12 +2491,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-jav_Latn": { "number_of_characters": 71722, @@ -2092,12 +2509,15 @@ "min_document_length": 14, "average_document_length": 144.97131147540983, "max_document_length": 174, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "jpn_Jpan-jpn_Jpan": { "number_of_characters": 33187, @@ -2107,12 +2527,15 @@ "min_document_length": 14, "average_document_length": 66.0061475409836, "max_document_length": 76, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "jpn_Jpan-eng_Latn": { "number_of_characters": 70589, @@ -2122,12 +2545,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-jpn_Jpan": { "number_of_characters": 33187, @@ -2137,12 +2563,15 @@ "min_document_length": 14, "average_document_length": 66.0061475409836, "max_document_length": 76, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kac_Latn-kac_Latn": { "number_of_characters": 89655, @@ -2152,12 +2581,15 @@ "min_document_length": 18, "average_document_length": 181.71926229508196, "max_document_length": 195, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.085650723025584, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kac_Latn-eng_Latn": { "number_of_characters": 70589, @@ -2167,12 +2599,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kac_Latn": { "number_of_characters": 89655, @@ -2182,12 +2617,15 @@ "min_document_length": 18, "average_document_length": 181.71926229508196, "max_document_length": 195, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.085650723025584, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kan_Knda-kan_Knda": { "number_of_characters": 65899, @@ -2197,12 +2635,15 @@ "min_document_length": 14, "average_document_length": 133.0389344262295, "max_document_length": 165, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kan_Knda-eng_Latn": { "number_of_characters": 70589, @@ -2212,12 +2653,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kan_Knda": { "number_of_characters": 65899, @@ -2227,12 +2671,15 @@ "min_document_length": 14, "average_document_length": 133.0389344262295, "max_document_length": 165, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kat_Geor-kat_Geor": { "number_of_characters": 68309, @@ -2242,12 +2689,15 @@ "min_document_length": 14, "average_document_length": 137.97745901639345, "max_document_length": 175, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kat_Geor-eng_Latn": { "number_of_characters": 70589, @@ -2257,12 +2707,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kat_Geor": { "number_of_characters": 68309, @@ -2272,12 +2725,15 @@ "min_document_length": 14, "average_document_length": 137.97745901639345, "max_document_length": 175, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kaz_Cyrl-kaz_Cyrl": { "number_of_characters": 64657, @@ -2287,12 +2743,15 @@ "min_document_length": 17, "average_document_length": 130.49385245901638, "max_document_length": 158, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kaz_Cyrl-eng_Latn": { "number_of_characters": 70589, @@ -2302,12 +2761,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kaz_Cyrl": { "number_of_characters": 64657, @@ -2317,12 +2779,15 @@ "min_document_length": 17, "average_document_length": 130.49385245901638, "max_document_length": 158, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kea_Latn-kea_Latn": { "number_of_characters": 69323, @@ -2332,12 +2797,15 @@ "min_document_length": 14, "average_document_length": 140.05532786885246, "max_document_length": 183, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kea_Latn-eng_Latn": { "number_of_characters": 70589, @@ -2347,12 +2815,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kea_Latn": { "number_of_characters": 69323, @@ -2362,12 +2833,15 @@ "min_document_length": 14, "average_document_length": 140.05532786885246, "max_document_length": 183, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "khk_Cyrl-khk_Cyrl": { "number_of_characters": 66977, @@ -2377,12 +2851,15 @@ "min_document_length": 16, "average_document_length": 135.24795081967213, "max_document_length": 162, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "khk_Cyrl-eng_Latn": { "number_of_characters": 70589, @@ -2392,12 +2869,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-khk_Cyrl": { "number_of_characters": 66977, @@ -2407,12 +2887,15 @@ "min_document_length": 16, "average_document_length": 135.24795081967213, "max_document_length": 162, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "khm_Khmr-khm_Khmr": { "number_of_characters": 69150, @@ -2422,12 +2905,15 @@ "min_document_length": 16, "average_document_length": 139.70081967213116, "max_document_length": 169, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "khm_Khmr-eng_Latn": { "number_of_characters": 70589, @@ -2437,12 +2923,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-khm_Khmr": { "number_of_characters": 69150, @@ -2452,12 +2941,15 @@ "min_document_length": 16, "average_document_length": 139.70081967213116, "max_document_length": 169, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kin_Latn-kin_Latn": { "number_of_characters": 72803, @@ -2467,12 +2959,15 @@ "min_document_length": 13, "average_document_length": 147.18647540983608, "max_document_length": 194, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.085650723025584, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0011123470522802, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, - "max_relevant_docs_per_query": 1.0011123470522802 + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "kin_Latn-eng_Latn": { "number_of_characters": 70589, @@ -2482,12 +2977,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kin_Latn": { "number_of_characters": 72803, @@ -2497,12 +2995,15 @@ "min_document_length": 13, "average_document_length": 147.18647540983608, "max_document_length": 194, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.085650723025584, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0011123470522802, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, - "max_relevant_docs_per_query": 1.0011123470522802 + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "kir_Cyrl-kir_Cyrl": { "number_of_characters": 67957, @@ -2512,12 +3013,15 @@ "min_document_length": 17, "average_document_length": 137.25614754098362, "max_document_length": 182, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kir_Cyrl-eng_Latn": { "number_of_characters": 70589, @@ -2527,12 +3031,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kir_Cyrl": { "number_of_characters": 67957, @@ -2542,12 +3049,15 @@ "min_document_length": 17, "average_document_length": 137.25614754098362, "max_document_length": 182, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kor_Hang-kor_Hang": { "number_of_characters": 32708, @@ -2557,12 +3067,15 @@ "min_document_length": 13, "average_document_length": 65.02459016393442, "max_document_length": 88, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kor_Hang-eng_Latn": { "number_of_characters": 70589, @@ -2572,12 +3085,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kor_Hang": { "number_of_characters": 32708, @@ -2587,12 +3103,15 @@ "min_document_length": 13, "average_document_length": 65.02459016393442, "max_document_length": 88, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "lao_Laoo-lao_Laoo": { "number_of_characters": 57958, @@ -2602,12 +3121,15 @@ "min_document_length": 15, "average_document_length": 116.76639344262296, "max_document_length": 142, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "lao_Laoo-eng_Latn": { "number_of_characters": 70589, @@ -2617,12 +3139,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-lao_Laoo": { "number_of_characters": 57958, @@ -2632,12 +3157,15 @@ "min_document_length": 15, "average_document_length": 116.76639344262296, "max_document_length": 142, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "lin_Latn-lin_Latn": { "number_of_characters": 74223, @@ -2647,12 +3175,15 @@ "min_document_length": 17, "average_document_length": 150.09631147540983, "max_document_length": 183, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.086859688195991, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0022271714922049, + "unique_queries": 898, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0022271714922049, - "max_relevant_docs_per_query": 1.0022271714922049 + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "lin_Latn-eng_Latn": { "number_of_characters": 70589, @@ -2662,12 +3193,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-lin_Latn": { "number_of_characters": 74223, @@ -2677,12 +3211,15 @@ "min_document_length": 17, "average_document_length": 150.09631147540983, "max_document_length": 183, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.086859688195991, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0022271714922049, + "unique_queries": 898, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0022271714922049, - "max_relevant_docs_per_query": 1.0022271714922049 + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "lit_Latn-lit_Latn": { "number_of_characters": 62805, @@ -2692,12 +3229,15 @@ "min_document_length": 14, "average_document_length": 126.69877049180327, "max_document_length": 167, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "lit_Latn-eng_Latn": { "number_of_characters": 70589, @@ -2707,12 +3247,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-lit_Latn": { "number_of_characters": 62805, @@ -2722,12 +3265,15 @@ "min_document_length": 14, "average_document_length": 126.69877049180327, "max_document_length": 167, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "lug_Latn-lug_Latn": { "number_of_characters": 71566, @@ -2737,12 +3283,15 @@ "min_document_length": 12, "average_document_length": 144.6516393442623, "max_document_length": 237, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.085650723025584, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0011123470522802, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, - "max_relevant_docs_per_query": 1.0011123470522802 + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "lug_Latn-eng_Latn": { "number_of_characters": 70589, @@ -2752,12 +3301,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-lug_Latn": { "number_of_characters": 71566, @@ -2767,12 +3319,15 @@ "min_document_length": 12, "average_document_length": 144.6516393442623, "max_document_length": 237, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.085650723025584, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0011123470522802, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, - "max_relevant_docs_per_query": 1.0011123470522802 + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "luo_Latn-luo_Latn": { "number_of_characters": 66805, @@ -2782,12 +3337,15 @@ "min_document_length": 15, "average_document_length": 134.8954918032787, "max_document_length": 178, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "luo_Latn-eng_Latn": { "number_of_characters": 70589, @@ -2797,12 +3355,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-luo_Latn": { "number_of_characters": 66805, @@ -2812,12 +3373,15 @@ "min_document_length": 15, "average_document_length": 134.8954918032787, "max_document_length": 178, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "lvs_Latn-lvs_Latn": { "number_of_characters": 63957, @@ -2827,12 +3391,15 @@ "min_document_length": 13, "average_document_length": 129.0594262295082, "max_document_length": 172, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "lvs_Latn-eng_Latn": { "number_of_characters": 70589, @@ -2842,12 +3409,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-lvs_Latn": { "number_of_characters": 63957, @@ -2857,12 +3427,15 @@ "min_document_length": 13, "average_document_length": 129.0594262295082, "max_document_length": 172, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mal_Mlym-mal_Mlym": { "number_of_characters": 73599, @@ -2872,12 +3445,15 @@ "min_document_length": 14, "average_document_length": 148.81762295081967, "max_document_length": 191, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mal_Mlym-eng_Latn": { "number_of_characters": 70589, @@ -2887,12 +3463,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-mal_Mlym": { "number_of_characters": 73599, @@ -2902,12 +3481,15 @@ "min_document_length": 14, "average_document_length": 148.81762295081967, "max_document_length": 191, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mar_Deva-mar_Deva": { "number_of_characters": 62671, @@ -2917,12 +3499,15 @@ "min_document_length": 15, "average_document_length": 126.42418032786885, "max_document_length": 160, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.085650723025584, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0011123470522802, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, - "max_relevant_docs_per_query": 1.0011123470522802 + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "mar_Deva-eng_Latn": { "number_of_characters": 70589, @@ -2932,12 +3517,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-mar_Deva": { "number_of_characters": 62671, @@ -2947,12 +3535,15 @@ "min_document_length": 15, "average_document_length": 126.42418032786885, "max_document_length": 160, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.085650723025584, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0011123470522802, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, - "max_relevant_docs_per_query": 1.0011123470522802 + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "mkd_Cyrl-mkd_Cyrl": { "number_of_characters": 67588, @@ -2962,12 +3553,15 @@ "min_document_length": 12, "average_document_length": 136.5, "max_document_length": 180, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mkd_Cyrl-eng_Latn": { "number_of_characters": 70589, @@ -2977,12 +3571,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-mkd_Cyrl": { "number_of_characters": 67588, @@ -2992,12 +3589,15 @@ "min_document_length": 12, "average_document_length": 136.5, "max_document_length": 180, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mlt_Latn-mlt_Latn": { "number_of_characters": 68480, @@ -3007,12 +3607,15 @@ "min_document_length": 14, "average_document_length": 138.327868852459, "max_document_length": 185, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mlt_Latn-eng_Latn": { "number_of_characters": 70589, @@ -3022,12 +3625,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-mlt_Latn": { "number_of_characters": 68480, @@ -3037,12 +3643,15 @@ "min_document_length": 14, "average_document_length": 138.327868852459, "max_document_length": 185, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mri_Latn-mri_Latn": { "number_of_characters": 74519, @@ -3052,12 +3661,15 @@ "min_document_length": 16, "average_document_length": 150.702868852459, "max_document_length": 185, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mri_Latn-eng_Latn": { "number_of_characters": 70589, @@ -3067,12 +3679,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-mri_Latn": { "number_of_characters": 74519, @@ -3082,12 +3697,15 @@ "min_document_length": 16, "average_document_length": 150.702868852459, "max_document_length": 185, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mya_Mymr-mya_Mymr": { "number_of_characters": 81331, @@ -3097,12 +3715,15 @@ "min_document_length": 18, "average_document_length": 164.66188524590163, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mya_Mymr-eng_Latn": { "number_of_characters": 70589, @@ -3112,12 +3733,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-mya_Mymr": { "number_of_characters": 81331, @@ -3127,12 +3751,15 @@ "min_document_length": 18, "average_document_length": 164.66188524590163, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nld_Latn-nld_Latn": { "number_of_characters": 68789, @@ -3142,12 +3769,15 @@ "min_document_length": 16, "average_document_length": 138.9610655737705, "max_document_length": 183, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nld_Latn-eng_Latn": { "number_of_characters": 70589, @@ -3157,12 +3787,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-nld_Latn": { "number_of_characters": 68789, @@ -3172,12 +3805,15 @@ "min_document_length": 16, "average_document_length": 138.9610655737705, "max_document_length": 183, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nob_Latn-nob_Latn": { "number_of_characters": 64917, @@ -3187,12 +3823,15 @@ "min_document_length": 13, "average_document_length": 131.0266393442623, "max_document_length": 168, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nob_Latn-eng_Latn": { "number_of_characters": 70589, @@ -3202,12 +3841,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-nob_Latn": { "number_of_characters": 64917, @@ -3217,12 +3859,15 @@ "min_document_length": 13, "average_document_length": 131.0266393442623, "max_document_length": 168, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "npi_Deva-npi_Deva": { "number_of_characters": 61183, @@ -3232,12 +3877,15 @@ "min_document_length": 18, "average_document_length": 123.375, "max_document_length": 154, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "npi_Deva-eng_Latn": { "number_of_characters": 70589, @@ -3247,12 +3895,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-npi_Deva": { "number_of_characters": 61183, @@ -3262,12 +3913,15 @@ "min_document_length": 18, "average_document_length": 123.375, "max_document_length": 154, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "npi_Latn-npi_Latn": { "number_of_characters": 65683, @@ -3277,12 +3931,15 @@ "min_document_length": 20, "average_document_length": 132.59631147540983, "max_document_length": 154, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "npi_Latn-eng_Latn": { "number_of_characters": 70589, @@ -3292,12 +3949,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-npi_Latn": { "number_of_characters": 65683, @@ -3307,12 +3967,15 @@ "min_document_length": 20, "average_document_length": 132.59631147540983, "max_document_length": 154, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nso_Latn-nso_Latn": { "number_of_characters": 79073, @@ -3322,12 +3985,15 @@ "min_document_length": 12, "average_document_length": 160.03483606557376, "max_document_length": 235, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nso_Latn-eng_Latn": { "number_of_characters": 70589, @@ -3337,12 +4003,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-nso_Latn": { "number_of_characters": 79073, @@ -3352,12 +4021,15 @@ "min_document_length": 12, "average_document_length": 160.03483606557376, "max_document_length": 235, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nya_Latn-nya_Latn": { "number_of_characters": 82685, @@ -3367,12 +4039,15 @@ "min_document_length": 16, "average_document_length": 167.43647540983608, "max_document_length": 215, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nya_Latn-eng_Latn": { "number_of_characters": 70589, @@ -3382,12 +4057,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-nya_Latn": { "number_of_characters": 82685, @@ -3397,12 +4075,15 @@ "min_document_length": 16, "average_document_length": 167.43647540983608, "max_document_length": 215, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ory_Orya-ory_Orya": { "number_of_characters": 66638, @@ -3412,12 +4093,15 @@ "min_document_length": 10, "average_document_length": 134.55327868852459, "max_document_length": 168, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ory_Orya-eng_Latn": { "number_of_characters": 70589, @@ -3427,12 +4111,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ory_Orya": { "number_of_characters": 66638, @@ -3442,12 +4129,15 @@ "min_document_length": 10, "average_document_length": 134.55327868852459, "max_document_length": 168, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pan_Guru-pan_Guru": { "number_of_characters": 66944, @@ -3457,12 +4147,15 @@ "min_document_length": 12, "average_document_length": 135.18032786885246, "max_document_length": 157, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pan_Guru-eng_Latn": { "number_of_characters": 70589, @@ -3472,12 +4165,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-pan_Guru": { "number_of_characters": 66944, @@ -3487,12 +4183,15 @@ "min_document_length": 12, "average_document_length": 135.18032786885246, "max_document_length": 157, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pbt_Arab-pbt_Arab": { "number_of_characters": 61880, @@ -3502,12 +4201,15 @@ "min_document_length": 15, "average_document_length": 124.80327868852459, "max_document_length": 155, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pbt_Arab-eng_Latn": { "number_of_characters": 70589, @@ -3517,12 +4219,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-pbt_Arab": { "number_of_characters": 61880, @@ -3532,12 +4237,15 @@ "min_document_length": 15, "average_document_length": 124.80327868852459, "max_document_length": 155, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pes_Arab-pes_Arab": { "number_of_characters": 59252, @@ -3547,12 +4255,15 @@ "min_document_length": 12, "average_document_length": 119.41803278688525, "max_document_length": 152, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pes_Arab-eng_Latn": { "number_of_characters": 70589, @@ -3562,12 +4273,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-pes_Arab": { "number_of_characters": 59252, @@ -3577,12 +4291,15 @@ "min_document_length": 12, "average_document_length": 119.41803278688525, "max_document_length": 152, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "plt_Latn-plt_Latn": { "number_of_characters": 86472, @@ -3592,12 +4309,15 @@ "min_document_length": 15, "average_document_length": 175.19672131147541, "max_document_length": 222, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "plt_Latn-eng_Latn": { "number_of_characters": 70589, @@ -3607,12 +4327,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-plt_Latn": { "number_of_characters": 86472, @@ -3622,12 +4345,15 @@ "min_document_length": 15, "average_document_length": 175.19672131147541, "max_document_length": 222, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pol_Latn-pol_Latn": { "number_of_characters": 67664, @@ -3637,12 +4363,15 @@ "min_document_length": 16, "average_document_length": 136.65573770491804, "max_document_length": 196, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pol_Latn-eng_Latn": { "number_of_characters": 70589, @@ -3652,12 +4381,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-pol_Latn": { "number_of_characters": 67664, @@ -3667,12 +4399,15 @@ "min_document_length": 16, "average_document_length": 136.65573770491804, "max_document_length": 196, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "por_Latn-por_Latn": { "number_of_characters": 71281, @@ -3682,12 +4417,15 @@ "min_document_length": 16, "average_document_length": 144.06762295081967, "max_document_length": 179, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "por_Latn-eng_Latn": { "number_of_characters": 70589, @@ -3697,12 +4435,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-por_Latn": { "number_of_characters": 71281, @@ -3712,12 +4453,15 @@ "min_document_length": 16, "average_document_length": 144.06762295081967, "max_document_length": 179, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ron_Latn-ron_Latn": { "number_of_characters": 71844, @@ -3727,12 +4471,15 @@ "min_document_length": 14, "average_document_length": 145.22131147540983, "max_document_length": 181, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ron_Latn-eng_Latn": { "number_of_characters": 70589, @@ -3742,12 +4489,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ron_Latn": { "number_of_characters": 71844, @@ -3757,12 +4507,15 @@ "min_document_length": 14, "average_document_length": 145.22131147540983, "max_document_length": 181, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "rus_Cyrl-rus_Cyrl": { "number_of_characters": 75823, @@ -3772,12 +4525,15 @@ "min_document_length": 16, "average_document_length": 153.375, "max_document_length": 196, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "rus_Cyrl-eng_Latn": { "number_of_characters": 70589, @@ -3787,12 +4543,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-rus_Cyrl": { "number_of_characters": 75823, @@ -3802,12 +4561,15 @@ "min_document_length": 16, "average_document_length": 153.375, "max_document_length": 196, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "shn_Mymr-shn_Mymr": { "number_of_characters": 69288, @@ -3817,12 +4579,15 @@ "min_document_length": 14, "average_document_length": 139.98360655737704, "max_document_length": 159, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "shn_Mymr-eng_Latn": { "number_of_characters": 70589, @@ -3832,12 +4597,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-shn_Mymr": { "number_of_characters": 69288, @@ -3847,12 +4615,15 @@ "min_document_length": 14, "average_document_length": 139.98360655737704, "max_document_length": 159, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sin_Latn-sin_Latn": { "number_of_characters": 85996, @@ -3862,12 +4633,15 @@ "min_document_length": 19, "average_document_length": 174.22131147540983, "max_document_length": 224, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sin_Latn-eng_Latn": { "number_of_characters": 70589, @@ -3877,12 +4651,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-sin_Latn": { "number_of_characters": 85996, @@ -3892,12 +4669,15 @@ "min_document_length": 19, "average_document_length": 174.22131147540983, "max_document_length": 224, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sin_Sinh-sin_Sinh": { "number_of_characters": 63902, @@ -3907,12 +4687,15 @@ "min_document_length": 17, "average_document_length": 128.94672131147541, "max_document_length": 159, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sin_Sinh-eng_Latn": { "number_of_characters": 70589, @@ -3922,12 +4705,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-sin_Sinh": { "number_of_characters": 63902, @@ -3937,12 +4723,15 @@ "min_document_length": 17, "average_document_length": 128.94672131147541, "max_document_length": 159, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "slk_Latn-slk_Latn": { "number_of_characters": 62663, @@ -3952,12 +4741,15 @@ "min_document_length": 12, "average_document_length": 126.4077868852459, "max_document_length": 146, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "slk_Latn-eng_Latn": { "number_of_characters": 70589, @@ -3967,12 +4759,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-slk_Latn": { "number_of_characters": 62663, @@ -3982,12 +4777,15 @@ "min_document_length": 12, "average_document_length": 126.4077868852459, "max_document_length": 146, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "slv_Latn-slv_Latn": { "number_of_characters": 62895, @@ -3997,12 +4795,15 @@ "min_document_length": 13, "average_document_length": 126.88319672131148, "max_document_length": 176, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "slv_Latn-eng_Latn": { "number_of_characters": 70589, @@ -4012,12 +4813,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-slv_Latn": { "number_of_characters": 62895, @@ -4027,12 +4831,15 @@ "min_document_length": 13, "average_document_length": 126.88319672131148, "max_document_length": 176, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sna_Latn-sna_Latn": { "number_of_characters": 74071, @@ -4042,12 +4849,15 @@ "min_document_length": 20, "average_document_length": 149.78483606557376, "max_document_length": 191, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.085650723025584, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sna_Latn-eng_Latn": { "number_of_characters": 70589, @@ -4057,12 +4867,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-sna_Latn": { "number_of_characters": 74071, @@ -4072,12 +4885,15 @@ "min_document_length": 20, "average_document_length": 149.78483606557376, "max_document_length": 191, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.085650723025584, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "snd_Arab-snd_Arab": { "number_of_characters": 58057, @@ -4087,12 +4903,15 @@ "min_document_length": 13, "average_document_length": 116.96926229508196, "max_document_length": 164, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "snd_Arab-eng_Latn": { "number_of_characters": 70589, @@ -4102,12 +4921,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-snd_Arab": { "number_of_characters": 58057, @@ -4117,12 +4939,15 @@ "min_document_length": 13, "average_document_length": 116.96926229508196, "max_document_length": 164, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "som_Latn-som_Latn": { "number_of_characters": 82838, @@ -4132,12 +4957,15 @@ "min_document_length": 16, "average_document_length": 167.75, "max_document_length": 201, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "som_Latn-eng_Latn": { "number_of_characters": 70589, @@ -4147,12 +4975,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-som_Latn": { "number_of_characters": 82838, @@ -4162,12 +4993,15 @@ "min_document_length": 16, "average_document_length": 167.75, "max_document_length": 201, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sot_Latn-sot_Latn": { "number_of_characters": 75794, @@ -4177,12 +5011,15 @@ "min_document_length": 11, "average_document_length": 153.3155737704918, "max_document_length": 186, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sot_Latn-eng_Latn": { "number_of_characters": 70589, @@ -4192,12 +5029,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-sot_Latn": { "number_of_characters": 75794, @@ -4207,12 +5047,15 @@ "min_document_length": 11, "average_document_length": 153.3155737704918, "max_document_length": 186, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "spa_Latn-spa_Latn": { "number_of_characters": 74920, @@ -4222,12 +5065,15 @@ "min_document_length": 17, "average_document_length": 151.52459016393442, "max_document_length": 180, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "spa_Latn-eng_Latn": { "number_of_characters": 70589, @@ -4237,12 +5083,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-spa_Latn": { "number_of_characters": 74920, @@ -4252,12 +5101,15 @@ "min_document_length": 17, "average_document_length": 151.52459016393442, "max_document_length": 180, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "srp_Cyrl-srp_Cyrl": { "number_of_characters": 61657, @@ -4267,12 +5119,15 @@ "min_document_length": 13, "average_document_length": 124.34631147540983, "max_document_length": 160, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.085650723025584, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0011123470522802, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, - "max_relevant_docs_per_query": 1.0011123470522802 + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "srp_Cyrl-eng_Latn": { "number_of_characters": 70589, @@ -4282,12 +5137,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-srp_Cyrl": { "number_of_characters": 61657, @@ -4297,12 +5155,15 @@ "min_document_length": 13, "average_document_length": 124.34631147540983, "max_document_length": 160, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.085650723025584, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0011123470522802, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, - "max_relevant_docs_per_query": 1.0011123470522802 + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "ssw_Latn-ssw_Latn": { "number_of_characters": 73964, @@ -4312,12 +5173,15 @@ "min_document_length": 14, "average_document_length": 149.5655737704918, "max_document_length": 182, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ssw_Latn-eng_Latn": { "number_of_characters": 70589, @@ -4327,12 +5191,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ssw_Latn": { "number_of_characters": 73964, @@ -4342,12 +5209,15 @@ "min_document_length": 14, "average_document_length": 149.5655737704918, "max_document_length": 182, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sun_Latn-sun_Latn": { "number_of_characters": 71320, @@ -4357,12 +5227,15 @@ "min_document_length": 15, "average_document_length": 144.14754098360655, "max_document_length": 173, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sun_Latn-eng_Latn": { "number_of_characters": 70589, @@ -4372,12 +5245,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-sun_Latn": { "number_of_characters": 71320, @@ -4387,12 +5263,15 @@ "min_document_length": 15, "average_document_length": 144.14754098360655, "max_document_length": 173, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "swe_Latn-swe_Latn": { "number_of_characters": 62785, @@ -4402,12 +5281,15 @@ "min_document_length": 16, "average_document_length": 126.6577868852459, "max_document_length": 154, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "swe_Latn-eng_Latn": { "number_of_characters": 70589, @@ -4417,12 +5299,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-swe_Latn": { "number_of_characters": 62785, @@ -4432,12 +5317,15 @@ "min_document_length": 16, "average_document_length": 126.6577868852459, "max_document_length": 154, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "swh_Latn-swh_Latn": { "number_of_characters": 73480, @@ -4447,12 +5335,15 @@ "min_document_length": 14, "average_document_length": 148.5737704918033, "max_document_length": 194, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "swh_Latn-eng_Latn": { "number_of_characters": 70589, @@ -4462,12 +5353,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-swh_Latn": { "number_of_characters": 73480, @@ -4477,12 +5371,15 @@ "min_document_length": 14, "average_document_length": 148.5737704918033, "max_document_length": 194, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tam_Taml-tam_Taml": { "number_of_characters": 73991, @@ -4492,12 +5389,15 @@ "min_document_length": 20, "average_document_length": 149.62090163934425, "max_document_length": 181, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tam_Taml-eng_Latn": { "number_of_characters": 70589, @@ -4507,12 +5407,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tam_Taml": { "number_of_characters": 73991, @@ -4522,12 +5425,15 @@ "min_document_length": 20, "average_document_length": 149.62090163934425, "max_document_length": 181, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tel_Telu-tel_Telu": { "number_of_characters": 65945, @@ -4537,12 +5443,15 @@ "min_document_length": 16, "average_document_length": 133.13319672131146, "max_document_length": 149, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tel_Telu-eng_Latn": { "number_of_characters": 70589, @@ -4552,12 +5461,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tel_Telu": { "number_of_characters": 65945, @@ -4567,12 +5479,15 @@ "min_document_length": 16, "average_document_length": 133.13319672131146, "max_document_length": 149, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tgk_Cyrl-tgk_Cyrl": { "number_of_characters": 67829, @@ -4582,12 +5497,15 @@ "min_document_length": 11, "average_document_length": 136.99385245901638, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tgk_Cyrl-eng_Latn": { "number_of_characters": 70589, @@ -4597,12 +5515,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tgk_Cyrl": { "number_of_characters": 67829, @@ -4612,12 +5533,15 @@ "min_document_length": 11, "average_document_length": 136.99385245901638, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tgl_Latn-tgl_Latn": { "number_of_characters": 75087, @@ -4627,12 +5551,15 @@ "min_document_length": 14, "average_document_length": 151.86680327868854, "max_document_length": 184, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tgl_Latn-eng_Latn": { "number_of_characters": 70589, @@ -4642,12 +5569,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tgl_Latn": { "number_of_characters": 75087, @@ -4657,12 +5587,15 @@ "min_document_length": 14, "average_document_length": 151.86680327868854, "max_document_length": 184, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tha_Thai-tha_Thai": { "number_of_characters": 54496, @@ -4672,12 +5605,15 @@ "min_document_length": 13, "average_document_length": 109.67213114754098, "max_document_length": 123, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tha_Thai-eng_Latn": { "number_of_characters": 70589, @@ -4687,12 +5623,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tha_Thai": { "number_of_characters": 54496, @@ -4702,12 +5641,15 @@ "min_document_length": 13, "average_document_length": 109.67213114754098, "max_document_length": 123, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tir_Ethi-tir_Ethi": { "number_of_characters": 47775, @@ -4717,12 +5659,15 @@ "min_document_length": 14, "average_document_length": 95.89959016393442, "max_document_length": 110, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tir_Ethi-eng_Latn": { "number_of_characters": 70589, @@ -4732,12 +5677,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tir_Ethi": { "number_of_characters": 47775, @@ -4747,12 +5695,15 @@ "min_document_length": 14, "average_document_length": 95.89959016393442, "max_document_length": 110, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tsn_Latn-tsn_Latn": { "number_of_characters": 79391, @@ -4762,12 +5713,15 @@ "min_document_length": 13, "average_document_length": 160.68647540983608, "max_document_length": 204, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tsn_Latn-eng_Latn": { "number_of_characters": 70589, @@ -4777,12 +5731,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tsn_Latn": { "number_of_characters": 79391, @@ -4792,12 +5749,15 @@ "min_document_length": 13, "average_document_length": 160.68647540983608, "max_document_length": 204, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tso_Latn-tso_Latn": { "number_of_characters": 83501, @@ -4807,12 +5767,15 @@ "min_document_length": 13, "average_document_length": 169.10860655737704, "max_document_length": 215, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tso_Latn-eng_Latn": { "number_of_characters": 70589, @@ -4822,12 +5785,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tso_Latn": { "number_of_characters": 83501, @@ -4837,12 +5803,15 @@ "min_document_length": 13, "average_document_length": 169.10860655737704, "max_document_length": 215, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tur_Latn-tur_Latn": { "number_of_characters": 65382, @@ -4852,12 +5821,15 @@ "min_document_length": 12, "average_document_length": 131.9795081967213, "max_document_length": 158, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tur_Latn-eng_Latn": { "number_of_characters": 70589, @@ -4867,12 +5839,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tur_Latn": { "number_of_characters": 65382, @@ -4882,12 +5857,15 @@ "min_document_length": 12, "average_document_length": 131.9795081967213, "max_document_length": 158, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ukr_Cyrl-ukr_Cyrl": { "number_of_characters": 65850, @@ -4897,12 +5875,15 @@ "min_document_length": 14, "average_document_length": 132.93852459016392, "max_document_length": 159, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ukr_Cyrl-eng_Latn": { "number_of_characters": 70589, @@ -4912,12 +5893,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ukr_Cyrl": { "number_of_characters": 65850, @@ -4927,12 +5911,15 @@ "min_document_length": 14, "average_document_length": 132.93852459016392, "max_document_length": 159, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "urd_Arab-urd_Arab": { "number_of_characters": 64450, @@ -4942,12 +5929,15 @@ "min_document_length": 11, "average_document_length": 130.06967213114754, "max_document_length": 187, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "urd_Arab-eng_Latn": { "number_of_characters": 70589, @@ -4957,12 +5947,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-urd_Arab": { "number_of_characters": 64450, @@ -4972,12 +5965,15 @@ "min_document_length": 11, "average_document_length": 130.06967213114754, "max_document_length": 187, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "urd_Latn-urd_Latn": { "number_of_characters": 82039, @@ -4987,12 +5983,15 @@ "min_document_length": 15, "average_document_length": 166.1127049180328, "max_document_length": 230, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "urd_Latn-eng_Latn": { "number_of_characters": 70589, @@ -5002,12 +6001,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-urd_Latn": { "number_of_characters": 82039, @@ -5017,12 +6019,15 @@ "min_document_length": 15, "average_document_length": 166.1127049180328, "max_document_length": 230, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "uzn_Latn-uzn_Latn": { "number_of_characters": 70828, @@ -5032,12 +6037,15 @@ "min_document_length": 11, "average_document_length": 143.13934426229508, "max_document_length": 175, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "uzn_Latn-eng_Latn": { "number_of_characters": 70589, @@ -5047,12 +6055,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-uzn_Latn": { "number_of_characters": 70828, @@ -5062,12 +6073,15 @@ "min_document_length": 11, "average_document_length": 143.13934426229508, "max_document_length": 175, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "vie_Latn-vie_Latn": { "number_of_characters": 66724, @@ -5077,12 +6091,15 @@ "min_document_length": 12, "average_document_length": 134.7295081967213, "max_document_length": 161, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "vie_Latn-eng_Latn": { "number_of_characters": 70589, @@ -5092,12 +6109,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-vie_Latn": { "number_of_characters": 66724, @@ -5107,12 +6127,15 @@ "min_document_length": 12, "average_document_length": 134.7295081967213, "max_document_length": 161, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "war_Latn-war_Latn": { "number_of_characters": 78444, @@ -5122,12 +6145,15 @@ "min_document_length": 13, "average_document_length": 158.74590163934425, "max_document_length": 207, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "war_Latn-eng_Latn": { "number_of_characters": 70589, @@ -5137,12 +6163,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-war_Latn": { "number_of_characters": 78444, @@ -5152,12 +6181,15 @@ "min_document_length": 13, "average_document_length": 158.74590163934425, "max_document_length": 207, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "wol_Latn-wol_Latn": { "number_of_characters": 64521, @@ -5167,12 +6199,15 @@ "min_document_length": 15, "average_document_length": 130.21516393442624, "max_document_length": 139, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "wol_Latn-eng_Latn": { "number_of_characters": 70589, @@ -5182,12 +6217,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-wol_Latn": { "number_of_characters": 64521, @@ -5197,12 +6235,15 @@ "min_document_length": 15, "average_document_length": 130.21516393442624, "max_document_length": 139, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "xho_Latn-xho_Latn": { "number_of_characters": 71629, @@ -5212,12 +6253,15 @@ "min_document_length": 16, "average_document_length": 144.78073770491804, "max_document_length": 179, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "xho_Latn-eng_Latn": { "number_of_characters": 70589, @@ -5227,12 +6271,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-xho_Latn": { "number_of_characters": 71629, @@ -5242,12 +6289,15 @@ "min_document_length": 16, "average_document_length": 144.78073770491804, "max_document_length": 179, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "yor_Latn-yor_Latn": { "number_of_characters": 62752, @@ -5257,12 +6307,15 @@ "min_document_length": 11, "average_document_length": 126.59016393442623, "max_document_length": 143, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "yor_Latn-eng_Latn": { "number_of_characters": 70589, @@ -5272,12 +6325,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-yor_Latn": { "number_of_characters": 62752, @@ -5287,12 +6343,15 @@ "min_document_length": 11, "average_document_length": 126.59016393442623, "max_document_length": 143, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zho_Hans-zho_Hans": { "number_of_characters": 20549, @@ -5302,12 +6361,15 @@ "min_document_length": 7, "average_document_length": 40.10860655737705, "max_document_length": 64, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zho_Hans-eng_Latn": { "number_of_characters": 70589, @@ -5317,12 +6379,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-zho_Hans": { "number_of_characters": 20549, @@ -5332,12 +6397,15 @@ "min_document_length": 7, "average_document_length": 40.10860655737705, "max_document_length": 64, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zho_Hant-zho_Hant": { "number_of_characters": 19947, @@ -5347,12 +6415,15 @@ "min_document_length": 7, "average_document_length": 38.875, "max_document_length": 45, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zho_Hant-eng_Latn": { "number_of_characters": 70589, @@ -5362,12 +6433,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-zho_Hant": { "number_of_characters": 19947, @@ -5377,12 +6451,15 @@ "min_document_length": 7, "average_document_length": 38.875, "max_document_length": 45, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zsm_Latn-zsm_Latn": { "number_of_characters": 72008, @@ -5392,12 +6469,15 @@ "min_document_length": 13, "average_document_length": 145.55737704918033, "max_document_length": 210, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zsm_Latn-eng_Latn": { "number_of_characters": 70589, @@ -5407,12 +6487,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-zsm_Latn": { "number_of_characters": 72008, @@ -5422,12 +6505,15 @@ "min_document_length": 13, "average_document_length": 145.55737704918033, "max_document_length": 210, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zul_Latn-zul_Latn": { "number_of_characters": 69413, @@ -5437,12 +6523,15 @@ "min_document_length": 14, "average_document_length": 140.23975409836066, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zul_Latn-eng_Latn": { "number_of_characters": 70589, @@ -5452,12 +6541,15 @@ "min_document_length": 18, "average_document_length": 142.64959016393442, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-zul_Latn": { "number_of_characters": 69413, @@ -5467,12 +6559,15 @@ "min_document_length": 14, "average_document_length": 140.23975409836066, "max_document_length": 171, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arb_Arab-arb_Latn": { "number_of_characters": 61298, @@ -5482,12 +6577,15 @@ "min_document_length": 12, "average_document_length": 123.61065573770492, "max_document_length": 160, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arb_Latn-arb_Arab": { "number_of_characters": 53671, @@ -5497,12 +6595,15 @@ "min_document_length": 14, "average_document_length": 107.98155737704919, "max_document_length": 134, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ben_Beng-ben_Latn": { "number_of_characters": 68285, @@ -5512,12 +6613,15 @@ "min_document_length": 9, "average_document_length": 137.92827868852459, "max_document_length": 185, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ben_Latn-ben_Beng": { "number_of_characters": 63512, @@ -5527,12 +6631,15 @@ "min_document_length": 9, "average_document_length": 128.14754098360655, "max_document_length": 175, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hin_Deva-hin_Latn": { "number_of_characters": 68307, @@ -5542,12 +6649,15 @@ "min_document_length": 14, "average_document_length": 137.9733606557377, "max_document_length": 170, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hin_Latn-hin_Deva": { "number_of_characters": 66332, @@ -5557,12 +6667,15 @@ "min_document_length": 14, "average_document_length": 133.9262295081967, "max_document_length": 165, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "npi_Deva-npi_Latn": { "number_of_characters": 65683, @@ -5572,12 +6685,15 @@ "min_document_length": 20, "average_document_length": 132.59631147540983, "max_document_length": 154, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "npi_Latn-npi_Deva": { "number_of_characters": 61183, @@ -5587,12 +6703,15 @@ "min_document_length": 18, "average_document_length": 123.375, "max_document_length": 154, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sin_Sinh-sin_Latn": { "number_of_characters": 85996, @@ -5602,12 +6721,15 @@ "min_document_length": 19, "average_document_length": 174.22131147540983, "max_document_length": 224, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sin_Latn-sin_Sinh": { "number_of_characters": 63902, @@ -5617,12 +6739,15 @@ "min_document_length": 17, "average_document_length": 128.94672131147541, "max_document_length": 159, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "urd_Arab-urd_Latn": { "number_of_characters": 82039, @@ -5632,12 +6757,15 @@ "min_document_length": 15, "average_document_length": 166.1127049180328, "max_document_length": 230, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "urd_Latn-urd_Arab": { "number_of_characters": 64450, @@ -5647,12 +6775,15 @@ "min_document_length": 11, "average_document_length": 130.06967213114754, "max_document_length": 187, + "unique_documents": 488, "min_query_length": 2, "average_query_length": 1.0844444444444445, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 } } } diff --git a/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json b/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json index e4bed3000..3d27f624b 100644 --- a/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json @@ -7,12 +7,15 @@ "min_document_length": 54, "average_document_length": 34.70511822986456, "max_document_length": 334374, + "unique_documents": 1003765, "min_query_length": 2, "average_query_length": 38.19428854093339, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 52561, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 52561, "hf_subset_descriptive_stats": { "python": { "number_of_characters": 14574651, @@ -22,12 +25,15 @@ "min_document_length": 95, "average_document_length": 49.994759373550714, "max_document_length": 14008, + "unique_documents": 280310, "min_query_length": 2, "average_query_length": 37.5801045716584, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 14918, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 14918 }, "javascript": { "number_of_characters": 2587540, @@ -37,12 +43,15 @@ "min_document_length": 87, "average_document_length": 37.89792456903198, "max_document_length": 334374, + "unique_documents": 64854, "min_query_length": 2, "average_query_length": 39.412944393801276, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 3291, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 3291 }, "go": { "number_of_characters": 3641108, @@ -52,12 +61,15 @@ "min_document_length": 54, "average_document_length": 17.957838193378645, "max_document_length": 5280, + "unique_documents": 182440, "min_query_length": 2, "average_query_length": 44.9248953459739, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 8122, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 8122 }, "ruby": { "number_of_characters": 629446, @@ -67,12 +79,15 @@ "min_document_length": 83, "average_document_length": 20.830830612985128, "max_document_length": 3992, + "unique_documents": 27570, "min_query_length": 2, "average_query_length": 43.72720063441713, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1261, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1261 }, "java": { "number_of_characters": 6791137, @@ -82,12 +97,15 @@ "min_document_length": 77, "average_document_length": 35.54789180940586, "max_document_length": 7615, + "unique_documents": 180866, "min_query_length": 2, "average_query_length": 33.019808306709265, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 10955, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 10955 }, "php": { "number_of_characters": 8619431, @@ -97,12 +115,15 @@ "min_document_length": 94, "average_document_length": 30.195091978709495, "max_document_length": 4904, + "unique_documents": 267725, "min_query_length": 2, "average_query_length": 38.20822035107749, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 14014, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 14014 } } } diff --git a/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json b/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json index 247e489b7..6d73096d4 100644 --- a/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json @@ -7,12 +7,15 @@ "min_document_length": 18, "average_document_length": 70.98776923076923, "max_document_length": 2532, + "unique_documents": 13000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 13000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 13000, "hf_subset_descriptive_stats": { "python": { "number_of_characters": 70519, @@ -22,12 +25,15 @@ "min_document_length": 21, "average_document_length": 69.519, "max_document_length": 1811, + "unique_documents": 1000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "javascript": { "number_of_characters": 57880, @@ -37,12 +43,15 @@ "min_document_length": 18, "average_document_length": 56.88, "max_document_length": 601, + "unique_documents": 1000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "typescript": { "number_of_characters": 61092, @@ -52,12 +61,15 @@ "min_document_length": 19, "average_document_length": 60.092, "max_document_length": 659, + "unique_documents": 1000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "go": { "number_of_characters": 71797, @@ -67,12 +79,15 @@ "min_document_length": 19, "average_document_length": 70.797, "max_document_length": 1529, + "unique_documents": 1000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "ruby": { "number_of_characters": 67900, @@ -82,12 +97,15 @@ "min_document_length": 20, "average_document_length": 66.9, "max_document_length": 751, + "unique_documents": 1000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "java": { "number_of_characters": 63984, @@ -97,12 +115,15 @@ "min_document_length": 23, "average_document_length": 62.984, "max_document_length": 807, + "unique_documents": 1000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "php": { "number_of_characters": 62927, @@ -112,12 +133,15 @@ "min_document_length": 21, "average_document_length": 61.927, "max_document_length": 766, + "unique_documents": 1000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "c": { "number_of_characters": 98588, @@ -127,12 +151,15 @@ "min_document_length": 20, "average_document_length": 97.588, "max_document_length": 1672, + "unique_documents": 1000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "c++": { "number_of_characters": 115480, @@ -142,12 +169,15 @@ "min_document_length": 22, "average_document_length": 114.48, "max_document_length": 1856, + "unique_documents": 1000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "rust": { "number_of_characters": 68503, @@ -157,12 +187,15 @@ "min_document_length": 19, "average_document_length": 67.503, "max_document_length": 2532, + "unique_documents": 1000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "swift": { "number_of_characters": 58279, @@ -172,12 +205,15 @@ "min_document_length": 19, "average_document_length": 57.279, "max_document_length": 727, + "unique_documents": 1000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "scala": { "number_of_characters": 65833, @@ -187,12 +223,15 @@ "min_document_length": 22, "average_document_length": 64.833, "max_document_length": 685, + "unique_documents": 1000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "shell": { "number_of_characters": 73059, @@ -202,12 +241,15 @@ "min_document_length": 18, "average_document_length": 72.059, "max_document_length": 813, + "unique_documents": 1000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 } } } diff --git a/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json b/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json index d6d5b6cc8..1be18319c 100644 --- a/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json +++ b/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json @@ -7,11 +7,14 @@ "min_document_length": 127, "average_document_length": 885.131117906693, "max_document_length": 32432, + "unique_documents": 66383, "min_query_length": 2, "average_query_length": 7344.177374406869, "max_query_length": 9403, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 13277, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 13277 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json b/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json index 2183d94b0..4511605dd 100644 --- a/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json +++ b/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json @@ -7,11 +7,14 @@ "min_document_length": 26, "average_document_length": 144.85253568097312, "max_document_length": 13851, + "unique_documents": 156526, "min_query_length": 1, "average_query_length": 7611.464064396601, "max_query_length": 11354, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 31306, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 31306 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json b/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json index 622694ef2..a817119b4 100644 --- a/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json @@ -7,12 +7,15 @@ "min_document_length": 23, "average_document_length": 20.28592186371801, "max_document_length": 214210, + "unique_documents": 1005474, "min_query_length": 2, "average_query_length": 38.259317745096176, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 52561, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 52561, "hf_subset_descriptive_stats": { "python": { "number_of_characters": 8792958, @@ -22,12 +25,15 @@ "min_document_length": 38, "average_document_length": 29.330466200133973, "max_document_length": 8326, + "unique_documents": 280652, "min_query_length": 2, "average_query_length": 37.62595522187961, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 14918, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 14918 }, "javascript": { "number_of_characters": 1590642, @@ -37,12 +43,15 @@ "min_document_length": 40, "average_document_length": 22.395975521847824, "max_document_length": 214210, + "unique_documents": 65201, "min_query_length": 2, "average_query_length": 39.6238225463385, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 3291, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 3291 }, "go": { "number_of_characters": 2264134, @@ -52,12 +61,15 @@ "min_document_length": 23, "average_document_length": 10.390259118395491, "max_document_length": 3589, + "unique_documents": 182735, "min_query_length": 2, "average_query_length": 44.99753755232701, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 8122, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 8122 }, "ruby": { "number_of_characters": 391703, @@ -67,12 +79,15 @@ "min_document_length": 36, "average_document_length": 12.198310859794113, "max_document_length": 2244, + "unique_documents": 27588, "min_query_length": 2, "average_query_length": 43.75574940523394, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1261, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1261 }, "java": { "number_of_characters": 4114584, @@ -82,12 +97,15 @@ "min_document_length": 38, "average_document_length": 20.724849636310413, "max_document_length": 5066, + "unique_documents": 181061, "min_query_length": 2, "average_query_length": 33.055408489274306, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 10955, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 10955 }, "php": { "number_of_characters": 5253894, @@ -97,12 +115,15 @@ "min_document_length": 40, "average_document_length": 17.586760961388624, "max_document_length": 2995, + "unique_documents": 268237, "min_query_length": 2, "average_query_length": 38.28129013843299, "max_query_length": 2, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 14014, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 14014 } } } diff --git a/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json b/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json index 52a641a88..853c4c79c 100644 --- a/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json @@ -7,12 +7,15 @@ "min_document_length": 2, "average_document_length": 324.01233333333334, "max_document_length": 17533, + "unique_documents": 6000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 6000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 6000, "hf_subset_descriptive_stats": { "python": { "number_of_characters": 467546, @@ -22,12 +25,15 @@ "min_document_length": 8, "average_document_length": 466.546, "max_document_length": 8636, + "unique_documents": 1000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "javascript": { "number_of_characters": 187018, @@ -37,12 +43,15 @@ "min_document_length": 2, "average_document_length": 186.018, "max_document_length": 7657, + "unique_documents": 1000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "go": { "number_of_characters": 126213, @@ -52,12 +61,15 @@ "min_document_length": 14, "average_document_length": 125.213, "max_document_length": 1501, + "unique_documents": 1000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "ruby": { "number_of_characters": 314818, @@ -67,12 +79,15 @@ "min_document_length": 5, "average_document_length": 313.818, "max_document_length": 17533, + "unique_documents": 1000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "java": { "number_of_characters": 691360, @@ -82,12 +97,15 @@ "min_document_length": 2, "average_document_length": 690.36, "max_document_length": 6473, + "unique_documents": 1000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "php": { "number_of_characters": 163119, @@ -97,12 +115,15 @@ "min_document_length": 5, "average_document_length": 162.119, "max_document_length": 1240, + "unique_documents": 1000, "min_query_length": 1, "average_query_length": 1.0, "max_query_length": 1, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 } } } diff --git a/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json b/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json index f05ff9de5..07081e69c 100644 --- a/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json +++ b/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json @@ -7,11 +7,14 @@ "min_document_length": 8, "average_document_length": 221.90178571428572, "max_document_length": 4147, + "unique_documents": 1008, "min_query_length": 8, "average_query_length": 6880.58371040724, "max_query_length": 10852, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 221, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 221 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json b/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json index 71715f554..042658caa 100644 --- a/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json +++ b/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json @@ -7,11 +7,14 @@ "min_document_length": 376, "average_document_length": 411.97549019607845, "max_document_length": 8285, + "unique_documents": 816, "min_query_length": 58, "average_query_length": 6709.666666666667, "max_query_length": 8469, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 180, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 180 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CosQA.json b/mteb/descriptive_stats/Retrieval/CosQA.json index 76febf878..d8f17d4b2 100644 --- a/mteb/descriptive_stats/Retrieval/CosQA.json +++ b/mteb/descriptive_stats/Retrieval/CosQA.json @@ -7,11 +7,14 @@ "min_document_length": 18, "average_document_length": 0.8933702193748787, "max_document_length": 83, + "unique_documents": 20604, "min_query_length": 88, "average_query_length": 11420.086, "max_query_length": 6396, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 500, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 500 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json b/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json index 311d92130..4598b2af7 100644 --- a/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json @@ -7,11 +7,14 @@ "min_document_length": 16, "average_document_length": 0.4425671239352529, "max_document_length": 98, + "unique_documents": 114229, "min_query_length": 8, "average_query_length": 429532.5737211635, "max_query_length": 188424, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 997, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 989 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/StackOverflowQA.json b/mteb/descriptive_stats/Retrieval/StackOverflowQA.json index f7822563d..51972461e 100644 --- a/mteb/descriptive_stats/Retrieval/StackOverflowQA.json +++ b/mteb/descriptive_stats/Retrieval/StackOverflowQA.json @@ -7,11 +7,14 @@ "min_document_length": 61, "average_document_length": 130.32145903366614, "max_document_length": 22234, + "unique_documents": 19931, "min_query_length": 5, "average_query_length": 12029.38365095286, "max_query_length": 46028, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 1994, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1994 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json b/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json index 8c4c00d9c..56c3964a5 100644 --- a/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json +++ b/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json @@ -7,11 +7,14 @@ "min_document_length": 13, "average_document_length": 4.582686984534865, "max_document_length": 281, + "unique_documents": 105851, "min_query_length": 17, "average_query_length": 2316.9494103572038, "max_query_length": 762, - "min_relevant_docs_per_query": 1.0, + "unique_queries": 5851, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1.0 + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 5851 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/Touche2020.json b/mteb/descriptive_stats/Retrieval/Touche2020.json index 7be31e6f8..a3c37a54e 100644 --- a/mteb/descriptive_stats/Retrieval/Touche2020.json +++ b/mteb/descriptive_stats/Retrieval/Touche2020.json @@ -7,11 +7,14 @@ "min_document_length": 16, "average_document_length": 0.0055627442523101854, "max_document_length": 83, + "unique_documents": 382545, "min_query_length": 3, "average_query_length": 13430723.734693877, "max_query_length": 106072, - "min_relevant_docs_per_query": 19.020408163265305, - "average_relevant_docs_per_query": 19.020408163265305, - "max_relevant_docs_per_query": 19.020408163265305 + "unique_queries": 49, + "min_relevant_docs_per_query": 40, + "average_relevant_docs_per_query": 45.183673469387756, + "max_relevant_docs_per_query": 52, + "unique_relevant_docs": 2099 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json b/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json index 574cdec08..1b436abd7 100644 --- a/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json +++ b/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json @@ -7,11 +7,14 @@ "min_document_length": 16, "average_document_length": 0.007006176497701922, "max_document_length": 83, + "unique_documents": 303732, "min_query_length": 41, "average_query_length": 13000918.57142857, "max_query_length": 105983, - "min_relevant_docs_per_query": 34.93877551020408, - "average_relevant_docs_per_query": 34.93877551020408, - "max_relevant_docs_per_query": 34.93877551020408 + "unique_queries": 49, + "min_relevant_docs_per_query": 40, + "average_relevant_docs_per_query": 58.142857142857146, + "max_relevant_docs_per_query": 87, + "unique_relevant_docs": 2732 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json b/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json index 112d4f5af..f23a5ea1b 100644 --- a/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json @@ -7,15 +7,19 @@ "min_document_length": 74, "average_document_length": 2331.0777818884367, "max_document_length": 24179, + "unique_docs": 121635, "min_query_length": 32, "average_query_length": 81.8780487804878, "max_query_length": 173, + "unique_queries": 75, "min_instruction_length": 93, "average_instruction_length": 389.9512195121951, "max_instruction_length": 887, + "unique_instructions": 75, "min_changed_instruction_length": 180, "average_changed_instruction_length": 450.5528455284553, "max_changed_instruction_length": 974, + "unique_changed_instructions": 123, "min_average_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 10.43089430894309, "max_average_relevant_docs_per_query": 24, @@ -31,15 +35,19 @@ "min_document_length": 99, "average_document_length": 3145.4990895627475, "max_document_length": 24179, + "unique_docs": 41189, "min_query_length": 34, "average_query_length": 80.075, "max_query_length": 124, + "unique_queries": 40, "min_instruction_length": 150, "average_instruction_length": 396.875, "max_instruction_length": 887, + "unique_instructions": 40, "min_changed_instruction_length": 205, "average_changed_instruction_length": 463.175, "max_changed_instruction_length": 974, + "unique_changed_instructions": 40, "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 10.85, "max_average_relevant_docs_per_query": 22, @@ -55,15 +63,19 @@ "min_document_length": 75, "average_document_length": 2784.0813456746173, "max_document_length": 24061, + "unique_docs": 39326, "min_query_length": 32, "average_query_length": 81.875, "max_query_length": 173, + "unique_queries": 40, "min_instruction_length": 93, "average_instruction_length": 371.125, "max_instruction_length": 887, + "unique_instructions": 40, "min_changed_instruction_length": 180, "average_changed_instruction_length": 431.8, "max_changed_instruction_length": 957, + "unique_changed_instructions": 40, "min_average_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 9.775, "max_average_relevant_docs_per_query": 24, @@ -79,15 +91,19 @@ "min_document_length": 74, "average_document_length": 1082.0501215953307, "max_document_length": 23840, + "unique_docs": 41120, "min_query_length": 32, "average_query_length": 83.55813953488372, "max_query_length": 159, + "unique_queries": 43, "min_instruction_length": 157, "average_instruction_length": 401.0232558139535, "max_instruction_length": 731, + "unique_instructions": 43, "min_changed_instruction_length": 209, "average_changed_instruction_length": 456.25581395348837, "max_changed_instruction_length": 822, + "unique_changed_instructions": 43, "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 10.651162790697674, "max_average_relevant_docs_per_query": 24, diff --git a/mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json b/mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json index 4d2067530..54ae5d1ec 100644 --- a/mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json @@ -7,15 +7,19 @@ "min_document_length": 74, "average_document_length": 2331.0777818884367, "max_document_length": 24179, + "unique_docs": 121635, "min_query_length": 10, "average_query_length": 57.113821138211385, "max_query_length": 136, + "unique_queries": 123, "min_instruction_length": 37, "average_instruction_length": 281.0650406504065, "max_instruction_length": 1009, + "unique_instructions": 123, "min_changed_instruction_length": 44, "average_changed_instruction_length": 326.9430894308943, "max_changed_instruction_length": 1083, + "unique_changed_instructions": 123, "min_average_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 10.43089430894309, "max_average_relevant_docs_per_query": 24, @@ -31,15 +35,19 @@ "min_document_length": 99, "average_document_length": 3145.4990895627475, "max_document_length": 24179, + "unique_docs": 41189, "min_query_length": 34, "average_query_length": 72.65, "max_query_length": 124, + "unique_queries": 40, "min_instruction_length": 121, "average_instruction_length": 358.925, "max_instruction_length": 759, + "unique_instructions": 40, "min_changed_instruction_length": 163, "average_changed_instruction_length": 415.325, "max_changed_instruction_length": 842, + "unique_changed_instructions": 40, "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 10.85, "max_average_relevant_docs_per_query": 22, @@ -55,15 +63,19 @@ "min_document_length": 75, "average_document_length": 2784.0813456746173, "max_document_length": 24061, + "unique_docs": 39326, "min_query_length": 26, "average_query_length": 77.5, "max_query_length": 136, + "unique_queries": 40, "min_instruction_length": 78, "average_instruction_length": 387.0, "max_instruction_length": 1009, + "unique_instructions": 40, "min_changed_instruction_length": 187, "average_changed_instruction_length": 458.0, "max_changed_instruction_length": 1083, + "unique_changed_instructions": 40, "min_average_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 9.775, "max_average_relevant_docs_per_query": 24, @@ -79,15 +91,19 @@ "min_document_length": 74, "average_document_length": 1082.0501215953307, "max_document_length": 23840, + "unique_docs": 41120, "min_query_length": 10, "average_query_length": 23.697674418604652, "max_query_length": 44, + "unique_queries": 43, "min_instruction_length": 37, "average_instruction_length": 110.09302325581395, "max_instruction_length": 209, + "unique_instructions": 43, "min_changed_instruction_length": 44, "average_changed_instruction_length": 122.81395348837209, "max_changed_instruction_length": 229, + "unique_changed_instructions": 43, "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 10.651162790697674, "max_average_relevant_docs_per_query": 24, diff --git a/mteb/descriptive_stats/STS/STS12.json b/mteb/descriptive_stats/STS/STS12.json index 0e8410dec..a7e11197a 100644 --- a/mteb/descriptive_stats/STS/STS12.json +++ b/mteb/descriptive_stats/STS/STS12.json @@ -5,9 +5,11 @@ "min_sentence1_length": 3, "average_sentence1_len": 63.78893178893179, "max_sentence1_length": 220, + "unique_sentence1": 2236, "min_sentence2_length": 7, "average_sentence2_len": 65.5926640926641, "max_sentence2_length": 204, + "unique_sentence2": 2797, "min_score": 0.0, "avg_score": 3.5060643500643507, "max_score": 5.0 diff --git a/mteb/descriptive_stats/STS/STS17.json b/mteb/descriptive_stats/STS/STS17.json index 131f35d40..912738035 100644 --- a/mteb/descriptive_stats/STS/STS17.json +++ b/mteb/descriptive_stats/STS/STS17.json @@ -5,9 +5,11 @@ "min_sentence1_length": 6, "average_sentence1_len": 38.14665170220726, "max_sentence1_length": 976, + "unique_sentence1": 4900, "min_sentence2_length": 6, "average_sentence2_len": 36.72502805836139, "max_sentence2_length": 1007, + "unique_sentence2": 4470, "min_score": 0.0, "avg_score": 2.3554804214989464, "max_score": 5.0, @@ -18,9 +20,11 @@ "min_sentence1_length": 6, "average_sentence1_len": 31.991918482080113, "max_sentence1_length": 976, + "unique_sentence1": 2650, "min_sentence2_length": 6, "average_sentence2_len": 32.44483485593816, "max_sentence2_length": 1007, + "unique_sentence2": 2720, "min_score": 0.0, "avg_score": 2.469359920356055, "max_score": 5.0 @@ -31,9 +35,11 @@ "min_sentence1_length": 11, "average_sentence1_len": 32.208, "max_sentence1_length": 99, + "unique_sentence1": 250, "min_sentence2_length": 9, "average_sentence2_len": 32.78, "max_sentence2_length": 83, + "unique_sentence2": 250, "min_score": 0.0, "avg_score": 2.216800000000001, "max_score": 5.0 @@ -44,9 +50,11 @@ "min_sentence1_length": 13, "average_sentence1_len": 42.36, "max_sentence1_length": 105, + "unique_sentence1": 250, "min_sentence2_length": 10, "average_sentence2_len": 32.696, "max_sentence2_length": 104, + "unique_sentence2": 250, "min_score": 0.0, "avg_score": 2.1423999999999994, "max_score": 5.0 @@ -57,9 +65,11 @@ "min_sentence1_length": 12, "average_sentence1_len": 43.952, "max_sentence1_length": 94, + "unique_sentence1": 250, "min_sentence2_length": 15, "average_sentence2_len": 44.756, "max_sentence2_length": 104, + "unique_sentence2": 250, "min_score": 0.0, "avg_score": 2.2776000000000014, "max_score": 5.0 @@ -70,9 +80,11 @@ "min_sentence1_length": 12, "average_sentence1_len": 43.952, "max_sentence1_length": 94, + "unique_sentence1": 250, "min_sentence2_length": 15, "average_sentence2_len": 42.724, "max_sentence2_length": 101, + "unique_sentence2": 250, "min_score": 0.0, "avg_score": 2.2776000000000014, "max_score": 5.0 @@ -83,9 +95,11 @@ "min_sentence1_length": 15, "average_sentence1_len": 41.916, "max_sentence1_length": 101, + "unique_sentence1": 250, "min_sentence2_length": 10, "average_sentence2_len": 41.6, "max_sentence2_length": 107, + "unique_sentence2": 250, "min_score": 0.0, "avg_score": 2.1335999999999986, "max_score": 5.0 @@ -96,9 +110,11 @@ "min_sentence1_length": 12, "average_sentence1_len": 50.84, "max_sentence1_length": 160, + "unique_sentence1": 250, "min_sentence2_length": 14, "average_sentence2_len": 42.024, "max_sentence2_length": 117, + "unique_sentence2": 250, "min_score": 0.0, "avg_score": 2.1464000000000003, "max_score": 5.0 @@ -109,9 +125,11 @@ "min_sentence1_length": 18, "average_sentence1_len": 49.836, "max_sentence1_length": 136, + "unique_sentence1": 250, "min_sentence2_length": 13, "average_sentence2_len": 51.224, "max_sentence2_length": 129, + "unique_sentence2": 250, "min_score": 0.0, "avg_score": 2.2312000000000007, "max_score": 5.0 @@ -122,9 +140,11 @@ "min_sentence1_length": 19, "average_sentence1_len": 49.624, "max_sentence1_length": 115, + "unique_sentence1": 250, "min_sentence2_length": 15, "average_sentence2_len": 42.724, "max_sentence2_length": 101, + "unique_sentence2": 250, "min_score": 0.0, "avg_score": 2.2776000000000014, "max_score": 5.0 @@ -135,9 +155,11 @@ "min_sentence1_length": 15, "average_sentence1_len": 50.028, "max_sentence1_length": 113, + "unique_sentence1": 250, "min_sentence2_length": 15, "average_sentence2_len": 42.724, "max_sentence2_length": 101, + "unique_sentence2": 250, "min_score": 0.0, "avg_score": 2.2776000000000014, "max_score": 5.0 @@ -148,9 +170,11 @@ "min_sentence1_length": 14, "average_sentence1_len": 46.816, "max_sentence1_length": 123, + "unique_sentence1": 250, "min_sentence2_length": 15, "average_sentence2_len": 42.724, "max_sentence2_length": 101, + "unique_sentence2": 250, "min_score": 0.0, "avg_score": 2.2776000000000014, "max_score": 5.0 diff --git a/mteb/descriptive_stats/Summarization/SummEval.json b/mteb/descriptive_stats/Summarization/SummEval.json index a705e47fb..4c2f133ab 100644 --- a/mteb/descriptive_stats/Summarization/SummEval.json +++ b/mteb/descriptive_stats/Summarization/SummEval.json @@ -5,12 +5,15 @@ "min_text_length": 626, "avg_text_length": 2100.35, "max_text_length": 3153, + "unique_texts": 100, "min_human_summaries_length": 11, "avg_human_summaries_length": 11.0, "max_human_summaries_length": 11, + "unique_human_summaries": 1100, "min_machine_summaries_length": 16, "avg_machine_summaries_length": 16.0, "max_machine_summaries_length": 16, + "unique_machine_summaries": 1548, "min_relevance": [ 1.0, 1.3333333333333333, diff --git a/tests/test_benchmark/mock_tasks.py b/tests/test_benchmark/mock_tasks.py index 2ac0096da..489b67ab4 100644 --- a/tests/test_benchmark/mock_tasks.py +++ b/tests/test_benchmark/mock_tasks.py @@ -52,10 +52,25 @@ class MockClassificationTask(AbsTaskClassification): "test": { "num_samples": 2, "number_of_characters": 52, + "num_texts_in_train": 1, + "min_text_length": 23, "average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, "unique_labels": 2, "labels": {"0": {"count": 1}, "1": {"count": 1}}, - } + }, + "train": { + "num_samples": 2, + "number_of_characters": 53, + "num_texts_in_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, } metadata = TaskMetadata( @@ -66,20 +81,22 @@ class MockClassificationTask(AbsTaskClassification): ) def load_data(self, **kwargs): - texts = ["This is a test sentence", "This is another test sentence"] + train_texts = ["This is a test sentence", "This is another train sentence"] + test_texts = ["This is a test sentence", "This is another test sentence"] + labels = [0, 1] self.dataset = DatasetDict( { "test": Dataset.from_dict( { - "text": texts, + "text": test_texts, "label": labels, } ), "train": Dataset.from_dict( { - "text": texts, + "text": train_texts, "label": labels, } ), @@ -93,26 +110,73 @@ class MockMultilingualClassificationTask(AbsTaskClassification, MultilingualTask "test": { "num_samples": 4, "number_of_characters": 104, + "num_texts_in_train": 1, + "min_text_length": 23, "average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, "unique_labels": 2, "labels": {"0": {"count": 2}, "1": {"count": 2}}, "hf_subset_descriptive_stats": { "eng": { "num_samples": 2, "number_of_characters": 52, + "num_texts_in_train": 1, + "min_text_length": 23, "average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, "unique_labels": 2, "labels": {"0": {"count": 1}, "1": {"count": 1}}, }, "fra": { "num_samples": 2, "number_of_characters": 52, + "num_texts_in_train": 1, + "min_text_length": 23, "average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, "unique_labels": 2, "labels": {"0": {"count": 1}, "1": {"count": 1}}, }, }, - } + }, + "train": { + "num_samples": 4, + "number_of_characters": 106, + "num_texts_in_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 2}, "1": {"count": 2}}, + "hf_subset_descriptive_stats": { + "eng": { + "num_samples": 2, + "number_of_characters": 53, + "num_texts_in_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, + "fra": { + "num_samples": 2, + "number_of_characters": 53, + "num_texts_in_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, + }, + }, } metadata = TaskMetadata( @@ -124,18 +188,19 @@ class MockMultilingualClassificationTask(AbsTaskClassification, MultilingualTask metadata.eval_langs = multilingual_eval_langs def load_data(self, **kwargs): - texts = ["This is a test sentence", "This is another test sentence"] + train_texts = ["This is a test sentence", "This is another train sentence"] + test_texts = ["This is a test sentence", "This is another test sentence"] labels = [0, 1] data = { "test": Dataset.from_dict( { - "text": texts, + "text": test_texts, "label": labels, } ), "train": Dataset.from_dict( { - "text": texts, + "text": train_texts, "label": labels, } ), @@ -153,10 +218,17 @@ def load_data(self, **kwargs): class MockBitextMiningTask(AbsTaskBitextMining): expected_stats = { "test": { - "average_sentence1_length": 26.0, - "average_sentence2_length": 30.5, "num_samples": 2, "number_of_characters": 113, + "unique_pairs": 2, + "min_sentence1_length": 23, + "average_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "average_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, } } @@ -190,22 +262,43 @@ def load_data(self, **kwargs): class MockMultilingualBitextMiningTask(AbsTaskBitextMining, MultilingualTask): expected_stats = { "test": { - "average_sentence1_length": 26.0, - "average_sentence2_length": 30.5, "num_samples": 4, "number_of_characters": 226, + "unique_pairs": 2, + "min_sentence1_length": 23, + "average_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "average_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, "hf_subset_descriptive_stats": { "eng": { - "average_sentence1_length": 26.0, - "average_sentence2_length": 30.5, "num_samples": 2, "number_of_characters": 113, - }, - "fra": { + "unique_pairs": 2, + "min_sentence1_length": 23, "average_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, "average_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, + }, + "fra": { "num_samples": 2, "number_of_characters": 113, + "unique_pairs": 2, + "min_sentence1_length": 23, + "average_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "average_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, }, }, } @@ -246,22 +339,43 @@ class MockMultilingualParallelBitextMiningTask(AbsTaskBitextMining, Multilingual parallel_subsets = True expected_stats = { "test": { - "average_sentence1_length": 28.25, - "average_sentence2_length": 28.25, "num_samples": 4, "number_of_characters": 226, + "unique_pairs": 4, + "min_sentence1_length": 23, + "average_sentence1_length": 28.25, + "max_sentence1_length": 37, + "unique_sentence1": 4, + "min_sentence2_length": 23, + "average_sentence2_length": 28.25, + "max_sentence2_length": 37, + "unique_sentence2": 4, "hf_subset_descriptive_stats": { "eng_Latn-fra_Latn": { - "average_sentence1_length": 26.0, - "average_sentence2_length": 30.5, "num_samples": 2, "number_of_characters": 113, + "unique_pairs": 2, + "min_sentence1_length": 23, + "average_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "average_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, }, "fra_Latn-eng_Latn": { - "average_sentence1_length": 30.5, - "average_sentence2_length": 26.0, "num_samples": 2, "number_of_characters": 113, + "unique_pairs": 2, + "min_sentence1_length": 24, + "average_sentence1_length": 30.5, + "max_sentence1_length": 37, + "unique_sentence1": 2, + "min_sentence2_length": 23, + "average_sentence2_length": 26.0, + "max_sentence2_length": 29, + "unique_sentence2": 2, }, }, } @@ -303,8 +417,13 @@ class MockClusteringTask(AbsTaskClustering): "test": { "num_samples": 1, "number_of_characters": 3, + "min_text_length": 3, "average_text_length": 3.0, + "max_text_length": 3, + "unique_texts": 3, + "min_labels_per_text": 1, "average_labels_per_text": 3.0, + "max_labels_per_text": 1, "unique_labels": 3, "labels": {"0": {"count": 1}, "1": {"count": 1}, "2": {"count": 1}}, } @@ -345,24 +464,39 @@ class MockMultilingualClusteringTask(AbsTaskClustering, MultilingualTask): "test": { "num_samples": 2, "number_of_characters": 6, + "min_text_length": 3, "average_text_length": 3.0, + "max_text_length": 3, + "unique_texts": 3, + "min_labels_per_text": 2, "average_labels_per_text": 3.0, + "max_labels_per_text": 2, "unique_labels": 3, "labels": {"0": {"count": 2}, "1": {"count": 2}, "2": {"count": 2}}, "hf_subset_descriptive_stats": { "eng": { "num_samples": 1, "number_of_characters": 3, + "min_text_length": 3, "average_text_length": 3.0, + "max_text_length": 3, + "unique_texts": 3, + "min_labels_per_text": 1, "average_labels_per_text": 3.0, + "max_labels_per_text": 1, "unique_labels": 3, "labels": {"0": {"count": 1}, "1": {"count": 1}, "2": {"count": 1}}, }, "fra": { "num_samples": 1, "number_of_characters": 3, + "min_text_length": 3, "average_text_length": 3.0, + "max_text_length": 3, + "unique_texts": 3, + "min_labels_per_text": 1, "average_labels_per_text": 3.0, + "max_labels_per_text": 1, "unique_labels": 3, "labels": {"0": {"count": 1}, "1": {"count": 1}, "2": {"count": 1}}, }, @@ -412,8 +546,12 @@ class MockClusteringFastTask(AbsTaskClusteringFast): "test": { "num_samples": 3, "number_of_characters": 81, + "min_text_length": 23, "average_text_length": 27.0, + "max_text_length": 29, + "min_labels_per_text": 1, "average_labels_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 3, "labels": {"0": {"count": 1}, "1": {"count": 1}, "2": {"count": 1}}, } @@ -454,24 +592,36 @@ class MockMultilingualClusteringFastTask(AbsTaskClusteringFast, MultilingualTask "test": { "num_samples": 6, "number_of_characters": 162, + "min_text_length": 23, "average_text_length": 27.0, + "max_text_length": 29, + "min_labels_per_text": 2, "average_labels_per_text": 1.0, + "max_labels_per_text": 2, "unique_labels": 3, "labels": {"0": {"count": 2}, "1": {"count": 2}, "2": {"count": 2}}, "hf_subset_descriptive_stats": { "eng": { "num_samples": 3, "number_of_characters": 81, + "min_text_length": 23, "average_text_length": 27.0, + "max_text_length": 29, + "min_labels_per_text": 1, "average_labels_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 3, "labels": {"0": {"count": 1}, "1": {"count": 1}, "2": {"count": 1}}, }, "fra": { "num_samples": 3, "number_of_characters": 81, + "min_text_length": 23, "average_text_length": 27.0, + "max_text_length": 29, + "min_labels_per_text": 1, "average_labels_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 3, "labels": {"0": {"count": 1}, "1": {"count": 1}, "2": {"count": 1}}, }, @@ -517,8 +667,14 @@ class MockPairClassificationTask(AbsTaskPairClassification): "test": { "num_samples": 2, "number_of_characters": 113, - "avg_sentence1_len": 26.0, - "avg_sentence2_len": 30.5, + "min_sentence1_length": 23, + "avg_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "avg_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, "unique_labels": 2, "labels": {"1": {"count": 1}, "0": {"count": 1}}, } @@ -562,24 +718,42 @@ class MockMultilingualPairClassificationTask( "test": { "num_samples": 4, "number_of_characters": 226, - "avg_sentence1_len": 26.0, - "avg_sentence2_len": 30.5, + "min_sentence1_length": 23, + "avg_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "avg_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, "unique_labels": 2, "labels": {"1": {"count": 2}, "0": {"count": 2}}, "hf_subset_descriptive_stats": { "eng": { "num_samples": 2, "number_of_characters": 113, - "avg_sentence1_len": 26.0, - "avg_sentence2_len": 30.5, + "min_sentence1_length": 23, + "avg_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "avg_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, "unique_labels": 2, "labels": {"1": {"count": 1}, "0": {"count": 1}}, }, "fra": { "num_samples": 2, "number_of_characters": 113, - "avg_sentence1_len": 26.0, - "avg_sentence2_len": 30.5, + "min_sentence1_length": 23, + "avg_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "avg_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, "unique_labels": 2, "labels": {"1": {"count": 1}, "0": {"count": 1}}, }, @@ -627,9 +801,17 @@ class MockSTSTask(AbsTaskSTS): "test": { "num_samples": 2, "number_of_characters": 113, + "min_sentence1_length": 23, "average_sentence1_len": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, "average_sentence2_len": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, + "min_score": 0, "avg_score": 0.5, + "max_score": 1, } } @@ -674,23 +856,47 @@ class MockMultilingualSTSTask(AbsTaskSTS, MultilingualTask): "test": { "num_samples": 4, "number_of_characters": 226, + "min_sentence1_length": 23, "average_sentence1_len": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, "average_sentence2_len": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, + "min_score": 0, "avg_score": 0.5, + "max_score": 1, "hf_subset_descriptive_stats": { "eng": { "num_samples": 2, "number_of_characters": 113, + "min_sentence1_length": 23, "average_sentence1_len": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, "average_sentence2_len": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, + "min_score": 0, "avg_score": 0.5, + "max_score": 1, }, "fra": { "num_samples": 2, "number_of_characters": 113, + "min_sentence1_length": 23, "average_sentence1_len": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, "average_sentence2_len": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, + "min_score": 0, "avg_score": 0.5, + "max_score": 1, }, }, } @@ -742,10 +948,21 @@ class MockSummarizationTask(AbsTaskSummarization): "test": { "num_samples": 2, "number_of_characters": 60, - "avg_text_len": 26.0, - "avg_human_summaries_len": 2.0, - "avg_machine_summaries_len": 2.0, + "min_text_length": 23, + "avg_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_human_summaries_length": 2, + "avg_human_summaries_length": 2.0, + "max_human_summaries_length": 2, + "unique_human_summaries": 2, + "min_machine_summaries_length": 2, + "avg_machine_summaries_length": 2.0, + "max_machine_summaries_length": 2, + "unique_machine_summaries": 2, + "min_relevance": [0, 1], "avg_relevance": 0.5, + "max_relevance": [1, 0], } } @@ -795,26 +1012,59 @@ class MockMultilingualSummarizationTask(AbsTaskSummarization, MultilingualTask): "test": { "num_samples": 4, "number_of_characters": 120, - "avg_text_len": 26.0, - "avg_human_summaries_len": 2.0, - "avg_machine_summaries_len": 2.0, + "min_text_length": 23, + "avg_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_human_summaries_length": 2, + "avg_human_summaries_length": 2.0, + "max_human_summaries_length": 2, + "unique_human_summaries": 2, + "min_machine_summaries_length": 2, + "avg_machine_summaries_length": 2.0, + "max_machine_summaries_length": 2, + "unique_machine_summaries": 2, + "min_relevance": [0, 1], "avg_relevance": 0.5, + "max_relevance": [1, 0], "hf_subset_descriptive_stats": { "eng": { "num_samples": 2, "number_of_characters": 60, - "avg_text_len": 26.0, - "avg_human_summaries_len": 2.0, - "avg_machine_summaries_len": 2.0, + "min_text_length": 23, + "avg_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_human_summaries_length": 2, + "avg_human_summaries_length": 2.0, + "max_human_summaries_length": 2, + "unique_human_summaries": 2, + "min_machine_summaries_length": 2, + "avg_machine_summaries_length": 2.0, + "max_machine_summaries_length": 2, + "unique_machine_summaries": 2, + "min_relevance": [0, 1], "avg_relevance": 0.5, + "max_relevance": [1, 0], }, "fra": { "num_samples": 2, "number_of_characters": 60, - "avg_text_len": 26.0, - "avg_human_summaries_len": 2.0, - "avg_machine_summaries_len": 2.0, + "min_text_length": 23, + "avg_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_human_summaries_length": 2, + "avg_human_summaries_length": 2.0, + "max_human_summaries_length": 2, + "unique_human_summaries": 2, + "min_machine_summaries_length": 2, + "avg_machine_summaries_length": 2.0, + "max_machine_summaries_length": 2, + "unique_machine_summaries": 2, + "min_relevance": [0, 1], "avg_relevance": 0.5, + "max_relevance": [1, 0], }, }, } @@ -872,9 +1122,18 @@ class MockRerankingTask(AbsTaskReranking): "number_of_characters": 172, "num_positive": 2, "num_negative": 2, - "avg_query_len": 26.0, - "avg_positive_len": 30.0, - "avg_negative_len": 30.0, + "min_query_length": 23, + "avg_query_length": 26.0, + "max_query_length": 29, + "unique_query": 2, + "min_positive_length": 27, + "avg_positive_length": 30.0, + "max_positive_length": 33, + "unique_positive": 2, + "min_negative_length": 27, + "avg_negative_length": 30.0, + "max_negative_length": 33, + "unique_negative": 2, } } @@ -917,27 +1176,54 @@ class MockMultilingualRerankingTask(AbsTaskReranking, MultilingualTask): "number_of_characters": 344, "num_positive": 4, "num_negative": 4, - "avg_query_len": 26.0, - "avg_positive_len": 30.0, - "avg_negative_len": 30.0, + "min_query_length": 23, + "avg_query_length": 26.0, + "max_query_length": 29, + "unique_query": 2, + "min_positive_length": 27, + "avg_positive_length": 30.0, + "max_positive_length": 33, + "unique_positive": 2, + "min_negative_length": 27, + "avg_negative_length": 30.0, + "max_negative_length": 33, + "unique_negative": 2, "hf_subset_descriptive_stats": { "eng": { "num_samples": 2, "number_of_characters": 172, "num_positive": 2, "num_negative": 2, - "avg_query_len": 26.0, - "avg_positive_len": 30.0, - "avg_negative_len": 30.0, + "min_query_length": 23, + "avg_query_length": 26.0, + "max_query_length": 29, + "unique_query": 2, + "min_positive_length": 27, + "avg_positive_length": 30.0, + "max_positive_length": 33, + "unique_positive": 2, + "min_negative_length": 27, + "avg_negative_length": 30.0, + "max_negative_length": 33, + "unique_negative": 2, }, "fra": { "num_samples": 2, "number_of_characters": 172, "num_positive": 2, "num_negative": 2, - "avg_query_len": 26.0, - "avg_positive_len": 30.0, - "avg_negative_len": 30.0, + "min_query_length": 23, + "avg_query_length": 26.0, + "max_query_length": 29, + "unique_query": 2, + "min_positive_length": 27, + "avg_positive_length": 30.0, + "max_positive_length": 33, + "unique_positive": 2, + "min_negative_length": 27, + "avg_negative_length": 30.0, + "max_negative_length": 33, + "unique_negative": 2, }, }, } @@ -982,13 +1268,22 @@ def load_data(self, **kwargs): class MockRetrievalTask(AbsTaskRetrieval): expected_stats = { "test": { + "number_of_characters": 112, "num_samples": 4, - "number_of_characters": 56.0, - "average_document_length": 15.0, - "average_query_length": 13.0, - "num_documents": 2, "num_queries": 2, - "average_relevant_docs_per_query": 1.0, + "num_documents": 2, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 2, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 2, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, } } @@ -1025,31 +1320,58 @@ def load_data(self, **kwargs): class MockMultilingualRetrievalTask(AbsTaskRetrieval, MultilingualTask): expected_stats = { "test": { - "number_of_characters": 56.0, + "number_of_characters": 224, "num_samples": 8, "num_queries": 4, "num_documents": 4, - "average_document_length": 7.5, - "average_query_length": 6.5, - "average_relevant_docs_per_query": 1.0, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 4, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 4, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4, "hf_subset_descriptive_stats": { "eng": { - "number_of_characters": 56.0, + "number_of_characters": 112, "num_samples": 4, "num_queries": 2, "num_documents": 2, - "average_document_length": 15.0, - "average_query_length": 13.0, - "average_relevant_docs_per_query": 1.0, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 2, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 2, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, }, "fra": { - "number_of_characters": 56.0, + "number_of_characters": 112, "num_samples": 4, "num_queries": 2, "num_documents": 2, - "average_document_length": 15.0, - "average_query_length": 13.0, - "average_relevant_docs_per_query": 1.0, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 2, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 2, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, }, }, } @@ -1095,13 +1417,33 @@ def load_data(self, **kwargs): class MockMultilabelClassification(AbsTaskMultilabelClassification): expected_stats = { "test": { - "average_text_length": 26.0, + "num_samples": 6, "number_of_characters": 156, + "number_texts_in_train": 1, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 2, "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, + "train": { "num_samples": 6, + "number_of_characters": 159, + "number_texts_in_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, "unique_labels": 2, "labels": {"0": {"count": 6}, "1": {"count": 6}}, - } + }, } metadata = TaskMetadata( @@ -1112,20 +1454,21 @@ class MockMultilabelClassification(AbsTaskMultilabelClassification): ) def load_data(self, **kwargs): - texts = ["This is a test sentence", "This is another test sentence"] * 3 + train_texts = ["This is a test sentence", "This is another train sentence"] * 3 + test_texts = ["This is a test sentence", "This is another test sentence"] * 3 labels = [[0, 1], [1, 0]] * 3 self.dataset = DatasetDict( { "test": Dataset.from_dict( { - "text": texts, + "text": test_texts, "label": labels, } ), "train": Dataset.from_dict( { - "text": texts, + "text": train_texts, "label": labels, } ), @@ -1139,31 +1482,93 @@ class MockMultilingualMultilabelClassification( ): expected_stats = { "test": { - "average_text_length": 26.0, + "num_samples": 12, "number_of_characters": 312, + "number_texts_in_train": 1, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 2, "average_label_per_text": 2.0, - "num_samples": 12, + "max_labels_per_text": 2, "unique_labels": 2, "labels": {"0": {"count": 12}, "1": {"count": 12}}, "hf_subset_descriptive_stats": { "eng": { - "average_text_length": 26.0, + "num_samples": 6, "number_of_characters": 156, + "number_texts_in_train": 1, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 2, "average_label_per_text": 2.0, - "num_samples": 6, + "max_labels_per_text": 2, "unique_labels": 2, "labels": {"0": {"count": 6}, "1": {"count": 6}}, }, "fra": { - "average_text_length": 26.0, + "num_samples": 6, "number_of_characters": 156, + "number_texts_in_train": 1, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, + }, + }, + "train": { + "num_samples": 12, + "number_of_characters": 318, + "number_texts_in_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 12}, "1": {"count": 12}}, + "hf_subset_descriptive_stats": { + "eng": { + "num_samples": 6, + "number_of_characters": 159, + "number_texts_in_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + "min_labels_per_text": 2, "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, + "fra": { "num_samples": 6, + "number_of_characters": 159, + "number_texts_in_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, "unique_labels": 2, "labels": {"0": {"count": 6}, "1": {"count": 6}}, }, }, - } + }, } metadata = TaskMetadata( @@ -1175,19 +1580,20 @@ class MockMultilingualMultilabelClassification( metadata.eval_langs = multilingual_eval_langs def load_data(self, **kwargs): - texts = ["This is a test sentence", "This is another test sentence"] * 3 + train_texts = ["This is a test sentence", "This is another train sentence"] * 3 + test_texts = ["This is a test sentence", "This is another test sentence"] * 3 labels = [[0, 1], [1, 0]] * 3 data = { "test": Dataset.from_dict( { - "text": texts, + "text": test_texts, "label": labels, } ), "train": Dataset.from_dict( { - "text": texts, + "text": train_texts, "label": labels, } ), @@ -1206,16 +1612,32 @@ class MockInstructionRetrival(AbsTaskInstructionRetrieval): do_length_ablation = True expected_stats = { "test": { - "average_changed_instruction_length": 37.0, + "num_samples": 4, + "num_docs": 2, + "num_queries": 2, + "number_of_characters": 244, + "min_document_length": 27, "average_document_length": 30.0, - "average_instruction_length": 29.0, + "max_document_length": 33, + "unique_docs": 2, + "min_query_length": 23, "average_query_length": 26.0, + "max_query_length": 29, + "unique_queries": 2, + "min_instruction_length": 26, + "average_instruction_length": 29.0, + "max_instruction_length": 32, + "unique_instructions": 2, + "min_changed_instruction_length": 34, + "average_changed_instruction_length": 37.0, + "max_changed_instruction_length": 40, + "unique_changed_instructions": 2, + "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, + "max_average_relevant_docs_per_query": 1, + "min_average_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, - "num_docs": 2, - "num_queries": 2, - "num_samples": 4, - "number_of_characters": 244, + "max_average_top_ranked_per_query": 2, } } @@ -1297,36 +1719,84 @@ class MockMultilingualInstructionRetrival( "num_docs": 4, "num_queries": 4, "number_of_characters": 488, + "min_document_length": 27, "average_document_length": 30.0, + "max_document_length": 33, + "unique_docs": 2, + "min_query_length": 23, "average_query_length": 26.0, + "max_query_length": 29, + "unique_queries": 2, + "min_instruction_length": 26, "average_instruction_length": 29.0, + "max_instruction_length": 32, + "unique_instructions": 2, + "min_changed_instruction_length": 34, "average_changed_instruction_length": 37.0, + "max_changed_instruction_length": 40, + "unique_changed_instructions": 2, + "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, + "max_average_relevant_docs_per_query": 1, + "min_average_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, + "max_average_top_ranked_per_query": 2, "hf_subset_descriptive_stats": { "eng": { "num_samples": 4, "num_docs": 2, "num_queries": 2, "number_of_characters": 244, + "min_document_length": 27, "average_document_length": 30.0, + "max_document_length": 33, + "unique_docs": 2, + "min_query_length": 23, "average_query_length": 26.0, + "max_query_length": 29, + "unique_queries": 2, + "min_instruction_length": 26, "average_instruction_length": 29.0, + "max_instruction_length": 32, + "unique_instructions": 2, + "min_changed_instruction_length": 34, "average_changed_instruction_length": 37.0, + "max_changed_instruction_length": 40, + "unique_changed_instructions": 2, + "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, + "max_average_relevant_docs_per_query": 1, + "min_average_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, + "max_average_top_ranked_per_query": 2, }, "fra": { "num_samples": 4, "num_docs": 2, "num_queries": 2, "number_of_characters": 244, + "min_document_length": 27, "average_document_length": 30.0, + "max_document_length": 33, + "unique_docs": 2, + "min_query_length": 23, "average_query_length": 26.0, + "max_query_length": 29, + "unique_queries": 2, + "min_instruction_length": 26, "average_instruction_length": 29.0, + "max_instruction_length": 32, + "unique_instructions": 2, + "min_changed_instruction_length": 34, "average_changed_instruction_length": 37.0, + "max_changed_instruction_length": 40, + "unique_changed_instructions": 2, + "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, + "max_average_relevant_docs_per_query": 1, + "min_average_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, + "max_average_top_ranked_per_query": 2, }, }, } diff --git a/tests/test_tasks/test_metadata.py b/tests/test_tasks/test_metadata.py index 1e7e1b24d..3d206da5c 100644 --- a/tests/test_tasks/test_metadata.py +++ b/tests/test_tasks/test_metadata.py @@ -8,9 +8,9 @@ @pytest.mark.parametrize("task", MOCK_TASK_TEST_GRID) def test_descriptive_stats(task): result_stat = task.calculate_metadata_metrics() + # remove descriptive task file + task.metadata.descriptive_stat_path.unlink() task_stat = task.expected_stats for key, value in result_stat.items(): assert key in task_stat assert value == task_stat[key] - # remove descriptive task file - task.metadata.descriptive_stat_path.unlink()