From 9874bb723f4a80f1a0515f9a51c787197e6eb778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stig-Arne=20Gr=C3=B6nroos?= Date: Mon, 11 Sep 2023 13:59:07 +0300 Subject: [PATCH] Avoid dying if a rare bug occurs in the look ahead bucketing StopIteration when trying to pick a bucket in a smart way. Doing something stupid instead. Please check me. --- mammoth/inputters/dataloader.py | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/mammoth/inputters/dataloader.py b/mammoth/inputters/dataloader.py index 7e26987b..72dd0731 100644 --- a/mammoth/inputters/dataloader.py +++ b/mammoth/inputters/dataloader.py @@ -139,19 +139,30 @@ def __iter__(self): if accum: yield self.collate_fn(accum) break - if not any(self._lens[current_bucket_idx:]): - # this was the largest bucket, so we'll need to pick the next smallest instead - smallest_bucket_idx = next( - bucket_idx - for bucket_idx in range(smallest_bucket_idx, -1, -1) - if self._lens[bucket_idx] != 0 + try: + if not any(self._lens[current_bucket_idx:]): + # this was the largest bucket, so we'll need to pick the next smallest instead + smallest_bucket_idx = next( + bucket_idx + for bucket_idx in range(smallest_bucket_idx, -1, -1) + if self._lens[bucket_idx] != 0 + ) + current_bucket_idx = smallest_bucket_idx + else: + # there was a larger bucket, shift the index by one + current_bucket_idx = next( + bucket_idx + for bucket_idx in range(current_bucket_idx, len(self._buckets) + 1) + if self._lens[bucket_idx] != 0 + ) + except StopIteration: + logger.warning( + 'StopIteration when trying to pick a bucket in a smart way. ' + 'Doing something stupid instead. Please check me.' ) - current_bucket_idx = smallest_bucket_idx - else: - # there was a larger bucket, shift the index by one current_bucket_idx = next( bucket_idx - for bucket_idx in range(current_bucket_idx, len(self._buckets) + 1) + for bucket_idx in range(len(self._lens)) if self._lens[bucket_idx] != 0 ) _ = self._choose_and_prepare_bucket(bucket_idx=current_bucket_idx)