diff --git a/_modules/mltb2/files.html b/_modules/mltb2/files.html index 4eb7909..d73871d 100644 --- a/_modules/mltb2/files.html +++ b/_modules/mltb2/files.html @@ -230,10 +230,22 @@

Source code for mltb2.files

             (self._result_dir_path / f"{uuid}.lock").touch()
             self._own_lock_uuids.add(uuid)
 
-
[docs] def read_batch(self) -> Sequence[Dict[str, Any]]: - """Read the next batch of data.""" + def _get_remaining_data(self) -> List[Dict[str, Any]]: locked_or_done_uuids: Set[str] = self._get_locked_or_done_uuids() remaining_data = [d for d in self.data if d[self.uuid_name] not in locked_or_done_uuids] + return remaining_data + +
[docs] def read_batch(self) -> Sequence[Dict[str, Any]]: + """Read the next batch of data.""" + remaining_data: List[Dict[str, Any]] = self._get_remaining_data() + + # if we think we are done, delete all lock files and check again + # this is because lock files might be orphaned + if len(remaining_data) == 0: + for lock_file_path in self._result_dir_path.glob("*.lock"): + lock_file_path.unlink(missing_ok=True) + remaining_data = self._get_remaining_data() + random.shuffle(remaining_data) next_batch_size = min(self.batch_size, len(remaining_data)) next_batch = remaining_data[:next_batch_size]