From 9fe1cf46a410bf2deb66ddd31074ada42ef0f025 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?H=C3=A5kon=20J=2E=20D=2E=20Johnsen?=
 <hakon.j.d.johnsen@ntnu.no>
Date: Tue, 9 Jun 2020 20:15:33 +0200
Subject: [PATCH] Detect missing entries during synchronization

If the DPT-RP1 has more than 1300 items, some of the items
will be missing from the "/documents2?entry_type=all" endpoint,
which is used during synchronization.
Now we detect this and fall back to the old recursive folder traversal
if that happens.
---
 dptrp1/dptrp1.py | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/dptrp1/dptrp1.py b/dptrp1/dptrp1.py
index 9913b71..388657e 100755
--- a/dptrp1/dptrp1.py
+++ b/dptrp1/dptrp1.py
@@ -387,13 +387,39 @@ def traverse_folder(self, remote_path, fields=[]):
             field_query = "&fields=" + ",".join(fields)
         else:
             field_query = ""
-        all_entries = self._get_endpoint(
+        entry_data = self._get_endpoint(
             f"/documents2?entry_type=all" + field_query
-        ).json()["entry_list"]
+        ).json()
+
+        if entry_data["count"] != len(entry_data["entry_list"]):
+            # The device seems to not want to return more than 1300 items in the entry_list, meaning that we will miss entries if the device
+            # has more files/folders than this. Luckily, it can easily be detected by comparing the number of entries with the count.
+            # Perhaps there is some way to request the remaining entries from the same endpoint through some form of pagination,
+            # but we do not know how. Let's fall back to the slower recursive traversal
+            print("Warning: Fast folder traversal did not work. Falling back to slower, recursive folder traversal.")
+            return self.traverse_folder_recursively(remote_path)
+        
+        all_entries = entry_data["entry_list"]
 
         return list(
             filter(lambda e: e["entry_path"].startswith(remote_path), all_entries)
         )
+    
+    def traverse_folder_recursively(self, remote_path):
+        # This is the old recursive implementation of traverse_folder. It returns a flat list: the entry at remote_path,
+        # then everything below it (children are reversed before the prepending reduce, so they keep listing order).
+        # It is slower because the main overhead when communicating with the DPT-RP1 is the request latency, and this
+        # makes one request per folder. traverse_folder falls back to it when the fast listing is incomplete (>1300 items).
+        def traverse(obj):
+            if obj['entry_type'] == 'document':
+                return [obj]
+            else:
+                children = self \
+                  ._get_endpoint("/folders/{remote_id}/entries2".format(remote_id = obj['entry_id'])) \
+                  .json()['entry_list']
+                return [obj] + functools.reduce(lambda acc, c: traverse(c) + acc, children[::-1], [])
+        return traverse(self._resolve_object_by_path(remote_path))
+
 
     def list_document_info(self, remote_path):
         remote_info = self._resolve_object_by_path(remote_path)