#!/usr/bin/env python3
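"""
resync.py -- push and pull documents between this machine and a reMarkable
tablet over ssh/rsync, plus a couple of maintenance helpers.

Illustrative invocations (file and folder names are made-up examples):

    ./resync.py push -o Papers paper.pdf   # upload into the "Papers" folder
    ./resync.py pull Papers/paper.pdf      # fetch back (needs the USB web interface)
    ./resync.py backup -o ./backup         # pull everything (excludes still apply)
    ./resync.py clean -n                   # dry-run the cleanup steps

See ./resync.py --help for the full option reference.
"""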
import sys
import os
import time
import datetime
import json
import shutil
import argparse
import uuid
import subprocess
import tempfile
import pathlib
import urllib.request
import urllib.error
import re
import io
import tqdm
import ipaddress
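# documents are rendered into a local staging directory (by default a fresh
# tempdir) and then rsync'ed into xochitl's data directory on the device;
# every ssh/rsync call shares one multiplexed connection via ssh_socketfile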
default_prepdir = tempfile.mkdtemp(prefix="resync")
ssh_socketfile = '/tmp/remarkable-push.socket'
xochitl_dir = '~/.local/share/remarkable/xochitl'
parser = argparse.ArgumentParser(description='Push and pull files to and from your reMarkable',
formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('-n', '--dry-run', dest='dryrun', action='store_true', default=False,
help="Don't actually copy files, just show what would be copied")
parser.add_argument('-o', '--output', action='store', default=None, dest='destination',
                    help=('Destination for copied files.'
                          '\n In push mode, a folder on the device; defaults to the name of the source directory.'
                          '\n In pull mode, a directory on this computer; defaults to the current directory.'))
parser.add_argument('-v', dest='verbosity', action='count', default=0,
help='verbosity level')
parser.add_argument('--if-exists',
choices=["duplicate","overwrite","skip","doconly"],
default="skip",
help=("Specify the behavior when the destination file exists."
"\n duplicate: Create a duplicate file in the same directory."
"\n overwrite: Overwrite existing files and the metadata."
"\n doconly: Overwrite existing files but not the metadata."
"\n skip: Skip the file. (default)"))
parser.add_argument('--if-does-not-exist',
choices=["delete","skip"],
default="skip",
help=("Specify the behavior when the source file does not exist."
"\n delete: Remove the destination file."
"\n skip: Skip the file. (default)"))
parser.add_argument('-e', '--exclude', dest='exclude_patterns', action='append', default=[],
help='exclude a pattern from transfer (must be Python-regex)')
parser.add_argument('-r', '--remote-address', action='store', default='10.11.99.1', dest='host', metavar='<IP or hostname>',
help='remote address of the reMarkable')
parser.add_argument('--transfer-dir', metavar='<directory name>', dest='prepdir', type=str, default=default_prepdir,
                    help='custom directory to render to-be-uploaded files into')
parser.add_argument('--debug', dest='debug', action='store_true', default=False,
help="Render documents, but don't copy to remarkable.")
parser.add_argument('-y', '--yes', dest='yes', action='store_true', default=False,
                    help="Assume yes: do not ask for confirmation (e.g. before deletions).")
parser.add_argument('mode', type=str, choices=["push","pull","backup","+","-","clean"],
help=("Specifies the transfer mode."
"\n push, +: push documents from this machine to the reMarkable"
"\n pull, -: pull documents from the reMarkable to this machine"
"\n backup: pull all files from the remarkable to this machine (excludes still apply)"
"\n clean: performs a number of cleaning operations."
"\n * clear the files in Trash"
"\n * remove the orphaned files"
"\n * remove empty directories"
"\n * detect/select/remove duplicates"
"\n We ask before performing each step unless -y|--yes."))
parser.add_argument('documents', metavar='documents', type=str, nargs='*',
help='Documents and folders to be pushed to the reMarkable')
args = parser.parse_args()
if args.mode == '+':
args.mode = 'push'
elif args.mode == '-':
args.mode = 'pull'
try:
    # if the host parses as an IP address, prepend the root user;
    # otherwise assume it is a host alias from the SSH config
    _ = ipaddress.ip_address(args.host)
    print(f"Assuming {args.host} is an IP address")
    args.host = "root@" + args.host
except ValueError:
    print(f"Assuming {args.host} is a host in your SSH config")
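# options shared by every ssh invocation: legacy ssh-rsa algorithms are
# accepted (the device's sshd offers them), host key checking is disabled,
# presumably because the device's host key can change across updates, and
# -S points each call at the ControlMaster socket opened near the bottom
# of this script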
ssh_command = " ".join(
["ssh",
"-o PubkeyAcceptedKeyTypes=+ssh-rsa",
"-o HostKeyAlgorithms=+ssh-rsa",
"-o UserKnownHostsFile=/dev/null",
"-o StrictHostKeyChecking=no",
"-o ConnectTimeout=1",
f"-S {ssh_socketfile}",])
def ssh(arg, dry=False, status=False):
if args.verbosity >= 1:
print(f'{ssh_command} {args.host} \'{arg}\'')
if not dry:
if status:
return subprocess.getstatusoutput(f'{ssh_command} {args.host} \'{arg}\'')
else:
return subprocess.getoutput(f'{ssh_command} {args.host} \'{arg}\'')
class FileCollision(Exception):
pass
class ShouldNeverHappenError(Exception):
pass
#########################
#
# Helper functions
#
#########################
def logmsg(lvl, msg):
if lvl <= args.verbosity:
print(msg)
def gen_did():
"""
    generates a fresh document uuid (a debug marker can be patched in below if needed)
"""
did = str(uuid.uuid4())
# did = 'f'*8 + did[8:] # for debugging purposes
return did
def construct_metadata(filetype, name, parent_id=''):
"""
constructs a metadata-json for a specified document
"""
meta={
"visibleName": name,
"parent": parent_id,
"lastModified": str(int(time.time()*1000)),
"metadatamodified": False,
"modified": False,
"pinned": False,
"synced": False,
"type": "CollectionType",
"version": 0,
"deleted": False,
}
if filetype in ['pdf', 'epub']:
# changed from default
meta["type"] = "DocumentType"
# only for pdfs & epubs
meta["lastOpened"] = meta["lastModified"]
meta["lastOpenedPage"] = 0
return meta
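# e.g. construct_metadata('pdf', 'paper.pdf') yields, modulo timestamps:
# {"visibleName": "paper.pdf", "parent": "", "type": "DocumentType",
#  "lastModified": "...", "lastOpened": "...", "lastOpenedPage": 0, ...}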
# from https://stackoverflow.com/questions/6886283/how-i-can-i-lazily-read-multiple-json-values-from-a-file-stream-in-python
def stream_read_json(f):
start_pos = 0
while True:
try:
obj = json.load(f)
yield obj
return
except json.JSONDecodeError as e:
f.seek(start_pos)
json_str = f.read(e.pos)
if json_str == '':
return
obj = json.loads(json_str)
start_pos += e.pos
yield obj
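# e.g. stream_read_json(io.StringIO('{"a": 1}{"b": 2}')) yields {'a': 1} and
# {'b': 2}; this lets us parse the concatenated output of a single
# `cat *.metadata` call instead of doing one ssh round-trip per file

# in-memory indices over the device's document metadata, filled by retrieve_metadata()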
metadata_by_uuid = {}
metadata_by_name = {}
metadata_by_parent = {}
metadata_by_name_and_parent = {}
def retrieve_metadata():
"""
retrieves all metadata from the device
"""
print("retrieving metadata...")
paths = ssh(f'ls -1 {xochitl_dir}/*.metadata').split("\n")
with io.StringIO(ssh(f'cat {xochitl_dir}/*.metadata')) as f:
for path, metadata in tqdm.tqdm(zip(paths, stream_read_json(f)), total=len(paths)):
path = pathlib.Path(path)
if ('deleted' in metadata and metadata['deleted']) or \
('parent' in metadata and metadata['parent'] == 'trash'):
continue
# metadata["uuid"] = path.stem
uuid = path.stem
metadata_by_uuid[uuid] = metadata
if metadata["visibleName"] not in metadata_by_name:
metadata_by_name[metadata["visibleName"]] = dict()
metadata_by_name[metadata["visibleName"]][uuid] = metadata
if metadata["parent"] not in metadata_by_parent:
metadata_by_parent[metadata["parent"]] = dict()
metadata_by_parent[metadata["parent"]][uuid] = metadata
            if (metadata["visibleName"], metadata["parent"]) in metadata_by_name_and_parent:
                raise FileCollision(f'Duplicate file name "{metadata["visibleName"]}" under the same parent is not supported, remove one of the files! {fullpath(metadata)}')
            metadata_by_name_and_parent[(metadata["visibleName"], metadata["parent"])] = (uuid, metadata)
def get_metadata_by_uuid(u):
"""
retrieves metadata for a given document identified by its uuid
"""
if u in metadata_by_uuid:
return metadata_by_uuid[u]
else:
return None
def get_metadata_by_name(name):
"""
retrieves metadata for all given documents that have the given name set as visibleName
"""
if name in metadata_by_name:
return metadata_by_name[name]
else:
return None
def get_metadata_by_parent(parent):
"""
retrieves metadata for all given documents that have the given parent
"""
if parent in metadata_by_parent:
return metadata_by_parent[parent]
else:
return {}
def get_metadata_by_name_and_parent(name, parent):
"""
    retrieves the (uuid, metadata) pair of the document with the given name and parent, if any
"""
if (name, parent) in metadata_by_name_and_parent:
return metadata_by_name_and_parent[(name, parent)]
else:
return None
def remove_uuid(u):
"""note --- not a complete implementation. does not remove from _name and _name_and_parent."""
metadata = metadata_by_uuid[u]
del metadata_by_uuid[u]
if ("parent" in metadata) and (metadata["parent"] != ""):
siblings = metadata_by_parent[metadata["parent"]]
del siblings[u]
if not metadata_by_parent[metadata["parent"]]:
del metadata_by_parent[metadata["parent"]]
def curb_tree(node, excludelist):
"""
removes nodes from a tree based on a list of exclude patterns;
    returns True if the root node itself is excluded, False otherwise;
    the tree is curbed in place
"""
for exc in excludelist:
if re.match(exc, node.get_full_path()) is not None:
logmsg(2, "curbing "+node.get_full_path())
return True
uncurbed_children = []
for ch in node.children:
if not curb_tree(ch, excludelist):
uncurbed_children.append(ch)
node.children = uncurbed_children
return False
def ask(msg):
if args.yes:
return True
else:
return input(msg+" [Enter,y,Y / n]") in ['', 'y', 'Y']
def fullpath(metadata):
if ("parent" not in metadata) or (metadata["parent"] == ""):
return "/" + metadata["visibleName"]
else:
parent_uuid = metadata["parent"]
parent = metadata_by_uuid[parent_uuid]
return fullpath(parent) + "/" + metadata["visibleName"]
#################################
#
# Document tree abstraction
#
#################################
class Node:
def __init__(self, name, parent=None):
self.name = name
self.parent = parent
self.children = []
self.gets_modified = False
# now retrieve the document ID for this document if it already exists
if parent is None:
metadata = get_metadata_by_name_and_parent(self.name, "")
else:
metadata = get_metadata_by_name_and_parent(self.name, parent.id)
if metadata:
uuid, metadata = metadata
self.id = uuid
self.exists = True
else:
self.id = None
self.exists = False
def __repr__(self):
return self.get_full_path()
def add_child(self, node):
"""
add a child to this Node and make sure it has a parent set
"""
if node.parent is None:
raise ShouldNeverHappenError("Child was added without having a parent set.")
self.children.append(node)
def get_full_path(self):
if self.parent is None:
return self.name
else:
return self.parent.get_full_path() + '/' + self.name
def render_common(self, prepdir):
"""
renders all files that are shared between the different DocumentTypes
"""
logmsg(1, "preparing for upload: " + self.get_full_path())
if self.id is None:
self.id = gen_did()
with open(f'{prepdir}/{self.id}.metadata', 'w') as f:
if self.parent:
metadata = construct_metadata(self.filetype, self.name, parent_id=self.parent.id)
else:
metadata = construct_metadata(self.filetype, self.name)
json.dump(metadata, f, indent=4)
with open(f'{prepdir}/{self.id}.content', 'w') as f:
json.dump({}, f, indent=4)
def render(self, prepdir):
"""
        This renders the given node, including DocumentType specifics;
        needs to be implemented by the subclasses
        """
        raise NotImplementedError("render() must be implemented by subclasses")
def build(self):
"""
This creates a document tree for all nodes that are direct and indirect
descendants of this node.
"""
        raise NotImplementedError("build() must be implemented by subclasses")
def download(self, targetdir=pathlib.Path.cwd()):
"""
retrieve document node from the remarkable to local system
"""
        raise NotImplementedError("download() must be implemented by subclasses")
class Document(Node):
def __init__(self, document, parent=None):
self.doc = pathlib.Path(document)
self.doctype = 'DocumentType'
self.filetype = self.doc.suffix[1:] if self.doc.suffix.startswith('.') else self.doc.suffix
super().__init__(self.doc.name, parent=parent)
def render(self, prepdir):
"""
renders an actual DocumentType tree node
"""
if not self.exists:
self.render_common(prepdir)
os.makedirs(f'{prepdir}/{self.id}')
os.makedirs(f'{prepdir}/{self.id}.thumbnails')
shutil.copy(self.doc, f'{prepdir}/{self.id}.{self.filetype}')
def build(self):
"""
This creates a document tree for all nodes that are direct and indirect
descendants of this node.
"""
# documents don't have children, this one's easy
return
def download(self, targetdir=pathlib.Path.cwd()):
"""
retrieve document node from the remarkable to local system
"""
if args.dryrun:
print("downloading document to", targetdir/self.name)
else:
logmsg(1, "retrieving " + self.get_full_path())
os.chdir(targetdir)
            # documents we need to actually download via the device's USB web interface
            filename = self.name if self.name.lower().endswith('.pdf') else f'{self.name}.pdf'
            if os.path.exists(filename) and args.if_exists == "skip":
                logmsg(0, f"File {filename} already exists, skipping")
                return
            # the web interface wants the bare host, without the ssh user prefix
            http_host = args.host.split('@')[-1]
            try:
                resp = urllib.request.urlopen(f'http://{http_host}/download/{self.id}/placeholder')
                with open(filename, 'wb') as f:
                    f.write(resp.read())
            except urllib.error.URLError as e:
                print(f"{e.reason}: Is the web interface enabled? (Settings > Storage > USB web interface)")
                sys.exit(2)
class Folder(Node):
def __init__(self, name, parent=None):
self.doctype = 'CollectionType'
self.filetype = 'folder'
super().__init__(name, parent=parent)
def render(self, prepdir):
"""
renders a folder tree node
"""
if not self.exists:
self.render_common(prepdir)
for ch in self.children:
ch.render(prepdir)
def build(self):
"""
This creates a document tree for all nodes that are direct and indirect
descendants of this node.
"""
for uuid, metadata in get_metadata_by_parent(self.id).items():
if metadata['type'] == "CollectionType":
ch = Folder(metadata['visibleName'], parent=self)
else:
name = metadata['visibleName']
if not name.endswith('.pdf'):
name += '.pdf'
ch = Document(name, parent=self)
ch.id = uuid
self.add_child(ch)
ch.build()
def download(self, targetdir=pathlib.Path.cwd()):
"""
retrieve document node from the remarkable to local system
"""
if args.dryrun:
# folders we simply create ourselves
print("creating directory", targetdir/self.name)
for ch in self.children:
ch.download(targetdir/self.name)
else:
logmsg(1, "retrieving " + self.get_full_path())
os.chdir(targetdir)
# folders we simply create ourselves
os.makedirs(self.name, exist_ok=True)
for ch in self.children:
ch.download(targetdir/self.name)
def get_toplevel_files():
"""
get a list of all documents in the toplevel My files drawer
"""
toplevel_files = []
    for u, md in get_metadata_by_parent("").items():
        toplevel_files.append(md['visibleName'])
return toplevel_files
###############################
#
# actual application logic
#
###############################
try:
from termcolor import colored
except ImportError:
colored = lambda s, c: s
size = shutil.get_terminal_size()
columns = size.columns
lines = size.lines
# just print a filesystem tree for the remarkable representation of what we are going to create
def print_tree(node, padding):
"""
prints a filesystem representation of the constructed document tree,
including a note if the according node already exists on the remarkable or not
"""
if node.gets_modified:
note = " | !!! gets modified !!!"
notelen = len(note)
note = colored(note, 'red')
elif node.exists:
note = " | exists already"
notelen = len(note)
note = colored(note, 'green')
else:
note = " | upload"
notelen = len(note)
line = padding + node.name
if len(line) > columns-notelen:
line = line[:columns-notelen-3] + "..."
line = line.ljust(columns-notelen)
print(line+note)
for ch in node.children:
print_tree(ch, padding+" ")
def construct_node_tree_from_disk(basepath, parent=None):
"""
this recursively constructs the document tree based on the top-level
document/folder data structure on disk that we put in initially
"""
if args.verbosity >= 1:
print(f"scanning {basepath}")
path = pathlib.Path(basepath)
if path.is_dir():
node = Folder(path.name, parent=parent)
for f in os.listdir(path):
child = construct_node_tree_from_disk(path/f, parent=node)
if child is not None:
node.add_child(child)
if not node.children:
print(f"empty directory, ignored: {path}")
return None
else:
return node
elif path.is_file() and path.suffix.lower() in ['.pdf', '.epub']:
node = Document(path, parent=parent)
if node.exists:
if args.if_exists == "skip":
pass
            elif args.if_exists == "overwrite":
                # we want to overwrite the document, so we lie to our parser here,
                # pretending the document is not there yet so that it gets rendered
                node.exists = False
                node.gets_modified = True  # and make a note to properly mark it in case of a dry run
            elif args.if_exists == "doconly":
                # same lie as for "overwrite" ...
                node.exists = False
                node.gets_modified = True
                # ... but as only the document file itself shall be overwritten, we
                # swap out the render method for a plain file copy; the node is bound
                # explicitly, since a lambda stored on an instance does not bind self
                # (might mess with xochitl's thumbnail generation, but overall seems fine)
                node.render = lambda prepdir, node=node: shutil.copy(node.doc, f'{prepdir}/{node.id}.{node.filetype}')
            elif args.if_exists == "duplicate":
                # the file gets a new document ID and becomes a new file next to the existing one
                node.id = gen_did()
                node.exists = False
            else:
                raise ShouldNeverHappenError(f"unhandled --if-exists value: {args.if_exists}")
return node
else:
print(f"unsupported file type, ignored: {path}")
return None
def push_to_remarkable():
"""
    push the given list of documents (args.documents) to the reMarkable,
    sorted into the device folder given by -o (args.destination)
"""
if args.destination:
        # first, assemble the given output directory (-o), into which everything
        # shall be sorted, as a chain of Folder nodes in our document tree
        # representation, then add the actual folders/documents at the anchor point
folders = args.destination.split('/')
root = anchor = Folder(folders[0])
for folder in folders[1:]:
ch = Folder(folder, parent=anchor)
anchor.add_child(ch)
anchor = ch
for doc in args.documents:
node = construct_node_tree_from_disk(doc, parent=anchor)
if node is not None:
anchor.add_child(node)
# make it into a 1-element list to streamline code further down
root = [root]
else:
root = []
for doc in args.documents:
node = construct_node_tree_from_disk(doc)
if node is not None:
root.append(node)
# apply excludes
curbed_roots = []
for r in root:
if not curb_tree(r, args.exclude_patterns):
curbed_roots.append(r)
root = curbed_roots
# just print out the assembled document tree with appropriate actions
for r in root:
print_tree(r, "")
print()
try:
if not args.dryrun:
            print("preparing the files to copy")
for r in root:
r.render(args.prepdir)
if args.debug:
print(f' --> Payload data can be found in {args.prepdir}')
return
command = f'rsync -a --info=progress2 -e "{ssh_command}" '
if args.dryrun:
command += " -n "
if args.if_does_not_exist == "delete":
command += " --delete "
command += f' {args.prepdir}/ {args.host}:{xochitl_dir}/ ' # note: the last / is important
print(f"running: {command}")
subprocess.run(command, shell=True, check=True)
if not args.dryrun:
            ssh('systemctl restart xochitl')
finally:
if args.prepdir == default_prepdir and not args.debug: # aka we created it
shutil.rmtree(args.prepdir)
def pull_from_remarkable():
"""
    pull the given documents (args.documents, paths on the reMarkable) to the local system
"""
assert args.if_exists in ["skip", "overwrite"]
if args.destination is None:
destination_directory = pathlib.Path.cwd()
else:
destination_directory = pathlib.Path(args.destination).absolute()
    if not destination_directory.exists():
        print("Output directory non-existing, exiting.", file=sys.stderr)
        sys.exit(1)
anchors = []
for doc in args.documents:
*parents, target = doc.split('/')
local_anchor = None
if parents:
local_anchor = Folder(parents[0], parent=None)
for par in parents[1:]:
new_node = Folder(par, parent=local_anchor)
local_anchor.add_child(new_node)
local_anchor = new_node
        matches = get_metadata_by_name(target)
        if matches is not None:
            # the name may match several documents in different folders;
            # take one match to determine whether it is a document or a folder
            metadata = next(iter(matches.values()))
            if metadata['type'] == 'DocumentType':
                new_node = Document(target, parent=local_anchor)
            else:
                new_node = Folder(target, parent=local_anchor)
            anchors.append(new_node)
else:
print(f"Cannot find {doc}, skipping")
for a in anchors:
a.build()
if not curb_tree(a, args.exclude_patterns):
a.download(targetdir=destination_directory)
def cleanup_deleted():
    print("removing trash files")
    deleted_uuids = []
    for u, metadata in tqdm.tqdm(metadata_by_uuid.items()):
        if metadata.get('deleted', False):
            deleted_uuids.append(u)
if len(deleted_uuids) == 0:
print('No deleted files found.')
return False
else:
if ask(f'Clean up {len(deleted_uuids)} deleted files?'):
ssh(f"rm -r {xochitl_dir}/{{{','.join(deleted_uuids)}}}*", dry=args.dryrun)
return True
else:
return False
def cleanup_orphaned():
    print("removing files without metadata")
    output = ssh(f"ls -1 {xochitl_dir} | while read f ; do stem=${{f%%.*}} ; if ! [ -e {xochitl_dir}/$stem.metadata ] ; then echo $f ; fi ; done")
    orphans = [f for f in output.split("\n") if f]
    if not orphans:
        print('No orphan files found.')
    elif args.dryrun:
        if args.verbosity >= 1:
            print(output)
    else:
        if args.verbosity >= 1:
            print(output)
        if ask(f'Clean up {len(orphans)} orphaned files?'):
            ssh(f"ls -1 {xochitl_dir} | while read f ; do stem=${{f%%.*}} ; if ! [ -e {xochitl_dir}/$stem.metadata ] ; then rm {xochitl_dir}/$f ; fi ; done")
def cleanup_duplicates():
"""detect, select, remove duplicates. If there are notes, merge them."""
print("computing md5sum of each file on the reMarkable device... it takes some time in the first run.")
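    # md5sums are computed on the device and cached next to each pdf as
    # <uuid>.pdf.md5sum; later runs only hash files without a cached sum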
results = ssh((f"for f in {xochitl_dir}/*.pdf ; do "
"if [ ! -e $f.md5sum ] ; then "
"md5sum $f > $f.md5sum ;"
"fi ;"
"done ;"
f"cat {xochitl_dir}/*.md5sum")).split("\n")
database = dict()
duplicates = set()
for line in results:
try:
md5, filename = line.split()
except Exception as e:
print(line)
raise e
u = os.path.basename(os.path.splitext(filename)[0])
if md5 not in database:
database[md5] = []
database[md5].append(u)
if len(database[md5])>=2:
duplicates.add(md5)
deleted_uuids = []
for j, md5 in enumerate(duplicates):
        print(f"({j+1:3d}/{len(duplicates)}) found {len(database[md5])} duplicates for md5sum {md5}:")
try:
tmp = []
for u in database[md5]:
metadata = get_metadata_by_uuid(u)
if metadata is None:
print(f"weird, the metadata for {u} does not exist, skipping (in: {database[md5]})")
continue
lastmodified = metadata["lastModified"]
try:
lastmodified = int(lastmodified)
except ValueError as e:
print(f"error while reading the last modified date of file {fullpath(metadata)}")
raise e
tmp.append((lastmodified, u, metadata))
tmp = sorted(tmp,reverse=True)
for i, (lastmodified, u, metadata) in enumerate(tmp):
lastmodified = datetime.datetime.fromtimestamp(lastmodified//1000)
if i == 0:
prefix="(* newest)"
else:
prefix=" "
print(f"{prefix} {i}, uuid {u}, modified {lastmodified}, {fullpath(metadata)}")
except KeyError:
print("this should not happen...")
continue
while True:
            if len(tmp) == 1:
                print("Only one candidate with usable metadata remains, keeping it.")
                i = 0
                break
            s = input("Hit ENTER to keep the default (*), enter a number to keep that file, n to skip this file, or N to stop: ")
if s == "":
i = 0
break
elif s == "n":
i = -1
break
elif s == "N":
i = -2
break
else:
try:
i = int(s)
except ValueError:
                    print(f"Could not parse input '{s}', try again.")
continue
if 0 <= i < len(tmp):
break
else:
print(f"enter a number from 0 to {len(tmp)-1}.")
continue
        if i == -1:
            print("skipping this file.")
            continue
        if i == -2:
            print("cleanup stopped.")
            break
_, keep, _ = tmp[i]
for _, u, _ in tmp:
if u == keep:
continue
remove_uuid(u)
ssh(f"rm -rv {xochitl_dir}/{u}*", dry=args.dryrun)
deleted_uuids.append(u)
print(f'Removed {len(tmp)-1} duplicates.')
print(f'Removed {len(deleted_uuids)} duplicates in total.')
return len(deleted_uuids) > 0
def cleanup_emptydir():
    """remove empty directories, iterating until no new empty parents appear"""
deleted_uuids = []
empty_found = True
iteration = 1
while empty_found:
print(f"iteration {iteration}")
iteration += 1
empty_found = False
_deleted_uuids = []
for u, metadata in metadata_by_uuid.items():
if metadata['type'] != "CollectionType":
continue
if u not in metadata_by_parent:
print(f"empty: {fullpath(metadata)}")
_deleted_uuids.append(u)
empty_found = True
        # do not remove entries while iterating over the dict!
for u in _deleted_uuids:
remove_uuid(u)
deleted_uuids.extend(_deleted_uuids)
if len(deleted_uuids) == 0:
print('No empty directories found.')
return False
else:
if ask(f'Clean up {len(deleted_uuids)} empty directories?'):
ssh(f"rm -r {xochitl_dir}/{{{','.join(deleted_uuids)}}}*", dry=args.dryrun)
return True
else:
return False
ssh_connection = None
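# open a single ssh master connection (-M) with no remote command (-N); all
# later ssh and rsync calls multiplex over it through the -S control socket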
try:
ssh_connection = subprocess.Popen(f'{ssh_command} {args.host} -M -N -q ', shell=True)
# quickly check if we actually have a functional ssh connection (might not be the case right after an update)
status, checkmsg = ssh("/bin/true",status=True)
if status != 0:
        print("ssh connection does not work, verify that you can manually ssh into your reMarkable. ssh reported:")
        print("msg:", checkmsg)
sys.exit(255)
retrieve_metadata()
if args.mode == 'push':
push_to_remarkable()
elif args.mode == 'pull':
pull_from_remarkable()
elif args.mode == 'backup':
args.documents = get_toplevel_files()
pull_from_remarkable()
elif args.mode == 'clean':
r1 = cleanup_deleted()
cleanup_orphaned()
r2 = cleanup_duplicates()
r3 = cleanup_emptydir()
if not args.dryrun and (r1 or r2 or r3):
            ssh('systemctl restart xochitl')
finally:
if ssh_connection is not None:
print("terminating ssh connection")
ssh_connection.terminate()