-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvalidate_epub.py
executable file
·91 lines (78 loc) · 3.26 KB
/
validate_epub.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env python
import os, filecmp, shutil, time
import subprocess
# Root of the upload area that is scanned for files to validate.
BASE_DIR = "/home/content/uploaded/epubs"

# Destination directories that validated files are sorted into.
VIDEO_DIR = "/home/content/uploaded/epubs/video"
REJECT_DIR = "/home/content/uploaded/epubs/error/reject"
ACCEPT_DIR = "/home/content/uploaded/epubs/error/accept"
WARNING_DIR = "/home/content/uploaded/epubs/warning"
CLEAN_DIR = "/home/content/uploaded/epubs/clean"
DUP_DIR = "/home/content/uploaded/duplicates"

# Directories under BASE_DIR whose files must not be collected for validation.
ignore_dirs = [
    os.path.join(BASE_DIR, subdir)
    for subdir in (
        'clean',
        'warning',
        'video',
        'error',
        'error/accept',
        'error/reject',
    )
]
def run(cmd):
    """Run *cmd* through ``/bin/bash -c`` and return what it printed.

    stderr is merged into stdout. If the command exits with a non-zero
    status, the output it produced before failing is returned instead of
    raising.
    """
    try:
        return subprocess.check_output(
            ["/bin/bash", "-c", cmd],
            stderr=subprocess.STDOUT,
        )
    except subprocess.CalledProcessError as failure:
        # A failing exit status is not treated as an error here; the caller
        # only inspects the captured text.
        return failure.output
# Collect the full path of every file under BASE_DIR that is not inside one
# of the ignored directories.
file_list = []
for dirpath, dirnames, filenames in os.walk(BASE_DIR):
    # Prune ignored directories in place so os.walk never descends into them.
    # Checking only `dirpath` would skip files directly inside an ignored
    # directory but still pick up files in its subdirectories.
    dirnames[:] = [
        name for name in dirnames
        if os.path.join(dirpath, name) not in ignore_dirs
    ]
    if dirpath in ignore_dirs:
        # Only reachable when the walk root itself is listed as ignored.
        continue
    file_list.extend(os.path.join(dirpath, name) for name in filenames)
# Substrings of epubcheck's ERROR output that send a file to REJECT_DIR.
# Any other ERROR sends the file to ACCEPT_DIR.
_REJECT_MARKERS = (
    'I/O error',
    'Cannot read',
    'invalid LOC',
    'No rootfile',
    'Premature end',
    'toc attribute',
    'Required META-INF/container.xml',
    'character content',
)


def _shell_quote(path):
    """Return *path* single-quoted so bash treats it as one literal word.

    Hand-rolled rather than pipes.quote / shlex.quote so that the script
    needs no extra import and behaves the same on Python 2 and 3.
    """
    return "'" + path.replace("'", "'\\''") + "'"


def _target_dir(output):
    """Return the directory a file belongs in, given its epubcheck output."""
    if 'ERROR:' in output:
        missing_image = 'image file' in output and 'is missing' in output
        # The original chain tested 'I/O error' with a stand-alone `if`, so
        # its REJECT_DIR result was overwritten by the following if/elif/else
        # and such files ended up in ACCEPT_DIR. All markers are now checked
        # together.
        if missing_image or any(marker in output for marker in _REJECT_MARKERS):
            return REJECT_DIR
        return ACCEPT_DIR
    if 'WARNING:' in output:
        return WARNING_DIR
    return CLEAN_DIR


# Validate every collected .epub with epubcheck and move it into the
# directory matching the result. Files with other extensions are left alone.
counter = 0
for files in file_list:
    raw_path, raw_name = os.path.split(files)
    raw_base, raw_ext = os.path.splitext(raw_name)
    base = raw_base.lower()
    ext = raw_ext.lower()
    counter += 1
    if ext != '.epub':
        continue

    # The path comes from an upload directory and is passed through a shell,
    # so it is quoted to survive spaces and shell metacharacters.
    output = run(
        "java -jar /home/mhare/sporklib/uploaded/epubcheck-3.0/epubcheck-3.0.jar "
        + _shell_quote(files)
    )
    if not isinstance(output, str):
        # Python 3 returns bytes from the subprocess; decode so the substring
        # checks below work. On Python 2 the output is already a str.
        output = output.decode('utf-8', 'replace')

    file2 = os.path.join(_target_dir(output), raw_name)

    if not os.path.exists(files):
        print("Error: file " + files + " does not exist")
    elif os.path.exists(file2):
        print("Error: file " + file2 + " already exists")
        if filecmp.cmp(files, file2):
            print("But both are the same, moving {} to Duplicates".format(files))
            file2 = os.path.join(DUP_DIR, raw_name)
            shutil.move(files, file2)
        else:
            # NOTE(review): the destination exists and differs from the source;
            # this move may replace the existing file - confirm that is intended.
            print("Moving File " + files + " to " + file2)
            shutil.move(files, file2)
    else:
        print("Moving File " + files + " to " + file2)
        shutil.move(files, file2)