-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsizes_epubs.py
executable file
·61 lines (50 loc) · 1.5 KB
/
sizes_epubs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python
import os, filecmp, shutil, time, zipfile
from numpy import *
from histogram import *
# Root of the directory tree that is scanned for files.
BASE_DIR = "/home/content/uploaded/epubs"
# Full directory paths (as produced by os.walk) to leave out of the scan.
ignore_dirs = []

# Collect the full path of every file found below BASE_DIR.
path_list = []
for dirpath, dirnames, filenames in os.walk(BASE_DIR):
    # Prune ignored sub-directories in place so os.walk never descends into
    # them; testing dirpath alone would still visit (and collect from) the
    # children of an ignored directory.
    dirnames[:] = [name for name in dirnames
                   if os.path.join(dirpath, name) not in ignore_dirs]
    if dirpath in ignore_dirs:
        # Only reachable for BASE_DIR itself, which cannot be pruned above.
        continue
    path_list.extend(os.path.join(dirpath, name) for name in filenames)
# Lists that start out empty.
# NOTE(review): neither list is referenced again in the visible script.
dbook_epub, zip_epub = [], []

# Running tallies updated while the files are scanned: the largest size seen
# and the number of files above / at-or-below the histogram range.
largest = over_flow = under_flow = 0

# Histogram geometry: nbins equal-width bins spanning hmin..hmax.  The values
# are compared against os.path.getsize() results, i.e. sizes in bytes.
hmin, hmax = 0.0, 450000000.0
nbins = 100
bin_size = (hmax - hmin) / nbins
# Offset from a bin's lower edge to its mid-point.
bin_center = 0.5 * bin_size
# Histogram named "h" with a single axis called 'freq'.  The axis values are
# the mid-points of nbins equal-width bins between hmin and hmax.  They are
# generated from an integer index instead of a float-stepped arange, whose
# element count can drift by one through rounding, so there are always
# exactly nbins of them.
# NOTE(review): assumes histogram() interprets these values as bin centres --
# confirm against the histogram package.
h = histogram("h", [('freq', hmin + bin_center + bin_size * arange(nbins))])
# Tally the size of every EPUB in path_list: track the largest size, count
# files outside the histogram range, and add the rest to the histogram h.
for files in path_list:
    # Compare the lower-cased extension so '.EPUB' and '.epub' both match.
    ext = os.path.splitext(files)[1].lower()
    if ext != '.epub':
        continue
    try:
        fsize = os.path.getsize(files)
    except OSError:
        # E.g. a dangling symlink, or a file removed since the directory
        # walk; skip it rather than abort the whole scan.
        print("Skipping unreadable file: " + files)
        continue
    if fsize > largest:
        largest = fsize
    if fsize > hmax:
        # Too large for the histogram: count it and name the file.
        over_flow += 1
        print(files)
    elif fsize <= hmin:
        under_flow += 1
    else:
        # NOTE(review): a file of exactly hmax bytes lands here; confirm the
        # histogram accepts a range starting at its upper limit.
        # Read the contents of the range [fsize, fsize+0.01], add one, and
        # store it back.
        # NOTE(review): presumably .I is the bin count and the assigned pair
        # is (counts, errors) -- confirm against the histogram package.
        bin_value = h[fsize, fsize+0.01].I
        bin_value += 1
        h[fsize, fsize+0.01] = bin_value, None
def _size_range_label(index):
    """Return the range prefix printed for bin number *index* (1-based).

    The bounds are (index-1)*bin_size and index*bin_size divided by
    1000000, formatted exactly as the report has always printed them.
    """
    low = (index-1)*bin_size/1000000
    high = index*bin_size/1000000
    return str(low) + "Mb - " + str(high) + "Mb = \t"


# Print the largest size seen, then one line per non-empty histogram bin.
# Single-argument print(...) produces the same output on Python 2 and 3.
print("Largest File Size = " + str(largest))
hdata = h.I
counter = 0  # stays 0 if hdata is empty
for counter, bins in enumerate(hdata, 1):
    if bins != 0:
        print(_size_range_label(counter) + str(int(bins)))
# The overflow count is reported as one extra bin directly after the last one.
# NOTE(review): under_flow is tallied during the scan but never reported.
counter += 1
print(_size_range_label(counter) + str(int(over_flow)))
#print h
#plot(h)