-
Notifications
You must be signed in to change notification settings - Fork 3
/
cacher.py
129 lines (106 loc) · 4.17 KB
/
cacher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import os
import cPickle as pickle
import urllib
import binascii
class Cacher(object):
    """
    Generic class for persistent (on-disk) caching of data related to a specific file.

    Every entry is identified by a caller-chosen type string and the path of
    the file it belongs to. Entries live in an in-memory dictionary and,
    optionally, as pickle files below basePath. The name of such a file
    encodes the type, the path and the CRC32 of the file's content, so an
    entry is automatically ignored once the content of the file has changed.
    """

    # Upper bound for the full path of a cache file (arbitrary limit that is
    # meant to be safe on all systems).
    MAX_PATH_LENGTH = 250

    # Extension shared by all cache files.
    _CACHE_SUFFIX = '.cache'

    # Amount of data read at once while calculating a CRC.
    _CRC_CHUNK_SIZE = 1 << 20

    # Characters which _urlquote() leaves untouched.
    _SAFE_CHARS = frozenset(
        'abcdefghijklmnopqrstuvwxyz'
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        '0123456789')

    def __init__(self):
        """
        Creates an empty in-memory cache and makes sure that the on-disk
        cache directory exists.

        The directory is $XDG_CACHE_HOME/ptrace-sampler if XDG_CACHE_HOME is
        set and non-empty, ~/.cache/ptrace-sampler/ otherwise.
        """
        self.memCache = {}

        cacheHome = os.environ.get('XDG_CACHE_HOME')
        if cacheHome:
            self.basePath = os.path.join(cacheHome, 'ptrace-sampler')
        else:
            self.basePath = os.path.expanduser('~/.cache/ptrace-sampler/')

        if not os.path.exists(self.basePath):
            try:
                os.makedirs(self.basePath)
            except OSError:
                # Another process may have created the directory in the
                # meantime; only give up if it is really missing.
                if not os.path.isdir(self.basePath):
                    raise
        assert os.path.isdir(self.basePath)

    def get(self, typ, path, useDisk=True):
        """
        Returns the cached data of given type for the specified path.

        The in-memory cache is consulted first; a hit there is returned
        without looking at the file system. Otherwise, and only if useDisk
        is true, the matching cache file is loaded.

        If the specified file has a newer ctime or mtime than the cache file,
        or if its CRC doesn't match the one stored in the name of the cache
        file, or if there is no matching (readable) cache file at all, None
        is returned.

        The path must refer to an existing regular file whenever the disk
        cache is consulted.
        """
        memKey = (typ, path)
        if memKey in self.memCache:
            return self.memCache[memKey]
        if not useDisk:
            return None

        assert os.path.isfile(path)
        realStat = os.stat(path)
        realTimestamp = max(realStat.st_ctime, realStat.st_mtime)

        # The name of a cache file is fully determined by type, path and CRC,
        # so it can be looked up directly. Re-parsing the names of all files
        # in the cache directory is not only slower, it also cannot find
        # entries whose name had to be truncated by _buildCachePath().
        cacheFile = self._buildCachePath(typ, path, self._calcCrc(path))

        try:
            cacheStat = os.stat(cacheFile)
            if max(cacheStat.st_ctime, cacheStat.st_mtime) < realTimestamp:
                return None
            # NOTE: unpickling executes code chosen by whoever wrote the
            # file, so the cache directory must only be writable by trusted
            # users.
            with open(cacheFile, 'rb') as fd:
                data = pickle.load(fd)
        except Exception:
            # Best effort: a missing, unreadable or corrupt cache file is
            # simply a cache miss (unpickling may raise nearly anything).
            return None

        self.memCache[memKey] = data
        return data

    def store(self, typ, path, data, useDisk=True):
        """
        Adds the given data of given type as cache item for the given path.

        The data is always put into the in-memory cache. If useDisk is true
        it is additionally pickled into the cache directory, replacing
        outdated entries of the same type for the same path; the path must
        refer to an existing regular file in this case.
        """
        self.memCache[(typ, path)] = data
        if not useDisk:
            return

        cacheFile = self._getFilename(typ, path)
        self._invalidate(typ, path, cacheFile)
        with open(cacheFile, 'wb') as fd:
            pickle.dump(data, fd)

    def _invalidate(self, typ, path, currentFile=None):
        """
        Removes all outdated cache entries of the given type for the given path.

        An entry is outdated if its name differs from the current cache file
        name only in the CRC part. currentFile is the full path of the
        current cache file; if omitted it is determined via _getFilename().
        Files which cannot be removed are silently left in place.

        If the name had to be truncated, entries of other paths sharing the
        same truncated name are removed as well.
        """
        if currentFile is None:
            currentFile = self._getFilename(typ, path)
        currentName = os.path.basename(currentFile)

        # Everything in front of '<8 hex digits>.cache' identifies type and path.
        prefix = currentName[:-(8 + len(self._CACHE_SUFFIX))]

        try:
            candidates = os.listdir(self.basePath)
        except OSError:
            return

        for name in candidates:
            if name == currentName or len(name) != len(currentName):
                continue
            if not (name.startswith(prefix) and name.endswith(self._CACHE_SUFFIX)):
                continue
            try:
                os.remove(os.path.join(self.basePath, name))
            except OSError:
                # Cleanup is best effort; a leftover file is never used
                # because its CRC does not match.
                pass

    def _getFilename(self, typ, path):
        """
        Returns the full path of the cache file for the given type and path,
        based on the current content of the file.
        """
        assert os.path.isfile(path)
        fullPath = self._buildCachePath(typ, path, self._calcCrc(path))
        assert len(fullPath) <= self.MAX_PATH_LENGTH
        return fullPath

    def _buildCachePath(self, typ, path, crc):
        """
        Composes the full path of the cache file for type, path and CRC.

        If the result would exceed MAX_PATH_LENGTH, as many characters as
        necessary are dropped from the start of the quoted path.
        """
        quotedTyp = self._urlquote(typ)
        quotedPath = self._urlquote(path)
        template = '%s_%s_%08x' + self._CACHE_SUFFIX

        fullPath = os.path.join(self.basePath, template % (quotedTyp, quotedPath, crc))
        overflow = len(fullPath) - self.MAX_PATH_LENGTH
        if overflow > 0:
            fileName = template % (quotedTyp, quotedPath[overflow:], crc)
            fullPath = os.path.join(self.basePath, fileName)
        return fullPath

    def _calcCrc(self, path):
        """
        Returns the CRC32 of the content of the given file as unsigned number.
        """
        crc = 0
        with open(path, 'rb') as fd:
            # Read in chunks so that large files don't need to fit into memory.
            while True:
                chunk = fd.read(self._CRC_CHUNK_SIZE)
                if not chunk:
                    break
                crc = binascii.crc32(chunk, crc) & 0xffffffff
        return crc

    def _urlquote(self, s):
        """
        Like urllib.quote, but quotes all characters except for letters and numbers
        """
        safe = self._SAFE_CHARS
        return ''.join([c if c in safe else '%%%02x' % ord(c) for c in s])
if __name__ == '__main__':
    # Small command line probe: reports whether the cache holds data of the
    # given type for the given path.
    import sys

    arguments = sys.argv[1:]
    if len(arguments) != 2:
        print("Usage: %s <type> <path>" % sys.argv[0])
        sys.exit(1)

    requestedType, requestedPath = arguments
    cache = Cacher()
    data = cache.get(requestedType, requestedPath)
    if data is not None:
        print("found cached data; type: %s; length: %d" % (type(data), len(data)))
    else:
        print("no cached data found")