Skip to content

Commit

Permalink
fix dump_to_lmdb for py3 (also added saving all keys in the lmdb, mimicking vilbert)
Browse files (browse the repository at this point in the history)
  • Loading branch information
ruotianluo committed Jun 6, 2020
1 parent 6c0af5b commit 0b3949d
Showing 1 changed file with 9 additions and 6 deletions.
15 changes: 9 additions & 6 deletions scripts/dump_to_lmdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def __init__(self, db_path, fn_list=None):
def __getitem__(self, index):
env = self.env
with env.begin(write=False) as txn:
byteflow = txn.get(self.keys[index])
byteflow = txn.get(self.keys[index].encode())

# load image
imgbuf = byteflow
Expand Down Expand Up @@ -117,7 +117,7 @@ def __init__(self, root, loader, extension, fn_list=None):
if fn_list:
samples = [os.path.join(root, str(_)+extension) for _ in fn_list]
else:
samples = make_dataset(self.root, extention)
samples = make_dataset(self.root, extension)

self.loader = loader
self.extension = extension
Expand Down Expand Up @@ -161,14 +161,16 @@ def folder2lmdb(dpath, fn_list, write_frequency=5000):

txn = db.begin(write=True)

tsvfile = open(args.output_file, 'ab')
tsvfile = open(args.output_file, 'a')
writer = csv.DictWriter(tsvfile, delimiter='\t', fieldnames=FIELDNAMES)
names = []
names = []
all_keys = []
for idx, data in enumerate(tqdm.tqdm(data_loader)):
# print(type(data), data)
name, byte, npz = data[0]
if npz is not None:
txn.put(name, byte)
txn.put(name.encode(), byte)
all_keys.append(name)
names.append({'image_id': name, 'status': str(npz is not None)})
if idx % write_frequency == 0:
print("[%d/%d]" % (idx, len(data_loader)))
Expand All @@ -181,7 +183,8 @@ def folder2lmdb(dpath, fn_list, write_frequency=5000):
names = []
tsvfile.flush()
print('writing finished')

# write all keys
txn.put("keys".encode(), pickle.dumps(all_keys))
# finish iterating through dataset
txn.commit()
for name in names:
Expand Down

0 comments on commit 0b3949d

Please sign in to comment.