forked from mynktwri/Kuzushiji_Character_Classification
-
Notifications
You must be signed in to change notification settings - Fork 0
/
clean_kkanji.py
33 lines (27 loc) · 804 Bytes
/
clean_kkanji.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import os
import imageio
import glob
import numpy as np
def encode_labels(labels):
    """Map string labels to integer class indices.

    Args:
        labels: Iterable of hashable, orderable labels, one per sample.

    Returns:
        A ``(unique_labels, indices)`` tuple. ``unique_labels`` is the sorted
        list of distinct labels, and ``indices[k]`` is the position of the
        k-th input label within ``unique_labels``.
    """
    labels = list(labels)
    # Sorting makes the label -> index mapping identical on every run; the
    # iteration order of a set of strings changes with hash randomization.
    unique = sorted(set(labels))
    # A dict lookup keeps encoding linear; list.index() rescans per sample.
    index_of = {label: idx for idx, label in enumerate(unique)}
    return unique, [index_of[label] for label in labels]


# Root of the dataset: one sub-directory per character, whose name carries
# the label after a "+" (the label is taken from dirName.split("+")[1]).
base_dir = "kkanji2"
train_imgs = []
train_labels = []

# Walk every directory under base_dir, loading each PNG and recording the
# label derived from the directory name.
for i, (dirName, subdirList, fileList) in enumerate(os.walk(base_dir)):
    # Sorting in place fixes the order in which os.walk descends, so the
    # saved arrays are reproducible across filesystems.
    subdirList.sort()
    arr = dirName.split("+")
    st = ""
    for im_path in sorted(glob.glob(os.path.join(dirName, "*.png"))):
        train_imgs.append(imageio.imread(im_path))
        # Indexed only when an image exists, so directories without "+"
        # in their path (e.g. base_dir itself) are fine while they hold no PNGs.
        st = arr[1]
        train_labels.append(st)
    # Progress line per directory; st stays "" for directories with no PNGs.
    print(str(i) + ": " + st)

# Replace the string labels in place with integer class indices.
unique_labels, label_indices = encode_labels(train_labels)
train_labels[:] = label_indices

# np.savez does not create missing parent directories.
os.makedirs("KKanji", exist_ok=True)

np.savez("KKanji/kkanji-imgs", train_imgs)
print("saved training images")
np.savez("KKanji/kkanji-unique-labels", unique_labels)
print("saved unique labels")
np.savez("KKanji/kkanji-labels", train_labels)
print("saved training labels")