-
Notifications
You must be signed in to change notification settings - Fork 1
/
load_mnist.py
45 lines (28 loc) · 1.36 KB
/
load_mnist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# -*- coding: utf-8 -*-
import struct
import numpy as np
def loadLabelSet(filename):
    """Load the labels stored in an MNIST-style IDX label file.

    The file starts with two big-endian unsigned 32-bit integers; the
    second one is the number of labels. One unsigned byte per label
    follows the header. Any bytes after the last label are ignored.

    Args:
        filename: Path of the binary label file to read.

    Returns:
        A one-dimensional NumPy array of shape ``(labelNum,)`` holding the
        label values, using NumPy's default integer dtype.

    Raises:
        OSError: If the file cannot be opened or read.
        struct.error: If the header or the label payload is truncated.
    """
    # The context manager closes the file even when reading fails.
    with open(filename, 'rb') as binfile:
        buffers = binfile.read()

    # Header: the first integer is not used here, the second is the count.
    head = struct.unpack_from('>II', buffers, 0)
    labelNum = head[1]
    offset = struct.calcsize('>II')  # label bytes start right after the header

    # Keep reporting truncated files as struct.error, which is what
    # unpacking the payload with the struct module would raise.
    available = len(buffers) - offset
    if available < labelNum:
        raise struct.error(
            'label file is truncated: header declares %d labels but only '
            '%d bytes follow it' % (labelNum, available)
        )

    # Decode the payload directly from the buffer instead of materialising
    # a tuple with one Python int per label.
    labels = np.frombuffer(buffers, dtype=np.uint8, count=labelNum, offset=offset)

    # Widen to NumPy's default integer type (a fresh, writable copy) so that
    # callers doing arithmetic on the labels do not hit uint8 wrap-around.
    return labels.astype(np.int_)
def loadImageSet(filename):
    """Load the images stored in an MNIST-style IDX image file.

    The file starts with four big-endian unsigned 32-bit integers; the
    second one is the number of images and the third and fourth are the
    two dimensions of each image. One unsigned byte per pixel follows the
    header. Any bytes after the last pixel are ignored.

    Args:
        filename: Path of the binary image file to read.

    Returns:
        A two-dimensional NumPy array of shape
        ``(imgNum, dim1 * dim2)``: one flattened image per row, using
        NumPy's default integer dtype.

    Raises:
        OSError: If the file cannot be opened or read.
        struct.error: If the header or the pixel payload is truncated.
    """
    # The context manager closes the file even when reading fails.
    with open(filename, 'rb') as binfile:
        buffers = binfile.read()

    # Header: the first integer is not used here; the rest give the image
    # count and the two per-image dimensions.
    head = struct.unpack_from('>IIII', buffers, 0)
    offset = struct.calcsize('>IIII')  # pixel bytes start right after the header
    imgNum, rows, cols = head[1], head[2], head[3]
    pixelsPerImage = rows * cols
    bits = imgNum * pixelsPerImage  # total number of pixel bytes expected

    # Keep reporting truncated files as struct.error, which is what
    # unpacking the payload with the struct module would raise.
    available = len(buffers) - offset
    if available < bits:
        raise struct.error(
            'image file is truncated: header declares %d pixel bytes but '
            'only %d bytes follow it' % (bits, available)
        )

    # Decode the payload directly from the buffer instead of materialising
    # a tuple with one Python int per pixel (tens of millions of entries
    # for a 60000 x 28 x 28 data set).
    imgs = np.frombuffer(buffers, dtype=np.uint8, count=bits, offset=offset)

    # Widen to NumPy's default integer type (a fresh, writable copy) so that
    # callers doing arithmetic on the pixels do not hit uint8 wrap-around,
    # then lay the data out as one flattened image per row.
    return imgs.astype(np.int_).reshape(imgNum, pixelsPerImage)