-
Notifications
You must be signed in to change notification settings - Fork 2
/
PointClouds.py
179 lines (149 loc) · 5.86 KB
/
PointClouds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
""" A reader for the VirtualEmbryo File Format
This module provides classes that assist in the reading and writing of
VirtualEmbryo files from the Berkeley Drosophila Transcription Network Project
(http://bdtnp.lbl.gov/Fly-Net/bioimaging.jsp?w=vpcFormat). It attempts to
preserve as much of the header information as possible in an easily accessible
way.
"""
# numpy and pandas are optional: the HAS_* flags record whether each import
# succeeded so the rest of the module can choose a fallback code path.
try:
    import numpy as np
    HAS_NUMPY = True
except ImportError:
    HAS_NUMPY = False

try:
    import pandas as pd
    HAS_PANDAS = True
except ImportError:
    # Only a missing package is expected here; a bare ``except`` would also
    # swallow KeyboardInterrupt and SystemExit.
    HAS_PANDAS = False
class PointCloudReader(object):
    """Iterate over the data rows of a VirtualEmbryo file.

    The constructor consumes the ``#``-prefixed header of the file handle.
    Every ``# name = value`` header line is stored as an attribute of the
    reader (the other methods read the ``column`` attribute, which therefore
    has to come from the header), and the handle is left positioned at the
    first non-header line.  Iterating over the reader then yields one list of
    converted fields per remaining line.
    """

    def __init__(self, fh):
        """Parse the header of *fh* and rewind to the first data row.

        *fh* must be a text file-like object supporting ``readline()``,
        ``tell()`` and ``seek()``.
        """
        self.__filehandle__ = fh
        pos = fh.tell()
        line = fh.readline()
        # startswith() is False for '', so an empty or header-only file simply
        # ends the loop instead of raising IndexError.
        while line.startswith('#'):
            # '##' marks a comment.  A header line without '=' carries no
            # name/value pair, so it is skipped as well.
            if not line.startswith('##') and '=' in line:
                # partition() splits on the first '=' only, so values that
                # themselves contain '=' are kept intact.
                name, _, value = line.partition('=')
                setattr(self, name.strip(' \t#'),
                        self._parse_header_value(value.strip()))
            pos = fh.tell()
            line = fh.readline()
        # The last readline() consumed the first non-header line; step back
        # so that iteration starts with it.
        fh.seek(pos)

    @staticmethod
    def _parse_header_value(dataval):
        """Convert the text of one header value to a scalar or (nested) list."""
        if ';' in dataval:
            # 2D array, ';' delimits rows, ',' delimits columns
            assert dataval.startswith('['), "2D header value must start with '['"
            assert dataval.endswith(']'), "2D header value must end with ']'"
            return [[strip_to_number(s) for s in row.split(',')]
                    for row in dataval[1:-1].split(';')]
        if dataval.startswith('[') and dataval.endswith(']'):
            # 1D array
            return [strip_to_number(v) for v in dataval[1:-1].split(',')]
        # It's a scalar.
        return to_number(dataval)

    def __next__(self):
        """Return the next line as a list of converted comma-separated fields.

        Raises StopIteration at end of file.
        """
        line = self.__filehandle__.readline()
        if line == '':
            raise StopIteration
        # Drop the line terminator first; otherwise it shields a trailing
        # quote on the last field from being stripped.
        return [strip_to_number(field)
                for field in line.rstrip('\r\n').split(',')]

    def next(self):
        """Python 2 spelling of ``__next__``."""
        return self.__next__()

    def __iter__(self):
        return self

    def get_gene_names(self):
        """ Returns a set of the gene names in the VPC file

        A gene name is the part of a column name before the first ``_``,
        excluding the reserved names id, x, y, z, Nx, Ny and Nz.
        """
        defined_names = ('id', 'x', 'y', 'z', 'Nx', 'Ny', 'Nz')
        # Compare the whole prefix: a startswith() test would also discard
        # genes whose names merely begin with a reserved name (e.g. 'zen').
        prefixes = (name.split('_')[0] for name in self.column)
        return set(prefix for prefix in prefixes
                   if prefix not in defined_names)

    def _read_all_rows(self):
        """Return every remaining row, restoring the file position after."""
        fh = self.__filehandle__
        filepos = fh.tell()
        try:
            return list(self)
        finally:
            fh.seek(filepos)

    @staticmethod
    def _fill_column(array, j, k, values):
        """Store *values* along the first axis of *array* at index (j, k)."""
        if HAS_NUMPY:
            array[:, j, k] = values
        else:
            # Nested lists cannot be indexed with a tuple.
            for i, value in enumerate(values):
                array[i][j][k] = value

    @staticmethod
    def _panel_class():
        """Return a usable ``pandas.Panel`` class, or None if unavailable."""
        if not HAS_PANDAS:
            return None
        panel_cls = getattr(pd, 'Panel', None)
        # Panel was removed from pandas; some releases keep an empty stub
        # class under that name, which lacks the real Panel's methods.
        if panel_cls is None or not hasattr(panel_cls, 'to_frame'):
            return None
        return panel_cls

    def data_to_arrays(self, usenan=True):
        """Turn raw data from virtual embryo to arrays

        Primarily, this separates out the times into its own axis, and puts the
        columns into a consistent order.

        Returns ``(exparray, posarray)``.  Both are indexed as
        ``[row, j, time]`` where ``j`` runs over the sorted gene names for
        ``exparray`` and over x, y, z for ``posarray``.  Gene/time
        combinations with no column are left at NaN when *usenan* is true and
        at 0 otherwise.  The results are ``pandas.Panel`` objects when the
        installed pandas provides Panel, numpy arrays when numpy is available,
        and nested lists otherwise.  The file position is left unchanged.

        Raises ValueError if an x, y or z column is missing for some time.
        """
        all_data = self._read_all_rows()
        columns = self.column
        # First occurrence wins, matching list.index().
        col_index = {}
        for idx, name in enumerate(columns):
            col_index.setdefault(name, idx)

        times = sorted(set(name.split('_')[-1]
                           for name in columns
                           if name != 'id'))
        # Sort once so the axis order is deterministic and the labels used
        # below are guaranteed to match the array layout.
        genes = sorted(self.get_gene_names())

        if HAS_NUMPY:
            exparray = np.zeros((len(all_data), len(genes), len(times)))
            if usenan:
                exparray.fill(np.nan)
            posarray = np.zeros([len(all_data), 3, len(times)],
                                dtype=np.float32)
        else:
            fill = float('nan') if usenan else 0
            exparray = [[[fill for _ in times]
                         for _ in genes]
                        for _ in all_data]
            posarray = [[[0 for _ in times]
                         for _ in range(3)]
                        for _ in all_data]

        for j, gene in enumerate(genes):
            for k, time in enumerate(times):
                colnum = col_index.get(gene + "__" + time)
                if colnum is None:
                    # No data for this gene at this time!
                    continue
                self._fill_column(exparray, j, k,
                                  [row[colnum] for row in all_data])

        for k, time in enumerate(times):
            for j, dim in enumerate(['x', 'y', 'z']):
                colnum = columns.index(dim + '__' + time)
                self._fill_column(posarray, j, k,
                                  [row[colnum] for row in all_data])

        panel_cls = self._panel_class()
        if panel_cls is not None:
            ids = [item[0] for item in all_data]
            time_labels = ['T{}'.format(i+1) for i in range(len(times))]
            exparray = panel_cls(exparray, ids,
                                 major_axis=genes,
                                 minor_axis=time_labels)
            posarray = panel_cls(posarray, ids,
                                 major_axis=['X', 'Y', 'Z'],
                                 minor_axis=time_labels)
        return exparray, posarray

    def get_neighbors(self):
        """Return a dict mapping each row's first field to its neighbor list.

        Each row is read as ``len(self.column)`` column values, then a count,
        then that many trailing entries; the trailing entries are returned.
        The file position is left unchanged.
        """
        n_cols = len(self.column)
        neighbors = {}
        for row in self._read_all_rows():
            row_neighbors = list(row[n_cols + 1:])
            assert len(row_neighbors) == row[n_cols], \
                "neighbor count does not match the number of neighbors listed"
            neighbors[row[0]] = row_neighbors
        return neighbors
def strip_to_number(dataval, chars = '\'" \t #'):
    """Strip *chars* from both ends of *dataval*, then apply to_number.

    By default quotes, spaces, tabs and ``#`` characters are removed.
    """
    stripped = dataval.strip(chars)
    return to_number(stripped)
def to_number(dataval):
    """ A forgiving number converter.

    Will convert to int if possible, float otherwise, and if neither, will return
    the input.

    Text such as ``"3"`` becomes an int, ``"3.5"`` or ``"1e3"`` becomes a
    float, and anything that cannot be read as a number (including None and
    other non-numeric objects) is returned unchanged.  Numeric input that has
    a fractional part is returned as a float rather than truncated.
    """
    try:
        as_float = float(dataval)
    except (TypeError, ValueError):
        # Not a number at all: hand the input back untouched.
        return dataval
    try:
        as_int = int(dataval)
    except (TypeError, ValueError, OverflowError):
        # A float but not an int ("3.5", "1e3", nan, inf, ...).
        return as_float
    # int() parses text exactly but silently truncates numeric objects
    # (int(3.7) == 3), so for non-text input accept the int only when no
    # information was lost.
    is_text = isinstance(dataval, (str, bytes, type(u"")))
    if is_text or as_int == dataval:
        return as_int
    return as_float