# def_funs.py
# %load_ext autoreload
# from importlib import reload
# import x
# reload(x)
#get_ipython().magic(u'matplotlib inline')
import numpy as np
frame_dur = np.array([0.093]) # sec # mesoscope time resolution (4 depth, 2 areas) (~10.7 Hz; each pair of planes that are recorded simultaneously have time resolution frame_dur)
flash_dur = .25
gray_dur = .5
num_planes = 8
if 'control_single_beam' in locals() and control_single_beam==1: # data resampled as if it was recorded with single beam scope
frame_dur = frame_dur*2
#%% Use below to stop matplotlib.font_manager debug messages in log file
# https://stackoverflow.com/questions/58320567/matplotlib-font-manager-debug-messages-in-log-file
import logging
logger=logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logging.basicConfig(filename='logfile.log', # level=logging.DEBUG
                    format='%(levelname)s:%(name)s:%(message)s')
def objective(x):
    # note: assumes a callable `model` is defined elsewhere in the calling scope
    obj = model(x)
    logger.debug('objective = {}'.format(obj))
    return obj
#%%
import numpy as np
import socket
import os
import h5py
import pandas as pd
import matplotlib.backends.backend_pdf
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
sns.set_style('whitegrid')
import scipy as sci
import scipy.stats as st
from IPython.display import display
import datetime
import copy
import sys
import re
import numpy.random as rnd
import pickle
import shutil
import gc
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import StandardScaler
from matplotlib import cm # colormap
#%%
try:
from visual_behavior.ophys.io.convert_level_1_to_level_2 import convert_level_1_to_level_2
except Exception as E:
    print('could not import visual_behavior.ophys.io.convert_level_1_to_level_2')
print(E)
from visual_behavior.ophys.dataset.visual_behavior_ophys_dataset import VisualBehaviorOphysDataset
from visual_behavior.ophys.response_analysis.response_analysis import ResponseAnalysis
from visual_behavior.visualization.ophys import experiment_summary_figures as esf
from visual_behavior.visualization.ophys import summary_figures as sf
import visual_behavior.ophys.response_analysis.utilities as ut
from visual_behavior.ophys.io.lims_database import LimsDatabase
#%%
import seaborn
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter, AutoMinorLocator)
#%%
import ophysextractor
from ophysextractor.datasets.lims_ophys_session import LimsOphysSession
from ophysextractor.datasets.lims_ophys_experiment import LimsOphysExperiment
from ophysextractor.datasets.motion_corr_physio import MotionCorrPhysio
from ophysextractor.utils.util import mongo, get_psql_dict_cursor
from visual_behavior.ophys.io.convert_level_1_to_level_2 import get_segmentation_dir, get_lims_data, get_roi_locations, get_roi_metrics
import visual_behavior.utilities as vbut
#%%
# from def_paths import *
'''
if socket.gethostname() == 'OSXLT1JHD5.local': # allen mac
dirAna = "/Users/farzaneh.najafi/Documents/analysis_codes/"
dir0 = '/Users/farzaneh.najafi/OneDrive - Allen Institute/Analysis'
elif socket.gethostname() == 'ibs-farzaneh-ux2': # allen pc
dirAna = "/home/farzaneh/Documents/analysis_codes/"
dir0 = '/home/farzaneh/OneDrive/Analysis'
elif socket.gethostname() == 'ibs-is-analysis-ux1': # analysis computer
dirAna = "/home/farzaneh.najafi/analysis_codes/"
dir0 = '/home/farzaneh.najafi/OneDrive/Analysis' # you need to create this!
'''
if sys.platform=='win32':
dir_server_me = 'Z:\\braintv\\workgroups\\nc-ophys\\Farzaneh'
else:
# /allen/programs/braintv/workgroups/nc-ophys/Farzaneh'
dir_server_me = os.path.join(os.sep, 'allen', 'programs', 'braintv', 'workgroups', 'nc-ophys', 'Farzaneh')
#dir_svm = os.path.join(dir_server_me, 'SVM')
#if same_num_neuron_all_planes:
# dir_svm = os.path.join(dir_svm, 'same_num_neurons_all_planes')
#%% Find the indices of the list "ophys_sess" in the main list "stages" (they are both lists of strings)
'''
# you don't need the following, all you need to do is:
np.in1d(np.array(stages), np.array(ophys_sess))
def index_list_in_listM(stages, ophys_sess):
stage_now = []
for j in range(len(stages)):
this_stage = [stages[j].find(ophys_sess[i]) for i in range(len(ophys_sess))]
stage_now.append(any(np.in1d(this_stage, 0)))
# len(stage_now), sum(stage_now)
stage_now = np.array(stage_now)
return stage_now
'''
#%% Rename files
'''
for i in allSessName:
d,p = os.path.split(i)
ii = os.path.basename(i)
nn = ii.find('_')
ii = 'this'+ii[nn:]
de = os.path.join(d, ii)
os.rename(i, de)
'''
#%% Set figure labels and fgn (it will be used for setting fign, the final figure name to be saved)
def set_figname_labels(peak_win, flash_win, norm_to_max, doShift, doScale, doShift_again, bl_gray_screen, mean_notPeak, trace_median):
if norm_to_max:
ylab = '(norm. to max sess amp)'
ylab_short = '(norm max)'
fgn = 'normMax' # for figure name
# fgn = fgn0 + 'normMax' # for figure name
else:
        # Note: bl_gray_screen applies when the df/f traces were scaled and shifted (the default is not to do so);
        # but when quantifying flash/omission responses, we always use the 10th percentile of the median omit-aligned traces to compute the baseline and subtract it off the quantifications (within those windows).
if np.logical_and(doShift, doScale):
if bl_gray_screen: # baseline was computed on the initial gray screen at the beginning of the session
fgn0 = 'blGray_'
else:
fgn0 = 'blP10' # baseline was computed on the 10th percentile of trace during samps_bef frames preceding the omissions
ylab = '(norm. to baseline sd)'
ylab_short = '(norm blSD)'
fgn = fgn0 + 'normBlSd_'
else:
fgn0 = ''
ylab = ''
ylab_short = ''
fgn = fgn0
if ylab=='':
ylabel = 'DF/F'
else:
ylabel = 'DF/F %s' %(ylab) #if iplane==num_depth else ''
if mean_notPeak:
fgn = fgn + 'omitMean' + str(peak_win)
lab_peakMean = 'Mean response'
else:
fgn = fgn + 'omitPeak' + str(peak_win)
lab_peakMean = 'Peak amp'
fgn = fgn + '_flashMean' + str(flash_win)
# fgn = fgn + '_flashTiming' + str(flash_win_timing)
if doShift_again:
fgn = fgn + '_blShiftAgain'
if trace_median: # use peak measures computed on the trial-median traces (less noisy)
lab_med = 'trace med'
else:
lab_med = 'med of Trs'
return fgn, ylabel, ylab_short, lab_peakMean, lab_med
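# Hypothetical usage sketch of set_figname_labels (argument values below are made up for illustration):
# fgn, ylabel, ylab_short, lab_peakMean, lab_med = set_figname_labels(
#     peak_win=[0, .75], flash_win=[0, .35], norm_to_max=0, doShift=0, doScale=0,
#     doShift_again=0, bl_gray_screen=0, mean_notPeak=1, trace_median=1)
# # with these inputs, fgn would be 'omitMean[0, 0.75]_flashMean[0, 0.35]' and ylabel would be 'DF/F'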
#%% Compute median and 25th, 75th percentile across neurons for each experiment
def med_perc_each_exp(all_exp_traces):
    # all_exp_traces has size e, where e = number of experiments
    # all_exp_traces[i] has size n, where n = number of neurons
    # output variables will have size e, where e = number of experiments
    # NOTE: 08/06/2020: I decided to go with mean (across trials, neurons) instead of median: it actually revealed that in the 3rd plane of LM (especially in LM) the mean response of Slc is slightly going up after omissions (we know some neurons are like this among Slc).
    # Also, when we plot summary mice data, we take the average across sessions of mice, so it makes sense that we also take the average across trials and neurons (and not the median).
    med_ns = np.array([np.mean(all_exp_traces[i]) for i in range(len(all_exp_traces))])
    q25_ns = np.array([np.percentile(all_exp_traces[i], 25) for i in range(len(all_exp_traces))]) # size: total number of planes (8*num_sessions); each element: 25th percentile across neurons in that plane
    q75_ns = np.array([np.percentile(all_exp_traces[i], 75) for i in range(len(all_exp_traces))]) # size: total number of planes (8*num_sessions); each element: 75th percentile across neurons in that plane
return med_ns, q25_ns, q75_ns
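# Hypothetical usage sketch: one array of per-neuron values for each of two experiments (random data just for illustration):
# all_exp_traces = [np.random.rand(20), np.random.rand(35)]
# med_ns, q25_ns, q75_ns = med_perc_each_exp(all_exp_traces)
# # each output has length 2 (one value per experiment); med_ns holds the mean (see the note above), q25_ns/q75_ns the percentiles.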
#%% Determine if a session is the 1st novel session or not
# match the session date ("date") with all the dates in mouse_trainHist_all2, to find the stage of the current and the previous sessions.
# NOTE: below you call a session novel if it is BA (an A session preceded by B) or AB (a B session preceded by A).
# It perhaps makes more sense to only call the 1st B session (after A sessions), or the 1st A session (after B sessions), a novel session.
# For that, figure out the date of the 1st novel session from the mouse training history, and then see if the current experiment date matches it.
# To figure out the date of the 1st novel session, look for the 1st B after a row of A (or the 1st A after a row of B); you have these pieces of code in set_mousetrainhist_allsess2an_whatsess.py
def is_session_novel(dir_server_me, mouse, date):
# set file name: mouse_trainHist_all2
analysis_name2 = 'mouse_trainHist_all2'
name = 'all_sess_%s_.' %(analysis_name2)
allSessName2, h5_files = all_sess_set_h5_fileName(analysis_name2, dir_server_me)
# print(allSessName2)
# load mouse_trainHist_all2
mouse_trainHist_all2 = pd.read_hdf(allSessName2) #, key='all_sess') #'svm_vars') ## Load all_sess dataframe
trainHist_this_mouse = mouse_trainHist_all2[mouse_trainHist_all2['mouse_id']==mouse]
stages = list(trainHist_this_mouse['stage'].values)
# All ophys_A sessions (all A, habit, passive)
    name = r'OPHYS\w+A' # raw string to avoid invalid-escape warnings # name = r'OPHYS\_[0-9]\w+A'
regex = re.compile(name) # + '.h5')
ophys_sessA = [string for string in stages if re.findall(regex, string)] # string=stages[-1]
# ophys_sessA, len(ophys_sessA)
# All ophys_B sessions
    name = r'OPHYS\w+B' # raw string to avoid invalid-escape warnings # name = r'OPHYS\_[0-9]\w+A'
regex = re.compile(name) # + '.h5')
ophys_sessB = [string for string in stages if re.findall(regex, string)] # string=stages[-1]
# ophys_sessB, len(ophys_sessB)
### match the session date ("date") with all the dates in mouse_trainHist_all2, so you can find the stage of the current and the previous sessions.
d = trainHist_this_mouse['date'].values
this_mouse_sess = np.argwhere(np.in1d(d, date)).squeeze()
stage_this_sess = trainHist_this_mouse.iloc[this_mouse_sess]['stage'] # is this B
stage_prev_sess = trainHist_this_mouse.iloc[this_mouse_sess-1]['stage'] # is this A
# is current session B, and the previous session A?
AB = np.logical_and(np.in1d(ophys_sessB, stage_this_sess).any(), np.in1d(ophys_sessA, stage_prev_sess).any())
BA = np.logical_and(np.in1d(ophys_sessA, stage_this_sess).any(), np.in1d(ophys_sessB, stage_prev_sess).any())
if any([AB,BA]):
session_novel = True
else:
session_novel = False
# print(trainHist_this_mouse)
# print(f'Current data: {date}; session_novel: {session_novel}')
return session_novel
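# Hypothetical example of the stage-name matching used above (stage names below are made up):
# stages = ['OPHYS_1_images_A', 'OPHYS_2_images_A_passive', 'OPHYS_4_images_B']
# regex = re.compile(r'OPHYS\w+B')
# [s for s in stages if re.findall(regex, s)]   # -> ['OPHYS_4_images_B']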
#%%
# Set time for the interpolated traces (every 3ms time points);
# Set the time of flashes and grays for the imaging traces;
# Set some useful vars which you will need for most subsequent analysis: all_mice_id, stages_sessionN, mouse_trainHist_all, mouseCre_numSess
# Set plane indices for each area (V1 and LM)
def set_timetrace_flashesind_allmice_areas(samps_bef, samps_aft, frame_dur, doCorrs, all_sess):
#%% Set the time trace (original and upsampled)
# Note: the following two quantities are very similar (though time_orig is closer to the truth!):
# time_orig and time_trace (defined below)
# time_orig = np.mean(local_time_allOmitt, axis=1)
time_trace, time_trace_new = upsample_time_imaging(samps_bef, samps_aft, 31.) # set time for the interpolated traces (every 3ms time points)
#xnow = np.unique(np.concatenate((np.arange(0, -(samps_bef+1), -1), np.arange(0, samps_aft))))
# set x tick marks
xmj = np.unique(np.concatenate((np.arange(0, time_trace[0], -1), np.arange(0, time_trace[-1], 1))))
xmn = np.arange(-.5, time_trace[-1], 1)
# xmn = np.arange(.25, time_trace[-1], .5)
xmjn = [xmj, xmn]
#%% Set the time of flashes and grays for the imaging traces
    ### NOTE: you should remove 0 from flashes_win_trace_index_unq_time
    ### because at time 0 there is no flash; there is an omission!!
flashes_win_trace_index_unq_time, grays_win_trace_index_unq_time, flashes_win_trace_index_unq, grays_win_trace_index_unq = \
flash_gray_onset_relOmit(samps_bef, samps_aft, frame_dur)
    #%% Set some useful vars which you will need for most subsequent analysis:
    # all_mice_id, stages_sessionN, mouse_trainHist_all, mouseCre_numSess
all_mice_id, stages_sessionN, mouse_trainHist_all, mouseCre_numSess = all_sess_sum_stages_trainHist(all_sess, doCorrs)
    #%% Set plane indices for each area (V1 and LM)
if doCorrs==1:
        distinct_areas, i_areas = np.unique(all_sess[all_sess['mouse_id']==all_mice_id[0]]['area'].iloc[0], return_inverse=True) # take this info from any mouse ... it doesn't matter... we go with mouse 0
    else:
        distinct_areas, i_areas = np.unique(all_sess[all_sess['mouse_id']==all_mice_id[0]]['area'].values, return_inverse=True) # take this info from any mouse ... it doesn't matter... we go with mouse 0
i_areas = i_areas[range(num_planes)]
# distinct_areas = np.array(['VISl', 'VISp'])
# i_areas = np.array([0, 0, 0, 0, 1, 1, 1, 1])
# v1 : shows as VISp
v1_ai = np.argwhere([distinct_areas[i].find('p')!=-1 for i in range(len(distinct_areas))]).squeeze()
# LM : shown as VISl or LM
lm_ai = np.argwhere([(distinct_areas[i].find('l')!=-1 or distinct_areas[i].find('L')!=-1) for i in range(len(distinct_areas))]).squeeze()
inds_v1 = np.argwhere(i_areas==v1_ai).squeeze()
inds_lm = np.argwhere(i_areas==lm_ai).squeeze()
    print('V1 plane indices: ', inds_v1)
    print('LM plane indices: ', inds_lm)
#inds_lm = np.arange(num_depth) # we can get these from the following vars: distinct_areas and i_areas[range(num_planes)]
#inds_v1 = np.arange(num_depth, num_planes)
return time_trace, time_trace_new, xmjn, flashes_win_trace_index_unq_time, grays_win_trace_index_unq_time, flashes_win_trace_index_unq, grays_win_trace_index_unq, all_mice_id, stages_sessionN, mouse_trainHist_all, mouseCre_numSess, distinct_areas, i_areas, inds_v1, inds_lm
#%% Set time windows, in frame units, for computing flash- and omission-evoked responses
# the output includes frame indices that are relative to the beginning of "trace"; i.e. indices on the trace whose time 0 is trace[samps_bef].
def set_frame_window_flash_omit(peak_win, flash_win, flash_win_timing, samps_bef, frame_dur):
# np.floor was used here. On 04/29/2020 I changed it all to np.round, bc it is more accurate.
# just define one of these below ... no need to do it 3 times! for now leaving it like this.
# Compute peak_win_frames, ie window to look for peak in frame units
peak_win_frames = samps_bef + np.round(peak_win / frame_dur).astype(int) # convert peak_win to frames (new way to code list_times)
peak_win_frames[-1] = peak_win_frames[0] + np.round(np.diff(peak_win) / frame_dur).astype(int) # we redefine the upper limit of the window, otherwise the same window duration can lead to different upper limit values due to the division and flooring, and turning into int.
list_times = np.arange(peak_win_frames[0], peak_win_frames[-1]) #+1) # [40, 41, 42, 43, 44, 45, 46, 47, 48]
# for omit-evoked peak timing, compute it relative to samps_bef (which is the index of omission)
    # note: list_times goes up to peak_win_frames[-1]+1 but we don't add +1 for flash (see below)... omissions peak late, so we do it; but perhaps it makes sense to be consistent and just remove +1 from omissions too!
# you removed it on 04/20/2020
# same as above, now for flash-evoked responses :compute peak_win_frames, ie window to look for peak in frame units
if ~np.isnan(flash_win).any():
peak_win_frames_flash = samps_bef + np.round(flash_win / frame_dur).astype(int)
peak_win_frames_flash[-1] = peak_win_frames_flash[0] + np.round(np.diff(flash_win) / frame_dur).astype(int) # we redefine the upper limit of the window, otherwise the same window duration can lead to different upper limit values due to the division and flooring, and turning into int.
list_times_flash = np.arange(peak_win_frames_flash[0], peak_win_frames_flash[-1]) # [31, 32, 33, 34] #, 35, 36, 37, 38, 39]
else:
list_times_flash = np.nan
# window for measuring the timing of flash-evoked responses
if ~np.isnan(flash_win_timing).any():
peak_win_frames_flash_timing = samps_bef + np.round(flash_win_timing / frame_dur).astype(int)
peak_win_frames_flash_timing[-1] = peak_win_frames_flash_timing[0] + np.round(np.diff(flash_win_timing) / frame_dur).astype(int) # we redefine the upper limit of the window, otherwise the same window duration can lead to different upper limit values due to the division and flooring, and turning into int.
list_times_flash_timing = np.arange(peak_win_frames_flash_timing[0], peak_win_frames_flash_timing[-1]) # [30, 31, 32, 33]
else:
list_times_flash_timing = np.nan
return list_times, list_times_flash, list_times_flash_timing
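# Hypothetical numeric example of the time-to-frame conversion done above (values made up):
# with samps_bef = 40, frame_dur = np.array([0.093]), peak_win = np.array([0, .75]) and flash_win = np.array([-.75, 0]),
# list_times would be np.arange(40, 48) (the ~0.75 s omission window starting at the omission frame),
# and list_times_flash would be np.arange(32, 40) (the ~0.75 s window preceding the omission).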
'''
#%% Set time windows, in frame units, for computing flash and omission evoked responese
def set_frame_window_flash_omit_new(peak_win, samps_bef, frame_dur):
# np.floor was used here. On 04/29/2020 I changed it all to np.round, bc it is more accurate.
# Compute peak_win_frames, ie window to look for peak in frame units
peak_win_frames = samps_bef + np.round(peak_win / frame_dur).astype(int) # convert peak_win to frames (new way to code list_times)
peak_win_frames[-1] = peak_win_frames[0] + np.round(np.diff(peak_win) / frame_dur).astype(int) # we redefine the upper limit of the window, otherwise the same window duration can lead to different upper limit values due to the division and flooring, and turning into int.
list_times = np.arange(peak_win_frames[0], peak_win_frames[-1]) #+1) # [40, 41, 42, 43, 44, 45, 46, 47, 48]
# for omit-evoked peak timing, compute it relative to samps_bef (which is the index of omission)
return list_times
'''
def running_align_on_imaging(running_times, running_speed, yy_alltrace_time, doplots=0):
# Take care of running trace:
# align its timing on yy_alltrace_time
# turn it from stimulus-computer time resolution to imaging-computer time resolution, so it can be plotted on the same timescale as imaging.
# set the running time, speed, and stimulus computer time resolution (which recorded the running)
stim_computer_time_res = np.mean(np.diff(running_times))
frdur = np.mean(np.diff(yy_alltrace_time)) # same as frame_dur
runningSamps_per_imagingSamps = int(np.ceil(frdur / stim_computer_time_res)) # compare time resolution of imaging vs stimulus computer
print(f'Max running samples per imaging samples: {runningSamps_per_imagingSamps}')
    # first set the beginning and end indices (of running_times) to take the part of the running array that was recorded at the same time as imaging
running_times_firstInd_durImg = np.argmin(np.abs(running_times - yy_alltrace_time[0]))
running_times_lastInd_durImg = np.argmin(np.abs(running_times - yy_alltrace_time[-1])) # on what index of running_times, the last frame of imaging happened.
    # when I got running_times_win_yyalltrace_index, I noticed that the following was not needed at all:
    # adjust indices for the higher sampling rate of running vs imaging (n=runningSamps_per_imagingSamps)
    # running_times_firstInd_durImg = running_times_firstInd_durImg - (runningSamps_per_imagingSamps - 2) # I subtract 1 to be more conservative and not include running values that perhaps were recorded before imaging # another 1 is subtracted because this index itself will also be included
    # running_times_lastInd_durImg = running_times_lastInd_durImg + runningSamps_per_imagingSamps - 2 # I subtract 1 to be more conservative and not include running values that perhaps were recorded after imaging
# now use the above indeces to take the part of running array that was recorded at the same time as imaging
running_times_duringImaging = running_times[running_times_firstInd_durImg : running_times_lastInd_durImg+1]
running_speed_duringImaging = running_speed[running_times_firstInd_durImg : running_times_lastInd_durImg+1]
    # now convert the running times to imaging frame indices in yy_alltrace_time
# this is a bit slow
running_times_win_yyalltrace_index = convert_time_to_frame(yy_alltrace_time, running_times_duringImaging) # find the frame number in yy_alltrace_time during which each element in running_times_duringImaging happened.
# now downsample running elements to frame resolution
# frame-bin index for each running time value
bins = np.arange(max(running_times_win_yyalltrace_index)+1+1)
hist_inds = np.digitize(running_times_win_yyalltrace_index, bins)
# average running elements within each bin (frame): downsample running # the following is a bit slow
running_speed_duringImaging_downsamp = np.array([np.mean(running_speed_duringImaging[hist_inds==ihi]) for ihi in np.arange(1,len(bins))])
if len(running_speed_duringImaging_downsamp) != len(yy_alltrace_time):
sys.exit('something wrong!')
# as a test see how many running elements occurred within 1 imaging frame
hist, bin_edges = np.histogram(running_times_win_yyalltrace_index, bins)
    print(f'{np.unique(hist[1:-1])}: number of running points recorded within an imaging frame (excluding the 1st and last frames)\n')
    print(f'{hist[0]}: number of running points recorded within the 1st imaging frame\n')
    print(f'{hist[-1]}: number of running points recorded within the last imaging frame\n')
# plt.plot(bin_edges[0:-1]+1/2., hist)
##### Final result: now running trace is on the same time scale as the imaging trace, ie:
# each element of running_speed_duringImaging_downsamp corresponds to yy_alltrace_time
if doplots:
        plt.plot(running_times, running_speed)
plt.plot(running_times_duringImaging, running_speed_duringImaging)
plt.plot(yy_alltrace_time[running_times_win_yyalltrace_index], running_speed_duringImaging)
plt.plot(yy_alltrace_time, running_speed_duringImaging_downsamp)
        # df/f trace (note: yy_alltrace is assumed to be available in the calling scope; it is not an input to this function)
        plt.plot(yy_alltrace_time, yy_alltrace)
return running_speed_duringImaging_downsamp
#### Old method
'''
running_win_yyalltrace_index = convert_time_to_frame(yy_alltrace_time, running_times)
# find the first running element that corresponds to frame 0 of imaging (remember running recording may have started earlier than imaging)
running_ind_fist = np.argwhere(running_win_yyalltrace_index == 1).squeeze()[0] - runningSamps_per_imagingSamps
# find the last running element that corresponds to the last frame of imaging (remember running recording may have ended later than imaging)
running_ind_last = np.argwhere(running_win_yyalltrace_index == np.unique(running_win_yyalltrace_index)[-2]).squeeze()[-1] + runningSamps_per_imagingSamps
# take only the portion of running speed and time trace that was recorded during imaging
running_win_yyalltrace_index_duringImaging = running_win_yyalltrace_index[running_ind_fist: running_ind_last+1]
running_speed_duringImaging = running_speed[running_ind_fist: running_ind_last+1]
plt.plot(running_time, running_speed)
plt.plot(running_win_yyalltrace_index, running_speed)
plt.plot(running_win_yyalltrace_index_duringImaging, running_speed_duringImaging)
# average running elements within a frame: downsample running
# hist, bin_edges = np.histogram(running_win_yyalltrace_index_duringImaging, bins=np.arange(max(running_win_yyalltrace_index_duringImaging)+1+1))
# plt.plot(bin_edges[0:-1]+1/2., hist)
# frame-bin index for each running time value
bins = np.arange(max(running_win_yyalltrace_index_duringImaging)+1+1)
hist_inds = np.digitize(running_win_yyalltrace_index_duringImaging, bins)
running_speed_duringImaging_downsamp = np.array([np.mean(running_speed_duringImaging[hist_inds==ihi]) for ihi in np.arange(1,len(bins))])
running_win_yyalltrace_index_duringImaging_downsamp = bins[:-1]
'''
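# Minimal standalone sketch of the digitize-based downsampling used in running_align_on_imaging (arrays below are made up):
# frame_idx_per_sample = np.array([0, 0, 0, 1, 1, 1, 2, 2])   # imaging-frame index of each running sample
# speed = np.arange(8.)                                        # running-speed samples at the stimulus-computer rate
# bins = np.arange(max(frame_idx_per_sample) + 2)
# ids = np.digitize(frame_idx_per_sample, bins)                # 1-based bin id (imaging frame) per running sample
# speed_downsamp = np.array([np.mean(speed[ids == b]) for b in np.arange(1, len(bins))])  # -> array([1., 4., 6.5])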
#%% save the figure if it does not exist
def save_fig_if_notexist(aname, dir_planePair, nowStr, fmt='pdf'):
# aname is the main part of the figure name (not including the nowStr (ie the date)); eg :
# figname = 'traces_x0_plane0to0_200320-100429.pdf'
# aname = 'traces_x0_plane0to0_'
list_figs = os.listdir(dir_planePair) # list of files in dir_planePair
regex = re.compile(f'{aname}(.*).{fmt}')
files = [string for string in list_figs if re.match(regex, string)]
if len(files)>0: #os.path.isfile(os.path.join(dir_planePair, files)):
files = files[-1] # take the last file
print(f"Figure exists: {files}")
else:
        fign = os.path.join(dir_planePair, f'{aname}{nowStr}.{fmt}') # use fmt so the saved extension matches the one searched for above
print(f'Saving figure:\n{fign}')
plt.savefig(fign, bbox_inches='tight') # , bbox_extra_artists=(lgd,)
#%% plot histogram for one or multiple arrays
def plothist(topall, nbins=20, doSmooth=0, colors = ('c','b','k'), linestyles=('-','-','-'), labs=('','',''), xlab='', ylab='', tit_txt=''):
'''
topall = a,b,c
tit_txt = 'sigFractNs %.2f, aucRandTest %.2f' %(sigFract_mw, auc_r2)
labs = 'rand', 'test', 'train'
colors = 'gray', 'k','b'
linestyles = 'dotted', 'solid', 'loosely dashed'
xlab = 'Explained variance'
ylab ='Fraction neurons'
doSmooth = 3
nbins = 50
'''
if type(topall)==tuple:
topc = np.concatenate((topall))
else:
topc = topall
topall = topc,
r = np.max(topc) - np.min(topc)
binEvery = r/float(nbins)
# set bins
bn = np.arange(np.min(topc), np.max(topc), binEvery)
bn[-1] = np.max(topc)#+binEvery/10. # unlike digitize, histogram doesn't count the right most value
plt.subplots() #plt.subplot(h1) #(gs[0,0:2])
for itp in range(len(topall)):
hist, bin_edges = np.histogram(topall[itp], bins=bn)
hist = hist/float(np.sum(hist))
# if plotCumsum:
# hist = np.cumsum(hist)
if doSmooth!=0:
hist = smooth(hist, doSmooth)
# plot the center of bins
plt.plot(bin_edges[0:-1]+binEvery/2., hist, color=colors[itp], label=labs[itp], linestyle=linestyles[itp]) # plt.bar(bin_edges[0:-1], hist, binEvery, color=colors[0], alpha=.4, label=lab1)
fs = 12
plt.legend(loc='center left', bbox_to_anchor=(1, .7), frameon=False, fontsize=fs)
plt.ylabel(ylab)
plt.xlabel(xlab)
plt.title(tit_txt)
makeNicePlots(plt.gca())
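# Hypothetical usage sketch of plothist (random data, made up just to show the call signature):
# a = np.random.randn(500); b = np.random.randn(500) + 1
# plothist((a, b), nbins=30, doSmooth=0, colors=('k', 'b'), linestyles=('-', '--'),
#          labs=('group1', 'group2'), xlab='value', ylab='fraction', tit_txt='example')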
#%%
def zscore(traces, softNorm=False):
# or use :
# from sklearn.preprocessing import StandardScaler
# scaler = StandardScaler()
# train = scaler.fit_transform(traces_x00.T)
# plt.plot(scaler.mean_)
# plt.plot(scaler.scale_)
m = np.mean(traces, axis=1) # neurons
s = np.std(traces, axis=1) # neurons
    if softNorm:
        s = s + thAct # note: thAct (a small additive constant for soft normalization) is assumed to be defined in the calling scope; it is not an input to this function
traces = ((traces.T - m) / s).T # neurons x times
return traces, m, s
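# Hypothetical usage sketch: z-score a neurons x frames matrix (soft normalization off, so the external thAct is not needed):
# traces_z, m, s = zscore(np.random.rand(10, 1000), softNorm=False)
# # traces_z has the same shape; each row now has ~zero mean and unit standard deviation.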
#%%
def evaluate_components(traces, N=5, robust_std=False):
    # traces: neurons x frames
    """ Define a metric and order components according to the probability of some "exceptional events" (like a spike). Such probability is defined as the likelihood of observing the actual trace values over N samples given an estimated noise distribution.
    The function first estimates the noise distribution by considering the dispersion around the mode. This is done only using values lower than the mode. The estimation of the noise std is made robust by using the approximation std=iqr/1.349.
    Then, the probability of having N consecutive events is estimated. This probability is used to order the components.
Parameters
----------
traces: ndarray
Fluorescence traces
N: int
N number of consecutive events
Returns
-------
idx_components: ndarray
the components ordered according to the fitness
fitness: ndarray
erfc: ndarray
        probability at each time step of observing the N consecutive actual trace values given the distribution of noise
Created on Tue Aug 23 09:40:37 2016
@author: Andrea G with small modifications from farznaj
"""
# import scipy # import numpy # from scipy.stats import norm
# import numpy as np
# import scipy.stats as st
T=np.shape(traces)[-1]
# import pdb
# pdb.set_trace()
md = mode_robust(traces, axis=1)
ff1 = traces - md[:, None]
# only consider values under the mode to determine the noise standard deviation
ff1 = -ff1 * (ff1 < 0)
if robust_std:
# compute 25 percentile
ff1 = np.sort(ff1, axis=1)
ff1[ff1 == 0] = np.nan
        Ns = np.round(np.sum(ff1 > 0, 1) * .5).astype(int) # cast to int so Ns can be used as an index below
iqr_h = np.zeros(traces.shape[0])
for idx, el in enumerate(ff1):
iqr_h[idx] = ff1[idx, -Ns[idx]]
# approximate standard deviation as iqr/1.349
sd_r = 2 * iqr_h / 1.349
else:
Ns = np.sum(ff1 > 0, 1)
sd_r = np.sqrt(np.sum(ff1**2, 1) / Ns)
#
# compute z value
z = (traces - md[:, None]) / (3 * sd_r[:, None])
    # probability of observing values larger than or equal to z given a normal
    # distribution with mean md and std sd_r
erf = 1 - st.norm.cdf(z)
# use logarithm so that multiplication becomes sum
erf = np.log(erf)
filt = np.ones(N)
# moving sum
erfc = np.apply_along_axis(lambda m: np.convolve(m, filt, mode='full'), axis=1, arr=erf)
erfc = erfc[:,:T]
    # select the most extreme value (minimum log-probability, i.e. least likely under the noise model) for each trace
fitness = np.min(erfc, 1)
ordered = np.argsort(fitness)
    idx_components = ordered # [::-1] # select only a portion of components
# fitness = fitness[idx_components] % FN commented bc we want the indexing to match C and YrA.
# erfc = erfc[idx_components] % FN commented bc we want the indexing to match C and YrA.
return idx_components, fitness, erfc
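# Hypothetical usage sketch (random traces just for illustration; real inputs would be df/f, neurons x frames):
# traces = np.random.randn(5, 2000)
# idx_components, fitness, erfc = evaluate_components(traces, N=5, robust_std=False)
# # idx_components orders neurons from most to least "exceptional" activity (most negative fitness first).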
def mode_robust(inputData, axis=None, dtype=None):
"""
Robust estimator of the mode of a data set using the half-sample mode.
.. versionadded: 1.0.3
"""
if axis is not None:
fnc = lambda x: mode_robust(x, dtype=dtype)
dataMode = np.apply_along_axis(fnc, axis, inputData)
else:
# Create the function that we can use for the half-sample mode
def _hsm(data):
if data.size == 1:
return data[0]
elif data.size == 2:
return data.mean()
elif data.size == 3:
i1 = data[1] - data[0]
i2 = data[2] - data[1]
                if i1 < i2:
                    return data[:2].mean()
                elif i2 < i1: # note: the original condition here (i2 > i1) duplicated the branch above and could never be reached
                    return data[1:].mean()
                else:
                    return data[1]
else:
# wMin = data[-1] - data[0]
wMin = np.inf
N = data.size / 2 + data.size % 2
N = int(N)
for i in range(0, N):
w = data[i + N - 1] - data[i]
if w < wMin:
wMin = w
j = i
return _hsm(data[j:j + N])
data = inputData.ravel()
if type(data).__name__ == "MaskedArray":
data = data.compressed()
if dtype is not None:
data = data.astype(dtype)
# The data need to be sorted for this to work
data = np.sort(data)
# Find the mode
dataMode = _hsm(data)
return dataMode
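# Hypothetical example (made-up values): the half-sample mode stays near the bulk of the data and ignores the outlier.
# mode_robust(np.array([1., 1.2, 1.3, 1.6, 10.]))   # -> 1.25; the outlier (10.) does not pull the estimate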
#%% Find length of gaps between events
def find_event_gaps(evs):
# evs: boolean; neurons x frames
# evs indicates if there was an event. (it can be 1 for several continuous frames too)
# eg: evs = (traces >= th_ag) # neurons x frames
d = np.diff(evs.astype(int), axis=1) # neurons x frames
begs = np.array(np.nonzero(d==1)) # 2 x sum(d==1) # first row is row index (ie neuron) in d; second row is column index (ie frame) in d.
ends = np.array(np.nonzero(d==-1)) # 2 x sum(d==-1)
#np.shape(begs)
#np.shape(ends)
    ##### Note: begs_evs does not include the first event; so index i in gap_evs corresponds to index i in begs_evs and ends_evs.
    ##### however, gaps are truly gaps, ie the number of event-free frames that span the interval between two events.
    gap_evs_all = [] # includes the gap before the 1st event and the gap after the last event, in addition to the inter-event gaps
    gap_evs = [] # includes only gaps between events
    begs_evs = [] # index of event onsets, excluding the 1st event. (in fact these are 1 index before the actual event onset, since they are computed on the difference trace ('d'))
    ends_evs = [] # index of event offsets, excluding the last event.
    bgap_evs = [] # number of frames before the first event
    egap_evs = [] # number of frames after the last event
for iu in range(evs.shape[0]):
# print(iu)
# sum(begs[0]==0)
if sum(evs[iu]) > 0: # make sure there are events in the trace of unit iu
            begs_this_n = begs[1,begs[0]==iu] # indices belong to "d" (the difference trace)
ends_this_n = ends[1,ends[0]==iu]
# gap between event onsets will be begs(next event) - ends(current event)
if evs[iu, 0]==False and evs[iu, -1]==False: # normal case
#len(begs_this_n) == len(ends_this_n):
b = begs_this_n[1:]
e = ends_this_n[:-1]
bgap = [begs_this_n[0] + 1] # after how many frames the first event happened
egap = [evs.shape[1] - ends_this_n[-1] - 1] # how many frames with no event exist after the last event
elif evs[iu, 0]==True and evs[iu, -1]==False: # first event was already going when the recording started.
#len(begs_this_n)+1 == len(ends_this_n):
b = begs_this_n
e = ends_this_n[:-1]
bgap = []
egap = [evs.shape[1] - ends_this_n[-1] - 1]
elif evs[iu, 0]==False and evs[iu, -1]==True: # last event was still going on when the recording ended.
#len(begs_this_n) == len(ends_this_n)+1:
b = begs_this_n[1:]
e = ends_this_n
bgap = [begs_this_n[0] + 1]
egap = []
elif evs[iu, 0]==True and evs[iu, -1]==True: # first event and last event were happening when the recording started and ended.
# print('cool :)')
b = begs_this_n
e = ends_this_n
bgap = []
egap = []
else:
sys.exit('doesnt make sense! plot d to debug')
gap_this_n = b - e
gap_this = np.concatenate((bgap, gap_this_n, egap)).astype(int) # includes all gaps, before the 1st event, between events, and after the last event.
else: # there are no events in this neuron; set everything to nan
gap_this = np.nan
gap_this_n = np.nan
b = np.nan
e = np.nan
bgap = np.nan
egap = np.nan
        gap_evs_all.append(gap_this) # includes the gap before the 1st event and the gap after the last event too.
gap_evs.append(gap_this_n) # only includes gaps between events: b - e
begs_evs.append(b)
ends_evs.append(e)
bgap_evs.append(bgap)
egap_evs.append(egap)
gap_evs_all = np.array(gap_evs_all) # size: number of neurons; each element shows the gap between events for a given neuron
gap_evs = np.array(gap_evs)
begs_evs = np.array(begs_evs)
ends_evs = np.array(ends_evs)
bgap_evs = np.array(bgap_evs)
egap_evs = np.array(egap_evs)
return gap_evs_all, begs_evs, ends_evs, gap_evs, bgap_evs, egap_evs, begs, ends
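# Hypothetical example with a tiny boolean event matrix (1 neuron, 10 frames; values made up):
# evs = np.array([[False, True, True, False, False, False, True, False, False, False]])
# gap_evs_all, begs_evs, ends_evs, gap_evs, bgap_evs, egap_evs, begs, ends = find_event_gaps(evs)
# # gap_evs[0] -> [3]  (3 event-free frames between the two events)
# # bgap_evs[0] -> [1] (1 frame before the first event); egap_evs[0] -> [3] (3 frames after the last event)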
#%% shift and scale traces so they can be superimposed and easily compared.
# Note: this code needs more work to take care of cases where max(trace) is 0 or inf.
def supimpose(tt):
# tt: neurons x frames
p = np.nanmean(tt, axis=0)
p = p-p[0]
# p = p / min(max(p),40)
p = p / max(p) # this needs work... take care of 0, also inf,
return p
#%% Pool all sessions and layers for each area, do this for each mouse
def pool_sesss_planes_eachArea(area, y):
# area: ['LM', 'LM', 'LM', 'LM', 'VISp', 'VISp', 'VISp', 'VISp', 'LM', 'LM', 'LM', 'LM', 'VISp', 'VISp', 'VISp', 'VISp'] # (8*num_sessions)
# y : (8*num_sessions) x time or # (8*num_sessions)
    # set area indices
# area = trace_peak_allMice.iloc[im]['area'] # (8*num_sessions)
distinct_areas, i_areas = np.unique(area, return_inverse=True)
# print(distinct_areas)
if len(distinct_areas) > 2:
sys.exit('There should not be more than two areas!! Fix the area names!')
# below has size: num_areas x (num_layers_per_area x num_sessions) x nFrames_upsampled
# so, 2 x (4 x num_sessions) x nFrames_upsampled
# For each area, first take all the layers of session 1, then take all the layers of session 2
y_pooled_sesss_planes_eachArea = np.array([y[i_areas == ida] for ida in range(len(distinct_areas))]) # 2 x (8/2 x num_sessions) x nFrames_upsampled
return y_pooled_sesss_planes_eachArea, distinct_areas, i_areas
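# Hypothetical usage sketch with 2 sessions x 8 planes (values made up):
# area = np.array(['LM']*4 + ['VISp']*4 + ['LM']*4 + ['VISp']*4)   # (8*num_sessions,)
# y = np.random.rand(16, 100)                                       # (8*num_sessions) x time
# y_pooled, distinct_areas, i_areas = pool_sesss_planes_eachArea(area, y)
# # y_pooled has shape 2 x 8 x 100: for each area, the 4 planes of session 1 followed by the 4 planes of session 2.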
#%% For each layer (depth), pool data across sessions and areas
def pool_sesss_areas_eachDepth(planes_allsess, y, num_depth=4):
# planes_allsess: [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7] # (8*num_sessions)
# y: (8*num_sessions) x time or (8*num_sessions)
# num_depth: 4
y_pooled_sesss_areas_eachDepth = []
for idepth in range(num_depth): # idepth = 0
# merge data with the same depth across 2 areas
# For each depth, first take all the areas of session 1, then take all the areas of session 2
b = np.logical_or(planes_allsess==idepth, planes_allsess==idepth + num_depth)
a = y[b] #np.array([t[b]]).squeeze() # (2 x num_sess) x nFrames_upsampled
y_pooled_sesss_areas_eachDepth.append(a)
y_pooled_sesss_areas_eachDepth = np.array(y_pooled_sesss_areas_eachDepth) # 4 x (2 x num_sess) x nFrames_upsampled # 4 is the number of distinct depths: depth1_area2
return y_pooled_sesss_areas_eachDepth
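# Hypothetical usage sketch (planes 0-3 in one area, 4-7 in the other; values made up):
# planes_allsess = np.tile(np.arange(8), 2)    # 2 sessions x 8 planes
# y = np.random.rand(16, 100)
# y_eachDepth = pool_sesss_areas_eachDepth(planes_allsess, y, num_depth=4)
# # y_eachDepth has shape 4 x 4 x 100: for each depth, both areas of session 1 followed by both areas of session 2.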
#%% Add to the plots the following: flash/gray screen lines, proper tick marks, and legend
def plot_flashLines_ticks_legend(lims, H, flashes_win_trace_index_unq_time, grays_win_trace_index_unq_time, x='', bbox_to_anchor=(1, .7), ylab='% Classification accuracy', xmjn='', xlab='Time after omission (sec)', omit_aligned=0):
# h1 = plt.plot()
# plot_flashLines_ticks_legend([], h1, flashes_win_trace_index_unq_time, grays_win_trace_index_unq_time, time_trace, bbox_to_anchor=bb, ylab=ylabel, xmjn=xmjn)
if len(lims)!=0: # set lims=[] when lims is not known.
mn = lims[0] # np.round(np.nanmin(av_test_shfl_avSess_eachP)) # 45
mx = lims[1] # np.round(np.nanmax(av_test_data_avSess_eachP)) # 80
# rg = (mx - mn) / 10.
else:
mn = plt.gca().get_ylim()[0];
mx = plt.gca().get_ylim()[1];
# mark omission onset
plt.vlines([0], mn, mx, color='k', linestyle='--')
# mark flash duration with a shaded area
    ### NOTE: you should remove 0 from flashes_win_trace_index_unq_time
    ### because at time 0 there is no flash; there is an omission!!
flashes_win_trace_index_unq_time0 = flashes_win_trace_index_unq_time
flash_dur = .25 #np.unique(grays_win_trace_index_unq_time - flashes_win_trace_index_unq_time0)
if omit_aligned:
omit_ind = np.argwhere(flashes_win_trace_index_unq_time0==0).squeeze()
# flashes_win_trace_index_unq_time = np.delete(flashes_win_trace_index_unq_time, omit_ind)
flashes_win_trace_index_unq_time_new = np.delete(flashes_win_trace_index_unq_time0, omit_ind)
else:
flashes_win_trace_index_unq_time_new = flashes_win_trace_index_unq_time0
for i in range(len(flashes_win_trace_index_unq_time_new)):
plt.axvspan(flashes_win_trace_index_unq_time_new[i], flashes_win_trace_index_unq_time_new[i] + flash_dur, alpha=0.2, facecolor='y')
'''
# mark the onset of flashes
plt.vlines(flashes_win_trace_index_unq_time_new, mn, mx, color='y', linestyle='-.', linewidth=.7)
# mark the onset of grays
plt.vlines(grays_win_trace_index_unq_time, mn, mx, color='gray', linestyle=':', linewidth=.7)
'''
if xmjn=='':
xmj = np.unique(np.concatenate((np.arange(0, x[0], -.5), np.arange(0, x[-1], .5))))
xmn = np.arange(.25, x[-1], .5)
else:
xmj = xmjn[0]
xmn = xmjn[1]
ax = plt.gca()
ax.set_xticks(xmj); # plt.xticks(np.arange(0,x[-1],.25)); #, fontsize=10)
ax.set_xticklabels(xmj, rotation=45)
ax.xaxis.set_minor_locator(ticker.FixedLocator(xmn))
ax.tick_params(labelsize=10, length=6, width=2, which='major')
ax.tick_params(labelsize=10, length=5, width=1, which='minor')
# plt.xticklabels(np.arange(0,x[-1],.25))
if len(H)!=0:
plt.legend(handles=H, loc='center left', bbox_to_anchor=bbox_to_anchor, frameon=False, handlelength=1, fontsize=12)
plt.ylabel(ylab, fontsize=12)
plt.xlabel(xlab, fontsize=12)
if len(lims)!=0:
if ~np.isnan(lims).any():
plt.ylim(lims)
# plt.legend(handles=[h1[0], h1[1], h1[2], h1[3], h1[4], h1[5], h1[6], h1[7], h2], loc='center left', bbox_to_anchor=(1, .7), frameon=False, handlelength=1, fontsize=12)
plt.grid(False) # plt.box(on=None) # plt.axis(True)
seaborn.despine()#left=True, bottom=True, right=False, top=False)
#%% Look for a file in a directory; if desired, sort by modification time, and only return the latest file.
# example inputs:
# name = 'all_sess_%s_.' %(analysis_name)
# name = 'Yneuron%d_model_' %neuron_y
# Set the name of all_sess h5 files made in *_init scripts which ran the analysis of interest, named analysis_name, and exist in dir_now
def all_sess_set_h5_fileName(name, dir_now, all_files=0):
regex = re.compile(name) # + '.h5')
# regex = re.compile(aname + '(.*).hdf5') # note: "aname" does not include the directory path; it's just the file name.
l = os.listdir(dir_now)
h5_files = [string for string in l if re.match(regex, string)] # string=l[0]
if len(h5_files)==0:
print('Error: no h5 file exists!!!')
allSessName = ''
print(regex)
if all_files==0: # only get the latest file, otherwise get all file names
# Get the modification times of the existing analysis folders
modifTimes = [os.path.getmtime(os.path.join(dir_now, h5_files[i])) for i in range(len(h5_files))]
# Find all the old analysis folders
if len(modifTimes) > 1:
h5_files = np.array(h5_files)[np.argsort(modifTimes).squeeze()]
print(h5_files)
if len(h5_files)==0:
print('h5 file does not exist! (run svm_init to call svm_plots_init and save all_sess)')
elif len(h5_files)>1:
print('More than 1 h5 file exists! Loading the latest file')
allSessName = os.path.join(dir_now, h5_files[-1])
else:
allSessName = os.path.join(dir_now, h5_files[0])
print(allSessName)
else:
allSessName = []
for i in range(len(h5_files)):
allSessName.append(os.path.join(dir_now, h5_files[i]))
print(allSessName)
# read hdf file