1
+ use std:: io:: Cursor ;
1
2
use std:: path:: Path ;
2
- use csv:: ReaderBuilder ;
3
+ use csv:: { Reader , ReaderBuilder } ;
3
4
use tokio:: fs;
4
5
use tokio:: fs:: File ;
5
6
use tokio:: io:: { AsyncSeekExt , AsyncWriteExt } ;
6
7
7
8
8
9
/// Value parsed by `extract_headers` from each cell after the first three
/// columns of the first non-empty row of the spreadsheet.
pub type Day = u64 ;
9
10
11
/// Text copied by `extract_headers` from each cell after the first three
/// columns of the second non-empty row (presumably a mouse identifier — confirm).
+ pub type Mouse = String ;
12
+
10
13
/// Text copied by `extract_headers` from each cell after the first three
/// columns of the third non-empty row.
pub type Label = String ;
11
14
12
/// One data row of the spreadsheet, as produced by `extract_peptides`.
#[derive(Debug, Clone, PartialEq)]
pub struct Peptide {
    /// Text of the first column of the row.
    pub name: String,
    /// Text of the second column of the row.
    pub protein: String,
    /// Third column of the row, parsed as a floating-point number.
    pub charge_mass_ratio: f64,
    /// One entry per remaining column; `None` where the cell did not parse as
    /// an unsigned integer (for example `#N/A`).
    pub intensities: Vec<Option<u64>>,
}
18
22
19
23
pub async fn parse ( spreadsheet : & Path )
20
24
-> Result <
21
- ( Vec < Day > , Vec < Label > , Vec < Protein > ) ,
25
+ ( Vec < Day > , Vec < Mouse > , Vec < Label > , Vec < Peptide > ) ,
22
26
Box < dyn std:: error:: Error >
23
27
>
24
28
{
25
- let mut file = File :: open ( spreadsheet) . await ?;
29
+ let contents = fs :: read ( spreadsheet) . await ?;
26
30
let mut rdr = ReaderBuilder :: new ( )
27
31
. has_headers ( false )
28
- . from_reader ( file. seek ( 1 ) ) ;
32
+ . from_reader ( Cursor :: new ( contents) ) ;
33
+
34
+ let ( days, mice, labels) = extract_headers ( & mut rdr) ?;
35
+ let peptides = extract_peptides ( & mut rdr) ?;
36
+
37
+ Ok ( ( days, mice, labels, peptides) )
38
+ }
39
+
40
+ fn extract_peptides ( rdr : & mut Reader < Cursor < Vec < u8 > > > ) -> Result < Vec < Peptide > , Box < dyn std:: error:: Error > > {
41
+ let mut peptides = vec ! [ ] ;
42
+
43
+ for result in rdr. records ( ) . skip ( 1 ) {
44
+ let record = result?;
45
+ let name = record[ 0 ] . to_string ( ) ;
46
+ let protein = record[ 1 ] . to_string ( ) ;
47
+ let charge_mass_ratio = record[ 2 ] . parse :: < f64 > ( ) ?;
48
+ let intensities = record. iter ( ) . skip ( 3 ) . map ( |value| {
49
+ if value == "#N/A" {
50
+ None
51
+ } else {
52
+ value. parse :: < u64 > ( ) . ok ( )
53
+ }
54
+ } )
55
+ . collect :: < Vec < Option < u64 > > > ( ) ;
56
+
57
+ peptides. push ( Peptide {
58
+ name,
59
+ protein,
60
+ charge_mass_ratio,
61
+ intensities,
62
+ } ) ;
63
+ }
64
+
65
+ Ok ( peptides)
66
+ }
67
+
68
+ fn extract_headers ( rdr : & mut Reader < Cursor < Vec < u8 > > > ) -> Result < ( Vec < Day > , Vec < Mouse > , Vec < Label > ) , Box < dyn std:: error:: Error > > {
69
+ let mut non_empty_row_count = 0 ;
70
+ let mut days = vec ! [ ] ;
71
+ let mut mice = vec ! [ ] ;
72
+ let mut labels = vec ! [ ] ;
73
+
74
+ for row in rdr. records ( ) {
75
+ let record = row?;
76
+ if record. iter ( ) . any ( |field| !field. is_empty ( ) ) {
77
+ non_empty_row_count += 1 ;
78
+
79
+ if non_empty_row_count == 1 {
80
+ days = record
81
+ . iter ( )
82
+ . skip ( 3 )
83
+ . map ( |col| col. to_string ( ) . parse :: < Day > ( ) . unwrap ( ) )
84
+ . collect :: < Vec < _ > > ( ) ;
85
+ }
86
+
87
+ if non_empty_row_count == 2 {
88
+ mice = record
89
+ . iter ( )
90
+ . skip ( 3 )
91
+ . map ( |col| col. to_string ( ) . parse :: < Mouse > ( ) . unwrap ( ) )
92
+ . collect :: < Vec < _ > > ( ) ;
93
+ }
94
+
95
+ if non_empty_row_count == 3 {
96
+ labels = record
97
+ . iter ( )
98
+ . skip ( 3 )
99
+ . map ( |col| col. to_string ( ) . parse :: < Label > ( ) . unwrap ( ) )
100
+ . collect :: < Vec < _ > > ( ) ;
101
+ }
29
102
103
+ if non_empty_row_count == 4 {
104
+ break ;
105
+ }
106
+ }
107
+ }
30
108
109
+ Ok ( ( days, mice, labels) )
31
110
}
0 commit comments