-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparseFile.java
147 lines (120 loc) · 3.61 KB
/
parseFile.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Reads a base-pairing file and stores it as two parallel, 1-indexed lists:
 * the base at each position and the position it is paired with (0 = unpaired).
 *
 * <p>Index 0 of both lists holds a placeholder ({@code "#"} and {@code 0}) so
 * that list indices line up with the 1-based positions used as pairing
 * partners. Records are stored in the order they appear in the file; the
 * leading index column of each record is not used.
 */
public class parseFile {
    /** Number of leading non-sequence lines skipped before records are read. */
    private static final int HEADER_LINES = 4;

    /**
     * One sequence record: an index, a single word character (the base) and
     * the partner position, separated by whitespace. Compiled once instead of
     * on every line.
     */
    private static final Pattern RECORD = Pattern.compile("\\d+\\s+(\\w)\\s+(\\d+)");

    private ArrayList<Integer> positionV;
    private final ArrayList<String> sequenceV;

    /**
     * Parses the given file.
     *
     * <p>The first four lines are skipped as header data (fewer if the file is
     * shorter). Every following line that consists exactly of
     * {@code <digits> <base> <digits>} contributes one base and one partner
     * position; all other lines are ignored.
     *
     * @param newFile the file to read
     * @throws FileNotFoundException if {@code newFile} cannot be opened
     * @throws NumberFormatException if a partner position does not fit in an {@code int}
     */
    public parseFile(File newFile) throws FileNotFoundException {
        positionV = new ArrayList<>();
        positionV.add(0); // placeholder so that real data starts at index 1
        sequenceV = new ArrayList<>();
        sequenceV.add("#"); // placeholder so that real data starts at index 1

        // try-with-resources closes the scanner even if parsing throws.
        try (Scanner reader = new Scanner(newFile)) {
            // Skip the header, tolerating files shorter than the header itself.
            for (int skipped = 0; skipped < HEADER_LINES && reader.hasNextLine(); skipped++) {
                reader.nextLine();
            }

            while (reader.hasNextLine()) {
                Matcher record = RECORD.matcher(reader.nextLine());
                if (record.matches()) {
                    // Parse before adding so both lists stay the same length on failure.
                    int partner = Integer.parseInt(record.group(2));
                    positionV.add(partner);
                    sequenceV.add(record.group(1));
                }
            }
        }
    }

    /**
     * Removes every non-canonical pairing from the position list.
     *
     * <p>A position keeps its partner only if the two bases form one of the
     * pairs accepted by {@link #LocCheck(String, String)}. Otherwise, and also
     * when the partner index lies outside the stored sequence, the position
     * becomes unpaired (0). The position list is replaced by a new list.
     */
    public void removeNonCanonicals() {
        int size = positionV.size();
        ArrayList<Integer> filtered = new ArrayList<>(size);

        for (int i = 0; i < size; i++) {
            int partner = positionV.get(i);
            // A partner that points outside the sequence cannot be checked,
            // so it is treated as unpaired instead of raising an exception.
            boolean inRange = partner > 0 && partner < sequenceV.size();
            if (inRange && LocCheck(sequenceV.get(i), sequenceV.get(partner))) {
                filtered.add(partner);
            } else {
                filtered.add(0);
            }
        }
        positionV = filtered;
    }

    /**
     * Returns the base list. Index 0 is the {@code "#"} placeholder.
     * The returned list is the internal list, not a copy.
     */
    public ArrayList<String> sequences() {
        return sequenceV;
    }

    /**
     * Returns the partner-position list. Index 0 is the {@code 0} placeholder.
     * The returned list is the internal list, not a copy.
     */
    public ArrayList<Integer> positions() {
        return positionV;
    }

    /**
     * Returns the number of entries in the position list. This count includes
     * the placeholder at index 0, so it is one more than the number of records.
     */
    public int length() {
        return positionV.size();
    }

    /**
     * Tells whether two bases form an accepted pair: C-G, G-C, A-U, U-A, or
     * the wobble pairs G-U and U-G. Comparison is case-sensitive.
     */
    private boolean LocCheck(String bp1, String bp2) {
        switch (bp1) {
            case "C":
                return bp2.equals("G");
            case "G":
                return bp2.equals("C") || bp2.equals("U");
            case "A":
                return bp2.equals("U");
            case "U":
                return bp2.equals("A") || bp2.equals("G");
            default:
                return false;
        }
    }
}