forked from caraortizmah/x-ray_scripting_out
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstep5.sh
executable file
·116 lines (93 loc) · 4.68 KB
/
step5.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/bin/bash
# step5.sh
#
# For every virtual MO listed by step4.sh, pick from the MO population table
# written by step1.sh the rows that belong to the atoms of the residue of
# interest (called residue B, because the typical use is studying amino acids
# in proteins).
#
# Usage: step5.sh B_ini B_fin out1_file out1_file4
#   B_ini       first atom number of residue B
#   B_fin       last atom number of residue B
#   out1_file   MO population table obtained from step1.sh
#   out1_file4  virtual MO list (virt_MO.tmp) obtained from step4.sh
#
# Output: one file, resB_mo.out, written in the current directory. For each
# MO section that has rows for residue B it holds one line
# "num-1 sym lvl <section header>" followed by the selected rows; the file
# ends with a line containing a single blank.
#
# The intermediate files resB_mo2.tmp, resB_mo3.tmp and vmo_line.tmp are left
# in the current directory after the run so they can be inspected.
#
# Strict mode (set -e / pipefail) is deliberately not enabled: grep finding
# no match is a normal situation here and must not abort the run.

main() {
  if (( $# < 4 )); then
    printf 'Usage: %s B_ini B_fin out1_file out1_file4\n' "${0##*/}" >&2
    return 2
  fi

  local B_ini="$1"      # first atom number for residue B
  local B_fin="$2"      # last atom number for residue B
  local out1_file="$3"  # MO population obtained from step1.sh
  local out1_file4="$4" # virt_MO.tmp, virtual MO list obtained from step4.sh

  local int_re='^[0-9]+$'
  if [[ ! "$B_ini" =~ $int_re || ! "$B_fin" =~ $int_re ]]; then
    printf '%s: B_ini and B_fin must be atom numbers (got "%s" and "%s")\n' \
      "${0##*/}" "$B_ini" "$B_fin" >&2
    return 2
  fi

  local input
  for input in "$out1_file" "$out1_file4"; do
    if [[ ! -r "$input" ]]; then
      printf '%s: cannot read input file: %s\n' "${0##*/}" "$input" >&2
      return 1
    fi
  done

  # Start from a clean slate. The list must name every intermediate file this
  # script writes (including resB_mo2.tmp and resB_mo3.tmp); otherwise
  # sections left over from a previous run leak into the new result.
  rm -f resB_mo_3.tmp resB_mo_2.tmp resB_mo_2_1.tmp resB_mo.tmp \
    resB_mo2.tmp resB_mo3.tmp vmo_line.tmp

  # Unique virtual MO numbers, in ascending numeric order.
  local -a virt_mos=()
  read -r -d '' -a virt_mos < <(sort -nu -- "$out1_file4")

  # Copy each section that mentions a target MO: from the line containing
  # " <MO> " down to the next blank line. The file is truncated (not appended
  # to), so only this run's sections are ever present.
  local mo
  for mo in "${virt_mos[@]}"; do
    sed -n "/ ${mo} /,/^\$/p" "$out1_file"
  done > resB_mo2.tmp

  # Up to 6 MOs share one section in the step1.sh output, so the same section
  # may have been copied up to 6 times. Keep the first copy of every line.
  # NOTE(review): this de-duplicates line by line over the whole file, so a
  # row whose text is identical in two different sections survives only in
  # the first one. Kept as is because the output format relies on it; confirm
  # that this is the intended behaviour.
  awk '!seen[$0]++' resB_mo2.tmp > resB_mo3.tmp

  # Build "start end" line-number pairs, one per section of resB_mo3.tmp:
  # every line that mentions a target MO starts a section, and a section runs
  # up to the start of the next one (the last runs to the end of the file).
  {
    for mo in "${virt_mos[@]}"; do
      grep -n -- " ${mo} " resB_mo3.tmp | cut -d: -f1
    done
    # Reading from stdin makes wc print the bare count on every platform.
    wc -l < resB_mo3.tmp
  } | sort -nu \
    | awk 'NF { if (have_prev) print prev, $1; prev = $1; have_prev = 1 }' \
      > vmo_line.tmp

  # For each section, and for each atom of residue B in ascending order, emit
  # the section header followed by the matching row. A row matches when it has
  # 9 fields (as in the original out file), its first field equals the atom
  # number, and it contains "<atom> " literally. Comparing the first field
  # avoids wrong string matches such as '8 C' inside '78 C'.
  local first_row last_row
  while read -r first_row last_row; do
    awk -v first="$first_row" -v last="$last_row" \
        -v lo="$B_ini" -v hi="$B_fin" '
      FNR < first  { next }
      FNR > last   { exit }          # END still runs after exit
      FNR == first { header = $0 }   # MO number list of this section
      NF == 9 {
        for (atom = lo + 0; atom <= hi + 0; atom++)
          if ($1 == atom && index($0, atom " "))
            picked[atom] = picked[atom] $0 "\n"
      }
      END {
        for (atom = lo + 0; atom <= hi + 0; atom++) {
          n = split(picked[atom], row, "\n")
          # The piece after the final newline is empty, hence i < n.
          for (i = 1; i < n; i++)
            printf "num-1 sym lvl %s\n%s\n\n", header, row[i]
        }
      }
    ' resB_mo3.tmp
  done < vmo_line.tmp > resB_mo_2.tmp

  # Drop repeated lines (the header is emitted once per row above) and all
  # empty lines, then terminate the file with a line holding a single blank.
  awk '$0 != "" && !seen[$0]++' resB_mo_2.tmp > resB_mo_3.tmp
  printf ' \n' >> resB_mo_3.tmp
  mv -- resB_mo_3.tmp resB_mo.out

  # Comment the following line to check the writing-on-disk process
  rm -f resB_mo_2.tmp resB_mo_2_1.tmp resB_mo.tmp
}

# Must stay the last command so the script exits with the status of main.
main "$@"