Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add files via upload #1

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 18 additions & 27 deletions makeOnemapInput.sh
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
# Ezra Huscher, December 2020
# feed in initial VCF (.vcf) file,
# -------------------------------------
# Ezra Huscher, March 2021
# feed in filtered VCF (.vcf) file,
# convert it to a onemap input file
# reference: https://cristianetaniguti.github.io/htmls/Inbred_Based_Populations.html

# USAGE: bash makeOnemapInput.sh final_vcf.vcf
# ------------------------------------
# Path of the VCF to convert, taken from the first positional argument.
orig_vcf="${1}"

# Add these 3 lines to the top:
#data type f2 backcross
#102 16552 0 0 0
#ID1 ID2 ID3 ID4 ID5 ID6 ID7 ID8 ID9 ID10 ID11 ID12 ID13 ID14 ID15 ID16 ID17 ID18 ID19 ID20 ID21 ID22 ID23 ID24 ID25 ID26 ID27 ID28 ID29 ID30 ID31 ID32 ID33 ID34 ID35 ID36 ID37 ID38 ID39 ID40 ID41 ID42 ID43 ID44 ID45 ID46 ID47 ID48 ID49 ID50 ID51 ID52 ID53 ID54 ID55 ID56 ID57 ID58 ID59 ID60 ID61 ID62 ID63 ID64 ID65 ID66 ID67 ID68 ID69 ID70 ID71 ID72 ID73 ID74 ID75 ID76 ID77 ID78 ID79 ID80 ID81 ID82 ID83 ID84 ID85 ID86 ID87 ID88 ID89 ID90 ID91 ID92 ID93 ID94 ID95 ID96 ID97 ID98 ID99 ID100 ID101 ID102
# Write the three onemap header lines to their own file in one go:
#   1. the population type,
#   2. the counts/flags line (individuals, markers, then three 0 flags),
#   3. the space-separated individual IDs.
# The delimiter is quoted so the text is written literally, with no expansion.
cat > onemap_header.txt <<'ONEMAP_HEADER'
data type f2 backcross
102 16552 0 0 0
ID1 ID2 ID3 ID4 ID5 ID6 ID7 ID8 ID9 ID10 ID11 ID12 ID13 ID14 ID15 ID16 ID17 ID18 ID19 ID20 ID21 ID22 ID23 ID24 ID25 ID26 ID27 ID28 ID29 ID30 ID31 ID32 ID33 ID34 ID35 ID36 ID37 ID38 ID39 ID40 ID41 ID42 ID43 ID44 ID45 ID46 ID47 ID48 ID49 ID50 ID51 ID52 ID53 ID54 ID55 ID56 ID57 ID58 ID59 ID60 ID61 ID62 ID63 ID64 ID65 ID66 ID67 ID68 ID69 ID70 ID71 ID72 ID73 ID74 ID75 ID76 ID77 ID78 ID79 ID80 ID81 ID82 ID83 ID84 ID85 ID86 ID87 ID88 ID89 ID90 ID91 ID92 ID93 ID94 ID95 ID96 ID97 ID98 ID99 ID100 ID101 ID102
ONEMAP_HEADER

# Filter by quality and depth, if necessary
#grep -v "#" $orig_vcf #| awk '$6 > 800 && $4 != "N" && $5 !~ ","' > temp.vcf
# Add chromosome information from VCF file. What to do with all fragments (ctgX, etc)? Currently labeling as "11"
# grep -v "#" FinalFinalFinalGenotypedGATK_SC_SNV_FD_HighDepth.g.vcf | cut -f 1 | sed 's/ctg..../11/g' | sed 's/ctg.../11/g' | sed 's/ctg../11/g' | datamash transpose | sed 's/\t/ /g' | less -S

# Remove header from VCF.
# VCF meta-information and column-header lines begin with "#", so the pattern
# is anchored to the start of the line; an unanchored "#" would also discard
# any variant record that merely contains the character somewhere in a field.
# The path is quoted (and preceded by --) so file names containing spaces,
# glob characters or a leading dash are passed to grep intact.
grep -v '^#' -- "$orig_vcf" > temp1.txt
Expand All @@ -19,7 +23,7 @@ grep -v "#" $orig_vcf > temp1.txt
cut -f 10- temp1.txt > temp2.txt  # keep tab-separated fields 10 onward

# onemap wants every marker row to begin with a marker name (*M1, *M2, ...
# numbered by row) followed by the marker type; "A.H" is the type written
# here for a backcross. The rest of the row is the unchanged input line.
awk '{ printf "*M%d A.H %s\n", NR, $0 }' temp2.txt > temp3.txt

# Sometimes there are vertical pipes "|". replace these with "/"s.
Expand All @@ -38,32 +42,19 @@ sed -i 's/0\/1/ab/g' temp3.txt
# Recode every 1/0 genotype as "ab", editing temp3.txt in place.
# "|" is used as the s-command delimiter so the slash needs no escaping.
sed -i -e 's|1/0|ab|g' temp3.txt

# Replace 1/1 (the pattern matched below is 1/1, not 0/0).
# NOTE(review): both commands rewrite the same 1/1 pattern with different
# codes. Once the first has run there is no 1/1 left for the second to match,
# so as written only the "a" substitution has any effect — confirm which code
# is intended and remove the other command.
sed -i 's/1\/1/a/g' temp3.txt
sed -i 's/1\/1/b/g' temp3.txt

# Recode the missing genotype "./." as "-", editing temp3.txt in place.
# The dots stay escaped so they match literal "." characters only.
sed -i -e 's|\./\.|-|g' temp3.txt

# Assemble final.raw: the header lines first, then the recoded marker rows.
{
  cat onemap_header.txt
  cat temp3.txt
} > final.raw

# Insert required header for onemap input file, # of individuals, # of positions, chromosome information (0 if none), physical position information, phenotypic data
#sed -i '1 i\102 16552 0 0 0' temp3.txt
#sed -i '1 i\data type f2 backcross' temp3.txt

# Turn every tab in temp3.txt into a single space and write the result to
# onemap_input.raw. The output redirection comes first so the target file is
# still created/truncated when the input file cannot be opened.
tr '\t' ' ' > onemap_input.raw < temp3.txt

#sed 's/[01]\/[01]:0,0,0:*/-9\t-9\t/g' > temp2.txt
#sed 's/:[0-9]*,[0-9]*,[0-9]*:[0-9]*:[0-9]*//g' |
#sed 's/:[0-9]*,[0-9]*,[0-9]*//g' | replace :#*,#*,# with blank
#sed 's/0\/0/0\t0/g' | replace 0/0 with 0 0
#sed 's/0\/1/0\t1/g' | replace 0/1 with 0 1
#sed 's/1\/1/1\t1/g' | replace 1/1 with 1 1
#sed 's/\.\/\./-9\t-9\t/g' replace ./. with -9 -9

#| sed 's/|/\//g' | sed -E 's/(\S{3})\S*:\S*/\1/g' | sed 's/0\/0/0\t0/g' | sed 's/0\/1/0\t1/g' | sed 's/1\/1/1\t1/g' | sed 's/\.\/\./-9\t-9\t/g' > temp2.txt

# Turn every tab in final.raw into a single space and write the result to
# onemap_input.raw. The output redirection comes first so the target file is
# still created/truncated when the input file cannot be opened.
tr '\t' ' ' > onemap_input.raw < final.raw

#grep -v DP=[0-9][0-9]\; temp.vcf | cut -f1,2,10- | sed 's/[01]\/[01]:0,0,0:*/-9\t-9\t/g' | sed 's/:[0-9]*,[0-9]*,[0-9]*:[0-9]*:[0-9]*//g' | sed 's/:[0-9]*,[0-9]*,[0-9]*//g' | sed 's/0\/0/0\t0/g' | sed 's/0\/1/0\t1/g' | sed 's/1\/1/1\t1/g' | sed 's/\.\/\./-9\t-9\t/g' > temp2.txt
# Remove intermediate files.
# All paths go to a single rm call. Chaining separate rm commands with "|"
# would only build a pipeline between programs that neither read stdin nor
# write stdout. -f keeps the cleanup quiet when a file is already gone, and
# -- ends option parsing.
rm -f -- onemap_header.txt final.raw temp1.txt temp2.txt temp3.txt

echo 'Complete.'

Loading