diff --git a/.travis.yml b/.travis.yml index f4ac587..4e0d390 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,9 +11,9 @@ before_install: # PRs to master are only ok if coming from dev branch - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && [ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ])' # Pull the docker image first so the test doesn't wait for this - - docker pull nfcore/lncpipe + - docker pull nfcore/lncpipe:dev # Fake the tag locally so that the pipeline runs properly - - docker tag nfcore/lncpipe nfcore/lncpipe:latest + - docker tag nfcore/lncpipe:dev nfcore/lncpipe:dev install: # Install Nextflow @@ -27,10 +27,14 @@ install: env: - NXF_VER='0.32.0' # Specify a minimum NF version that should be tested and work - - NXF_VER='' # Plus: get the latest NF version and check that it works + # - NXF_VER='' # Plus: get the latest NF version and check that it works script: # Lint the pipeline code - nf-core lint ${TRAVIS_BUILD_DIR} + # download the test data + - wget http://cancerbio.info/pub/lncpipe/testdata.tar.gz + - tar -xvzf testdata.tar.gz + - cd testdata # Run the pipeline with the test profile - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker diff --git a/CHANGELOG.md b/CHANGELOG.md index 70e77fa..d225e28 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,3 +2,5 @@ ## v1.0dev - Initial release of nf-core/lncpipe, created with the [nf-core](http://nf-co.re/) template. + * Rename processes to resolve the duplicated-process error encountered after updating the Nextflow version + * Fix typos. 
diff --git a/Dockerfile b/Dockerfile index 163edfc..ac17fb6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,135 +1,10 @@ -FROM ubuntu LABEL authors="zhaoqi@sysucc.org.cn,sun_yu@mail.nankai.edu.cn" \ - description="Docker image containing all requirements for LncPipe" -# Update OS # DEBIAN_FRONTEND=noninteractive is for relieving the dependence of readline perl library by prohibiting interactive frontend # default-jre is for NextFlow (run groovy) # gcc and g++ is for compiling CPAT, PLEK as well as some R packages # gfortran is for compiling R package hexbin (required by plotly) # make is for executing makefiles for several tools # Cython provides C header files like Python.h for CPAT compiling # DO NOT use pip for installing Cython, which will cause missing .h files # zlib1g-dev is for CPAT compiling dependency # libncurses5-dev for samtools (may be used later) # libssl-dev is for R package openssl # libcurl4-openssl-dev is for R package curl # perl brings us FindBin module, which is required by FastQC # ca-certificates is required by aria2 RUN export DEBIAN_FRONTEND=noninteractive && \ - apt-get -qq update && \ - apt-get -qq install -y --no-install-recommends \ - default-jre \ - unzip \ - pbzip2 \ - pigz \ - aria2 \ - gcc \ - g++ \ - gfortran \ - make \ - python-dev \ - cython \ - zlib1g-dev \ - libssl-dev \ - libcurl4-openssl-dev \ - perl \ - ca-certificates -# Install latest pip WITHOUT wheel and setuptools # DO NOT use apt-get python-pip in ubuntu for preventing from complicated related tools and libraries # Setuptools is required by CPAT during its installation RUN aria2c https://bootstrap.pypa.io/get-pip.py -q -o /opt/get-pip.py && \ - python /opt/get-pip.py --no-wheel && \ - rm /opt/get-pip.py -# Install required python packages RUN pip -qqq install numpy -# Install nextflow RUN aria2c https://github.com/nextflow-io/nextflow/releases/download/v0.25.6/nextflow -q -o /opt/nextflow && \ - chmod 777 /opt/nextflow && \ - ln -s /opt/nextflow /usr/local/bin -# 
Install STAR RUN aria2c https://raw.githubusercontent.com/alexdobin/STAR/master/bin/Linux_x86_64/STAR -q -o /opt/STAR && \ - chmod 777 /opt/STAR && \ - ln -s /opt/STAR /usr/local/bin -# Install cufflinks RUN aria2c https://github.com/bioinformatist/cufflinks/releases/download/v2.2.1/cufflinks-2.2.1.Linux_x86_64.tar.gz -q -o /opt/cufflinks-2.2.1.Linux_x86_64.tar.gz && \ - tar xf /opt/cufflinks-2.2.1.Linux_x86_64.tar.gz --use-compress-prog=pigz -C /opt/ && \ - rm /opt/cufflinks-2.2.1.Linux_x86_64/README && \ - ln -s /opt/cufflinks-2.2.1.Linux_x86_64/* /usr/local/bin/ && \ - rm /opt/cufflinks-2.2.1.Linux_x86_64.tar.gz -# Install PLEK # Remove documents, demo files, source files, object files and R scripts # dos2unix in perl one-liner: remove BOM head and deal with \r problem RUN aria2c https://ncu.dl.sourceforge.net/project/plek/PLEK.1.2.tar.gz -q -o /opt/PLEK.1.2.tar.gz && \ - tar xf /opt/PLEK.1.2.tar.gz --use-compress-prog=pigz -C /opt/ && \ - cd /opt/PLEK.1.2/ && \ - python PLEK_setup.py || : && \ - rm *.pdf *.txt *.h *.c *.fa *.cpp *.o *.R *.doc PLEK_setup.py && \ - chmod 777 * && \ - perl -CD -pi -e'tr/\x{feff}//d && s/[\r\n]+/\n/' *.py && \ - ln -s /opt/PLEK.1.2/PLEK* /usr/local/bin/ && \ - ln -s /opt/PLEK.1.2/svm* /usr/local/bin/ && \ - rm /opt/PLEK.1.2.tar.gz -# Use bash instead for shopt only works with bash SHELL ["/bin/bash", "-c"] # Install CNCI # Enable the extglob shell option # Parentheses and the pipe symbol should be escaped RUN aria2c https://codeload.github.com/www-bioinfo-org/CNCI/zip/master -q -o /opt/CNCI-master.zip && \ - unzip -qq /opt/CNCI-master.zip -d /opt/ && \ - rm /opt/CNCI-master.zip && \ - unzip -qq /opt/CNCI-master/libsvm-3.0.zip -d /opt/CNCI-master/ && \ - rm /opt/CNCI-master/libsvm-3.0.zip && \ - cd /opt/CNCI-master/libsvm-3.0 && \ - make > /dev/null 2>&1 && \ - shopt -s extglob && \ - rm -rfv !\("svm-predict"\|"svm-scale"\) && \ - cd .. 
&& \ - rm draw_class_pie.R LICENSE README.md && \ - chmod -R 777 * && \ - ln -s /opt/CNCI-master/*.py /usr/local/bin/ -# Install CPAT # DO NOT use absolute path when setup, and changing directory is necessary. Python interpreter will check current directory for dependencies # Remove line 21 from setup.py: distribute_setup::use_setuptools() for: https://stackoverflow.com/questions/46967488/getting-error-403-while-installing-package-with-pip/46979531#46979531 RUN aria2c https://nchc.dl.sourceforge.net/project/rna-cpat/v1.2.3/CPAT-1.2.3.tar.gz -q -o /opt/CPAT-1.2.3.tar.gz && \ - tar xf /opt/CPAT-1.2.3.tar.gz --use-compress-prog=pigz -C /opt/ && \ - cd /opt/CPAT-1.2.3/ && \ - perl -i -lanE'say unless $. == 21' setup.py && \ - python setup.py install && \ - rm -rfv !"dat" && \ - chmod -R 777 * -# Set back to default shell SHELL ["/bin/sh", "-c"] # Install StringTie RUN aria2c http://ccb.jhu.edu/software/stringtie/dl/stringtie-1.3.3b.Linux_x86_64.tar.gz -q -o /opt/stringtie-1.3.3b.Linux_x86_64.tar.gz && \ - tar xf /opt/stringtie-1.3.3b.Linux_x86_64.tar.gz --use-compress-prog=pigz -C /opt/ && \ - rm /opt/stringtie-1.3.3b.Linux_x86_64/README && \ - ln -s /opt/stringtie-1.3.3b.Linux_x86_64/stringtie /usr/local/bin/stringtie && \ - rm /opt/stringtie-1.3.3b.Linux_x86_64.tar.gz -# Install Hisat2 RUN aria2c ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/downloads/hisat2-2.1.0-Linux_x86_64.zip -q -o /opt/hisat2-2.1.0-Linux_x86_64.zip && \ - unzip -qq /opt/hisat2-2.1.0-Linux_x86_64.zip -d /opt/ && \ - rm /opt/hisat2-2.1.0-Linux_x86_64.zip && \ - cd /opt/hisat2-2.1.0 && \ - rm -rf doc example *debug MANUAL* NEWS TUTORIAL && \ - ln -s /opt/hisat2-2.1.0/hisat2* /usr/local/bin/ && \ - ln -sf /opt/hisat2-2.1.0/*.py /usr/local/bin/ -# Install Kallisto # There's some trashy pointers in Kallisto tarball archieve RUN aria2c https://github.com/pachterlab/kallisto/releases/download/v0.43.1/kallisto_linux-v0.43.1.tar.gz -q -o /opt/kallisto_linux-v0.43.1.tar.gz && \ - tar xf 
/opt/kallisto_linux-v0.43.1.tar.gz --use-compress-prog=pigz -C /opt/ && \ - cd /opt && \ - rm ._* kallisto_linux-v0.43.1.tar.gz && \ - cd kallisto_linux-v0.43.1 && \ - rm -rf ._* README.md test && \ - ln -s /opt/kallisto_linux-v0.43.1/kallisto /usr/local/bin/ -# Install Microsoft-R-Open with MKL, you must use MRO v3.4.2 or later # For more, see this GitHub issue comment: https://github.com/Microsoft/microsoft-r-open/issues/26#issuecomment-340276347 RUN aria2c https://mran.blob.core.windows.net/install/mro/3.4.2/microsoft-r-open-3.4.2.tar.gz -q -o /opt/microsoft-r-open-3.4.2.tar.gz && \ - tar xf /opt/microsoft-r-open-3.4.2.tar.gz --use-compress-prog=pigz -C /opt/ && \ - cd /opt/microsoft-r-open && \ - ./install.sh -as && \ - rm -rf /opt/microsoft-r* -# Cleaning up the apt cache helps keep the image size down (must be placed here, since MRO installation need the cache) RUN rm -rf /var/lib/apt/lists/* -# Install cpanminus # RUN aria2c https://cpanmin.us/ -q -o /opt/cpanm && \ # chmod +x /opt/cpanm && \ # ln -s /opt/cpanm /usr/local/bin/ # Install Perl module FindBin, which is required by FastQC # RUN cpanm FindBin # Install FastQC RUN aria2c https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.5.zip -q -o /opt/fastqc_v0.11.5.zip && \ - unzip -qq /opt/fastqc_v0.11.5.zip -d /opt/ && \ - rm /opt/fastqc_v0.11.5.zip && \ - cd /opt/FastQC && \ - shopt -s extglob && \ - rm -rfv !\("fastqc"\|*.jar\) && \ - chmod 777 * && \ - ln -s /opt/FastQC/fastqc /usr/local/bin/ -# Install Pandoc (required by reporter) RUN aria2c https://github.com/jgm/pandoc/releases/download/1.19.2.1/pandoc-1.19.2.1-1-amd64.deb -q -o /opt/pandoc-1.19.2.1-1-amd64.deb && \ - dpkg -i /opt/pandoc-1.19.2.1-1-amd64.deb && \ - rm /opt/pandoc-1.19.2.1-1-amd64.deb -# Install PyPy RUN aria2c https://bitbucket.org/squeaky/portable-pypy/downloads/pypy-5.9-linux_x86_64-portable.tar.bz2 -q -o /opt/pypy-5.9-linux_x86_64-portable.tar.bz2 && \ - tar xf /opt/pypy-5.9-linux_x86_64-portable.tar.bz2 
--use-compress-prog=pbzip2 -C /opt/ && \ - rm /opt/pypy-5.9-linux_x86_64-portable/README.rst /opt/pypy-5.9-linux_x86_64-portable.tar.bz2 && \ - ln -s /opt/pypy-5.9-linux_x86_64-portable/bin/pypy /usr/local/bin/ -# Install BEDOPS RUN aria2c https://github.com/bedops/bedops/releases/download/v2.4.29/bedops_linux_x86_64-v2.4.29.tar.bz2 -q -o /opt/bedops_linux_x86_64-v2.4.29.tar.bz2 && \ - tar xf /opt/bedops_linux_x86_64-v2.4.29.tar.bz2 --use-compress-prog=pbzip2 -C /opt/ && \ - ln -s /opt/bin/* /usr/local/bin/ && \ - rm /opt/bedops_linux_x86_64-v2.4.29.tar.bz2 -# Install AfterQC # Use PyPy to run AfterQC as default RUN aria2c https://github.com/OpenGene/AfterQC/archive/v0.9.7.tar.gz -q -o /opt/AfterQC-0.9.7.tar.gz && \ - tar xf /opt/AfterQC-0.9.7.tar.gz --use-compress-prog=pigz -C /opt/ && \ - cd /opt/AfterQC-0.9.7 && \ - make && \ - perl -i -lape's/python/pypy/ if $. == 1' after.py && \ - rm -rf Dockerfile Makefile README.md testdata report_sample && \ - rm editdistance/*.cpp editdistance/*.h && \ - ln -s /opt/AfterQC-0.9.7/*.py /usr/local/bin/ && \ - rm /opt/AfterQC-0.9.7.tar.gz -# Install R package LncPipeReporter(via GitHub) RUN Rscript -e "source('http://bioconductor.org/biocLite.R'); install.packages(c('curl', 'httr')); install.packages('devtools'); devtools::install_github('bioinformatist/LncPipeReporter')" -# Install GffCompare RUN aria2c https://github.com/gpertea/gffcompare/archive/master.zip -q -o /opt/gffcompare-master.zip && \ - aria2c https://github.com/gpertea/gclib/archive/master.zip -q -o /opt/gclib-master.zip && \ - unzip -qq /opt/gffcompare-master.zip -d /opt/ && \ - unzip -qq /opt/gclib-master.zip -d /opt/ && \ - rm /opt/gffcompare-master.zip /opt/gclib-master.zip && \ - mv /opt/gclib-master /opt/gclib && \ - cd /opt/gffcompare-master && \ - make release && \ - rm Makefile README.md gtf_tracking.h *.o *.cpp *.sh && \ - ln -s /opt/gffcompare-master/gffcompare /usr/local/bin/ -# Install sambamba RUN aria2c 
https://github.com/biod/sambamba/releases/download/v0.6.7/sambamba_v0.6.7_linux.tar.bz2 -q -o /opt/sambamba_v0.6.7_linux.tar.bz2 && \ - tar xf /opt/sambamba_v0.6.7_linux.tar.bz2 --use-compress-prog=pbzip2 -C /opt/ && \ - ln -s /opt/sambamba /usr/local/bin/ && \ - rm /opt/sambamba_v0.6.7_linux.tar.bz2 + +FROM nfcore/base +LABEL maintainer="zhaoqi@sysucc.org.cn" +LABEL authors="zhaoqi@sysucc.org.cn" \ + description="Docker image containing all requirements for the nfcore/lncpipe pipeline" + +COPY environment.yml / +RUN conda env create -f /environment.yml && conda clean -a +ENV PATH /opt/conda/envs/nf-core-lncpipe-1.0dev/bin:$PATH diff --git a/InstallSoftwareLocally.md b/InstallSoftwareLocally.md deleted file mode 100644 index e66d668..0000000 --- a/InstallSoftwareLocally.md +++ /dev/null @@ -1,175 +0,0 @@ -# Dependencies for run lncPipe Locally - -Prerequisites install command (required when docker image is not favored, you should execute them via root) - -* [HISAT2](https://ccb.jhu.edu/software/hisat2/index.shtml) - - - aria2c ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/downloads/hisat2-2.1.0-Linux_x86_64.zip -q -o /opt/hisat2-2.1.0-Linux_x86_64.zip && \ - unzip -qq /opt/hisat2-2.1.0-Linux_x86_64.zip -d /opt/ && \ - rm /opt/hisat2-2.1.0-Linux_x86_64.zip && \ - cd /opt/hisat2-2.1.0 && \ - rm -rf doc example *debug MANUAL* NEWS TUTORIAL && \ - ln -s /opt/hisat2-2.1.0/hisat2* /usr/local/bin/ && \ - ln -sf /opt/hisat2-2.1.0/*.py /usr/local/bin/ - - -* [StringTie](http://www.ccb.jhu.edu/software/stringtie/) - - - aria2c http://ccb.jhu.edu/software/stringtie/dl/stringtie-1.3.3b.Linux_x86_64.tar.gz -q -o /opt/stringtie-1.3.3b.Linux_x86_64.tar.gz && \ - tar xf /opt/stringtie-1.3.3b.Linux_x86_64.tar.gz --use-compress-prog=pigz -C /opt/ && \ - rm /opt/stringtie-1.3.3b.Linux_x86_64/README && \ - ln -s /opt/stringtie-1.3.3b.Linux_x86_64/stringtie /usr/local/bin/stringtie && \ - rm /opt/stringtie-1.3.3b.Linux_x86_64.tar.gz - - -* 
[gffcompare](http://www.ccb.jhu.edu/software/stringtie/gff.shtml#gffcompare) - - - aria2c https://github.com/gpertea/gffcompare/archive/master.zip -q -o /opt/gffcompare-master.zip && \ - aria2c https://github.com/gpertea/gclib/archive/master.zip -q -o /opt/gclib-master.zip && \ - unzip -qq /opt/gffcompare-master.zip -d /opt/ && \ - unzip -qq /opt/gclib-master.zip -d /opt/ && \ - rm /opt/gffcompare-master.zip /opt/gclib-master.zip && \ - cd /opt/gffcompare-master && \ - make release - - -* [Bedops](http://bedops.readthedocs.io/en/latest/): - - - aria2c https://github.com/bedops/bedops/releases/download/v2.4.29/bedops_linux_x86_64-v2.4.29.tar.bz2 -q -o /opt/bedops_linux_x86_64-v2.4.29.tar.bz2 && \ - tar xf /opt/bedops_linux_x86_64-v2.4.29.tar.bz2 --use-compress-prog=pbzip2 -C /opt/ && \ - ln -s /opt/bin/* /usr/local/bin/ && \ - rm /opt/bedops_linux_x86_64-v2.4.29.tar.bz2 - - -* [PLEK](www.ibiomedical.net): - - - aria2c https://nchc.dl.sourceforge.net/project/plek/PLEK.1.2.tar.gz -q -o /opt/PLEK.1.2.tar.gz && \ - tar xf /opt/PLEK.1.2.tar.gz --use-compress-prog=pigz -C /opt/ && \ - cd /opt/PLEK.1.2/ && \ - python PLEK_setup.py || : && \ - rm *.pdf *.txt *.h *.c *.fa *.cpp *.o *.R *.doc PLEK_setup.py && \ - chmod 755 * && \ - perl -CD -pi -e'tr/\x{feff}//d && s/[\r\n]+/\n/' *.py && \ - ln -s /opt/PLEK.1.2/* /usr/local/bin/ && \ - rm /opt/PLEK.1.2.tar.gz - - -* [CNCI](https://github.com/www-bioinfo-org/CNCI): - - - aria2c https://codeload.github.com/www-bioinfo-org/CNCI/zip/master -q -o /opt/CNCI-master.zip && \ - unzip -qq /opt/CNCI-master.zip -d /opt/ && \ - rm /opt/CNCI-master.zip && \ - unzip -qq /opt/CNCI-master/libsvm-3.0.zip -d /opt/CNCI-master/ && \ - rm /opt/CNCI-master/libsvm-3.0.zip && \ - cd /opt/CNCI-master/libsvm-3.0 && \ - make > /dev/null 2>&1 && \ - shopt -s extglob && \ - rm -rfv !\("svm-predict"\|"svm-scale"\) && \ - cd .. 
&& \ - rm draw_class_pie.R LICENSE README.md && \ - chmod -R 755 * && \ - ln -s /opt/CNCI-master/*.py /usr/local/bin/ - - -* [CPAT](http://rna-cpat.sourceforge.net):[Citation](https://academic.oup.com/nar/article/41/6/e74/2902455/CPAT-Coding-Potential-Assessment-Tool-using-an) - - - aria2c https://jaist.dl.sourceforge.net/project/rna-cpat/v1.2.3/CPAT-1.2.3.tar.gz -q -o /opt/CPAT-1.2.3.tar.gz && \ - tar xf /opt/CPAT-1.2.3.tar.gz --use-compress-prog=pigz -C /opt/ && \ - cd /opt/CPAT-1.2.3/ && \ - mv dat/* /LncPipeDB/ && \ - python setup.py install > /dev/null 2>&1 && \ - rm -rf /opt/CPAT* - - -* [fastp](https://github.com/OpenGene/fastp) - - RUN aria2c http://opengene.org/fastp/fastp -q -o /usr/local/bin/fastp && \ - chmod a+x /usr/local/bin/fastp - - -* [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc) - - - aria2c https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.5.zip -q -o /opt/fastqc_v0.11.5.zip && \ - unzip -qq /opt/fastqc_v0.11.5.zip -d /opt/ && \ - rm /opt/fastqc_v0.11.5.zip && \ - cd /opt/FastQC && \ - shopt -s extglob && \ - rm -rfv !\("fastqc"\|*.jar\) && \ - chmod 755 * && \ - ln -s /opt/FastQC/fastqc /usr/local/bin/ - - -* or [AfterQC](https://github.com/OpenGene/AfterQC) - - - aria2c https://github.com/OpenGene/AfterQC/archive/v0.9.7.tar.gz -q -o /opt/AfterQC-0.9.7.tar.gz && \ - tar xf /opt/AfterQC-0.9.7.tar.gz --use-compress-prog=pigz -C /opt/ && \ - cd /opt/AfterQC-0.9.7 && \ - make && \ - perl -i -lape's/python/pypy/ if $. == 1' after.py && \ - rm -rf Dockerfile Makefile README.md testdata report_sample && \ - rm editdistance/*.cpp editdistance/*.h && \ - ln -s /opt/AfterQC-0.9.7/*.py /usr/local/bin/ && \ - rm /opt/AfterQC-0.9.7.tar.gz - - -When using afterQC, we recommend that users install `pypy` in their operation system, which can accelerate about 3X speed for raw reads processing, as [suggested]((https://github.com/OpenGene/AfterQC#pypy-suggestion)) by the author of AfterQC. 
- -* [LncPipeReporter](https://github.com/bioinformatist/LncPipe-Reporter) - - Install [pandoc](https://pandoc.org/installing.html) first. Then run commands: - - Rscript -e "install.packages('devtools'); devtools::install_github('bioinformatist/LncPipeReporter')" - - For detailed usage of LncPipeReporter in case you are going to run it separately, plz refers to [README](https://github.com/bioinformatist/LncPipeReporter#lncpipereporter) of LncPipeReporter. - -* [kallisto](https://github.com/pachterlab/kallisto) - - - aria2c https://github.com/pachterlab/kallisto/releases/download/v0.43.1/kallisto_linux-v0.43.1.tar.gz -q -o /opt/kallisto_linux-v0.43.1.tar.gz && \ - tar xf /opt/kallisto_linux-v0.43.1.tar.gz --use-compress-prog=pigz -C /opt/ && \ - cd /opt && \ - rm ._* kallisto_linux-v0.43.1.tar.gz && \ - cd kallisto_linux-v0.43.1 && \ - rm -rf ._* README.md test && \ - ln -s /opt/kallisto_linux-v0.43.1/kallisto /usr/local/bin/ - - -* [sambamba](http://lomereiter.github.io/sambamba/) - - - aria2c https://github.com/biod/sambamba/releases/download/v0.6.7/sambamba_v0.6.7_linux.tar.bz2 -q -o /opt/sambamba_v0.6.7_linux.tar.bz2 && \ - tar xf /opt/sambamba_v0.6.7_linux.tar.bz2 --use-compress-prog=pbzip2 -C /opt/ && \ - ln -s /opt/sambamba /usr/local/bin/ && \ - rm /opt/sambamba_v0.6.7_linux.tar.bz2 - - -**Alternatively, when you are going to using STAR-Cufflinks in your analysis, the corresponding install cmd are as follows:** - -* [STAR](https://github.com/alexdobin/STAR) - - - aria2c https://raw.githubusercontent.com/alexdobin/STAR/master/bin/Linux_x86_64/STAR -q -o /opt/STAR && \ - chmod 755 /opt/STAR && \ - ln -s /opt/STAR /usr/local/bin - - -* [Cufflinks](https://github.com/cole-trapnell-lab/cufflinks) - - - aria2c https://github.com/bioinformatist/cufflinks/releases/download/v2.2.1/cufflinks-2.2.1.Linux_x86_64.tar.gz -q -o /opt/cufflinks-2.2.1.Linux_x86_64.tar.gz && \ - tar xf /opt/cufflinks-2.2.1.Linux_x86_64.tar.gz --use-compress-prog=pigz -C /opt/ && \ - rm 
/opt/cufflinks-2.2.1.Linux_x86_64/README && \ - ln -s /opt/cufflinks-2.2.1.Linux_x86_64/* /usr/local/bin/ && \ - rm /opt/cufflinks-2.2.1.Linux_x86_64.tar.gz - - -> The `gffcompare` utility share the same function as `cuffcompare`, therefore, in STAR-cufflinks analysis pipe, `gffcompare` is not required. diff --git a/README.md b/README.md index 595eeb1..1e95eb3 100644 --- a/README.md +++ b/README.md @@ -23,380 +23,25 @@ The nf-core/lncpipe pipeline comes with documentation about the pipeline, found * [Adding your own system](docs/configuration/adding_your_own.md) 3. [Running the pipeline](docs/usage.md) 4. [Output and how to interpret the results](docs/output.md) +5. [Run analysis for non-human species](docs/README_for_non_human_genome.md) 5. [Troubleshooting](docs/troubleshooting.md) -## Citation - -Qi Zhao, Yu Sun, Dawei Wang, Hongwan Zhang, Kai Yu, Jian Zheng, Zhixiang Zuo. LncPipe: A Nextflow-based pipeline for identification and analysis of long non-coding RNAs from RNA-Seq data. [J Genet Genomics. 2018 Jul 20;45(7):399-401](https://linkinghub.elsevier.com/retrieve/pii/S1673-8527(18)30117-6) - - -# [LncPipe](https://github.com/likelet/LncPipe) -## Table of Contents - - - - -- [Schematic diagram](#schematic-diagram) -- [Installation](#installation-and-quick-start) -- [Run Docker](#run-docker) -- [Run with example data](https://github.com/likelet/LncPipeTestData) -- [Interactive reports](#interactive-reports) -- [Parameters](#parameters) -- [FAQ](#faq) -- [Acknowledgements](#acknowledgements) -- [Contact](#contact) -- [License](#license) - -## Schematic diagram - - -## Installation -[Nextflow](https://github.com/nextflow-io/nextflow) -LncPipe is implemented with Nextflow pipeline management system. To run LncPipe. [Nextflow](https://github.com/nextflow-io/nextflow) should be pre-installed at POSIX compatible system (Linux, Solaris, OS X, etc), It requires BASH and Java 7 or higher to be installed. 
We do not recommend running the pipes in the Windows since most of bioinformatic tools are not supported. - -## Quick start -Here, we show step by step installation of [Nextflow](https://github.com/nextflow-io/nextflow) in a linux system as an example (adopted from [NextFlow](https://www.nextflow.io/docs/latest/getstarted.html)). - -* 1. Download the NextFlow executable package by pasting the following command into your terminal window: - - - wget -qO- get.nextflow.io | bash - - -> It will create the [Nextflow](https://github.com/nextflow-io/nextflow) main executable file in the current directory. - -* 2. Optionally, move the nextflow file to a directory accessible by your `$PATH` variable (only required to avoid typing the full path to this file each time you need to run it). Of course, you can download the lastest binary version of NextFlow by yourself from [here](https://github.com/nextflow-io/nextflow/releases) and add the path to your system environment.All those pipelines were written in [Nextflow](https://github.com/nextflow-io/nextflow) commands. For more details, please see [here](https://www.nextflow.io). - -* 3. Download the LncPipe github repository by: -``` -git clone https://github.com/likelet/LncPipe.git -``` - -* 4. Configure the design.file with experimental conditions and replicate info - -* 5. Configure your data and reference files in *nextflow.config* or *docker.config* or *singularity.config* - -* 6. Run LncPipe nextflow pipeline: - - nextflow -c nextflow.config run LncRNAanalysisPipe.nf - - or docker command - - nextflow -c docker.config run LncRNAanalysisPipe.nf - - or singularity command - - # create image - singularity build lncPipe.image docker://bioinformatist/lncpipe - # run command - nextflow -c singularity.config run LncRNAanalysisPipe.nf -* __7.Run with test data __ . - - PlZ go to https://github.com/likelet/LncPipeTestData - -### Prepare input files - -#### References, index and annotation files(Mandatory). 
-* **:blush:Please keep the consistency of your genome sequence,index library and annotation files (Important!): genome version, chromosome format, gtf coordinated e.g. The dependent third-party softwares may stop for any discrepencies in file-formatting.** -* Genome reference (genome fasta file with suffix `.fa` etc. ) - -* Genome Index for alignment (hisat2 or tophat or STAR) - -* GENCODE gene annotation file in GTF format - -* LNCipedia gene annotation file in GTF format.(set null if not available for your species) - -* Raw sequence file with \*.fastq.gz / \*.fq.gz suffixed - - - -#### Species - - >Currently, LncPipe has been tested for detection of lncRNAs in 'humans' only. - However, LncPipe can be manually configured to run the anlysis for other species as well and requires additional files "known_protein_coding.gtf" and "known_lncRNA.gtf" for coding probability calculations. More information on usage for non-human species can be found here. - -* Reference files for humans - - 1. hisat index built from Genome: - http://cancerbio.info/pub/hg38_hisat_index.tar.gz - - 2. Genome reference: - ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_27/GRCh38.p10.genome.fa.gz - - 3. GENCODE gene annotation: - ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_27/gencode.v27.annotation.gtf.gz - - 4. LNCipedia gene annotation: - https://lncipedia.org/downloads/lncipedia_5_0_hc_hg38.gtf - - 5. Raw sequence file with \*.fastq.gz / \*.fq.gz suffixed - -* Reference files for mouse - - 1. hisat index built from Genome - 2. Genome reference: - ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M16/GRCm38.p5.genome.fa.gz - - 3. GENCODE gene annotation: - ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M16/gencode.vM16.annotation.gtf.gz - - 4. LNCipedia gene annotation: null - 5. Raw sequence file with \*.fastq.gz / \*.fq.gz suffixed - -## Run Docker - -1. Prepare input files as mentioned earlier. -2. 
Modify the `docker.config` in `mandatory` section. -3. Install docker and download the latest LncPipe build using: - ``` - docker pull bioinformatist/lncpipe - ``` -4. Run LncPipe using the following command: - - - nextflow -c docker.config run LncRNAanalysisPipe.nf - ->The docker image for LncPipe is available on the docker-hub (https://hub.docker.com/r/bioinformatist/lncpipe/tags/). -> Alternatively, nextflow can automatically pull image from docker.io. `Dockerfile` recorded that what we have done with the image. For user from local China looking to pull the docker image can use this [mirror site instead](https://github.com/likelet/Blogs_tips/blob/master/README.md#setting-docker-download-mirror-site). - -## Dependencies - -TO Install softwares locally on your machine, please see install instructions [here](https://github.com/likelet/LncPipe/blob/master/InstallSoftwareLocally.md) - -## Interactive reports - -The results of LncPipe are summarized and visualized via interactive plots by our novel R package [LncPipeReporter](https://github.com/bioinformatist/LncPipeReporter). Users can also try LncPipeReporter as stand-alone for visualizing known and novel lncRNAs. - -## Configuration -As a nextflow-based analysis pipeline, LncPipe allow users edit configure file `nextflow.config` to set the index files and default file path parameters instead of typing them into the command line. 
+## Schematic diagram +![workflow](docs/img/workflow.png) -To configure, please go to `params` line, and set the following information of various file locations and system environment settings +## Acknowledgment - params { - /* - User setting options (mandatory) - */ - // input file and genome reference - fastq_ext = '*_{1,2}.fq.gz' - fasta_ref = '/data/database/hg38/genome.fa' - design = 'design.file' - hisat2_index = '/data/database/hg38/hisatIndex/grch38_snp_tran/genome_snp_tran' - cpatpath='/opt/CPAT-1.2.3' - //human gtf only - gencode_annotation_gtf = "/data/database/hg38/Annotation/gencode.v24.annotation.gtf" - lncipedia_gtf = "/data/database/hg38/Annotation/lncipedia_4_0_hg38.gtf" // set "null" if you are going to perform analysis on other species +Thanks to the author of [AfterQC](https://github.com/OpenGene/AfterQC)/fastp, Shifu Chen, for his help in providing gzip output support to meet the requirements of LncPipe. Thanks also to Hongwan Zhang and Yan Wang from the SYSUCC Cancer bioinformatics platform for their internal testing. - // additional options for non-human species, else leaving them unchanged - species="human"// mouse , zebrafish, fly - known_coding_gtf="" - known_lncRNA_gtf="" - //for test - cpatpath = '/home/zhaoqi/software/CPAT/CPAT-1.2.2/' +Many thanks also to the wonderful guys @apeltzer, @ewels and others from nf-core who helped me polish the code and structure of lncpipe. 
- /* - User setting options (optional) - */ - // tools setting - star_idex = ''//set if star used - bowtie2_index = ''//set if tophat used - aligner = "hisat" // or "star","tophat" - sam_processor="sambamba"//or "samtools(deprecated)" - qctools ="fastp" // or "afterqc","fastp","fastqc" - detools = "edger"//or "deseq2","noiseq" not supported yet - quant = "kallisto"// or 'htseq' - - //other setting - singleEnd = false - unstrand = false - skip_combine = false - lncRep_Output = 'reporter.html' - lncRep_theme = 'npg' - lncRep_cdf_percent = 10 - lncRep_max_lnc_len = 10000 - lncRep_min_expressed_sample = 50 - mem=60 - cpu=30 - } - - manifest { - homePage = 'https//github.com/likelet/LncPipe' - description = 'LncPipe:a Nextflow-based Long non-coding RNA analysis PIPELINE' - mainScript = 'LncRNAanalysisPipe.nf' - } - - - timeline { - enabled: true - file: "timeline.html" - } - - - -## Parameters -> Those parameters would cover the setting from `nextflow.config` file -* Mandatory(plz configure those options in *nextflow.config* or *docker.config* file) - -| Name | Example/Default value | Description | -|-----------|--------------:|-------------| -|--input_folder | `.` | input folder | -|--species | `human` | Your species, mouse, fly and zebra fish are also supported | -|--fastq_ext | `*_{1,2}.fastq.gz` | input raw paired reads | -|--out_folder | `.` | output folder | -|--design | `FALSE` | a txt file that stored experimental design information, plz see details from `--design` section below | - -* References - -| Name | Required | Description | -|-----------|--------------|-------------| -|--star_index/--bowtie2_index/--hisat2_index | -| Path to STAR?bowtie2/hisat2(mutually exclusive) index(required if not set in config file) | -|--fasta | `-` | Path to Fasta reference(required if not set in config file)| -|--gencode_annotation_gtf | `-` | An annotation file from GENCODE database for annotating lncRNAs(required if not set in config file). e.g. 
[gencode.v26.annotation.gtf](ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_26/gencode.v26.annotation.gtf.gz) | -|--lncipedia_gtf | `-` | An annotation file from LNCipedia database for annotating lncRNAs(required if not set in config file) e.g. [lncipedia_4_0_hc_hg38.gtf](http://www.lncipedia.org/downloads/lncipedia_4_0_hc_hg38.gtf) | - -* software path (should not setting when using docker ) - - -| Name | Required | Description | -|-----------|--------------|-------------| -|--cpatpath | `-` | Home folder of cpat installed location | - -> since cpat may call model data from its home path, users should specified where the model file is located in. Especially users install cpat by themselves without our install code. - -* Optional - -| Name | Default value | Description | -|-----------|--------------|-------------| -|--singleEnd | `FALSE` | specify that the reads are single ended | -|--merged_gtf | `FALSE` | Skip mapping and assembly step by directly providing assembled merged gtf files| -|--unstrand | `FALSE` | specify that library is unstrand specific | -|--aligner | `star` | Aligner for reads mapping (optional), STAR is default and supported only at present,*star*/*tophat*/*hisat2*| -|--qctools | `fastp` | Tools for assess raw reads quality or filtered by *fastp*, *fastqc*, *afterqc* or *none*(skip qc step)| - -* LncPipeReporter options - -| Name | Default value | Description | -|-----------|--------------|-------------| -|--lncRep_Output | `reporter.html` | Specify report file name. | -|--lncRep_theme | `npg` | Plot theme setting in interactive plot. Values from [ggsci](https://github.com/road2stat/ggsci) | -|--lncRep_min_expressed_sample | `50` |Minimum expressed gene allowed in each sample, 50 default. Samples not passed were filtered from analysis| - -`--fastq_ext` -> Raw fastq files are required for de-novo analysis.This parameters should be set according to your paired or singled reads file names. 
- -For example: - - Sample1_1.fq.gz - Sample1_2.fq.gz - Sample2_1.fq.gz - Sample2_2.fq.gz - -Then you can input pattern `*_{1,2}.fq.gz` to make the all paired-end file recognized by [LncPipe](https://github.com/likelet/LncPipe) . - -For singled reads file, file pattern should be fed with `--singleEnd` parameter specified - - -`--star_idex?--bowtie2_index/--hisat2_index` - -> This parameter is *required* when not configured in nextflow.config file. It specify the star/tophat/hisat2(mutually exclusive) index folder built before running [LncPipe](https://github.com/likelet/LncPipe) . -If you don't know what it is?You can use `--fasta` to specify the reference sequence data. The index file would be built by [LncPipe](https://github.com/likelet/LncPipe) automatically. - - -`--design` -> Experimental design file matrix for differential expression analysis. Default: `null` -Format: - - WT:Sample1,Sample2,Sample3 - KO:Sample1,Sample2,Sample3 - -While `KO/WT` represents the two experimental condition, and sample1, sample2, sample3 are replicates which should be comma-delimited in the same line . - -For sample names, it should be the sample as the prefix of fastq files which was trimmed by `--fastq_ext`. - -For example: - - if fastq file names are `Sample1_1.fq.gz, Sample1_2.fq.gz` that comes from one sample and your `--fastq_ext` is set as `*_{1,2}.fq.gz`, the sample name -should be Sample1. - -## Output - `Result` folder under current path(default) or output_folder set by user. 
A typical structure of `Result` is follows: - - Result/ - ├── QC - │ ├── N1141_1.clean_fastqc.html - │ ├── N1141_2.clean_fastqc.html - │ ├── N1177_1.clean_fastqc.html - │ └── N1177_2.clean_fastqc.html - ├── Identified_lncRNA - │ ├── all_lncRNA_for_classifier.gtf - │ ├── final_all.fa - │ ├── final_all.gtf - │ ├── lncRNA.fa - │ ├── protein_coding.fa - │ └── protein_coding.final.gtf - ├── LncReporter - │ ├── Differential_Expression_analysis.csv - │ └── Report.html - ├── Quantification - │ ├── kallisto.count.txt - │ └── kallisto.tpm.txt - └── Star_alignment - ├── STAR_N1141 - │ ├── N1141Aligned.sortedByCoord.out.bam - │ ├── N1141Log.final.out - │ ├── N1141Log.out - │ ├── N1141Log.progress.out - │ └── N1141SJ.out.tab - └── STAR_N1177 - ├── N1177Aligned.sortedByCoord.out.bam - ├── N1177Log.final.out - ├── N1177Log.out - ├── N1177Log.progress.out - └── N1177SJ.out.tab - - -* `QC` stored the Quality control output generated by FastQC or AfterQC software.
-* `Identified_lncRNA` contains all assembled lncRNA and their sequences. *all_lncRNA_for_classifier.gtf* includes both novel and known lncRNA features in [GTF format](http://www.ensembl.org/info/website/upload/gff.html); -*lncRNA.fa* is all lncRNA sequences in fasta format. *protein_coding.final.gtf* and *protein_coding.fa* are protein coding information extracted from gencode annotation. *final_all.gtf* and *final_all.fa* are combined files for further analysis.
-* `Alignment` are hisat/tophat/STAR aligner standard output
-* `Quantification` are estimated abundance using kallisto. *kallisto.count.txt* stored reads count matrix and *kallisto.tpm.txt* are tpm(Transcripts Per Kilobase Million) matrix. -* `LncReporter` stored the interactive report file and differential expression matrix generated by LncPipeReporter which wrapped EdgeR. - -## Tips - -* :blush:Plz keep the consistency of your genome sequence, index library and annotation files: genome version, chromosome format, gtf coordinated e.g. The third-party software may stop for any of the above reasons. -* :confused:Setting your analysis parameters always in config file, differ project should corresponding to differ configurations for reproductive analysis. To rerun a project, you can just specify -c `your.config` in your command, which can also help you to record analysis parameters. -* :open_mouth:Run analysis on docker container, no much to say. -* :grimacing:Always use the latest version to be away from the known bugs. - -## Acknowledgement - - Thanks to the author of [AfterQC](https://github.com/OpenGene/AfterQC), Shifu Chen, for his help on providing a gzip output support to meet the require of LncPipe. Thanks to the internal test by Hongwan Zhang and Yan Wang from SYSUCC Cancer bioinformatics platform. - -## FAQ - -* *1. PLEK throws an error "/data/software/PLEK.1.2/PLEK.py:line12: $'\r': can not find command", how to fix?* ->A: using the follow command as suggested in the installation section. - - perl -CD -pi -e'tr/\x{feff}//d && s/[\r\n]+/\n/' *.py - -* *2. IOError: [Errno 2] No such file or directory: '/opt/CPAT-1.2.3/dat/Human_Hexamer.tsv'?* ->A: The cpat command required the `Human_Hexamer.tsv` to predict lncRNA coding potential, plz check your `cpatpath` parameters. -* *3. When using htseq to quantify transicript, it throws "Error occured when reading beginning of SAM/BAM file. 
'csamtools.AlignedRead' object has no attribute 'reference_start' "* ->A: It's a version conflict caused by htseq and hisat generated bamfile, a possible solution for this is to install the old version of htseq - +## Citation - -## Contact +For details of LncPipe, please read the article below :happy: -For implementation: -* [Qi Zhao](https://github.com/likelet) zhaoqi@sysucc.org.cn, Sun Yat-sen University Cancer Center; -* [Yu Sun](http://icannotendure.space) sun_yu@mail.nankai.edu.cn, Nan kai University; -For project design and new feature request: -* [Qi Zhao](https://github.com/likelet) zhaoqi@sysucc.org.cn, Sun Yat-sen University Cancer Center; -* [Zhixiang Zuo]() zuozhx@sysucc.org.cn, Sun Yat-sen University Cancer Center; +> Qi Zhao, Yu Sun, Dawei Wang, Hongwan Zhang, Kai Yu, Jian Zheng, Zhixiang Zuo. LncPipe: A Nextflow-based pipeline for identification and analysis of long non-coding RNAs from RNA-Seq data. [J Genet Genomics. 2018 Jul 20;45(7):399-401](https://linkinghub.elsevier.com/retrieve/pii/S1673-8527(18)30117-6) -> We strongly recommend users open new issues if they have questions or find bugs. 
diff --git a/bin/cpat_model/Fly_logitModel.RData b/bin/cpat_model/Fly_logitModel.RData new file mode 100755 index 0000000..27c8533 Binary files /dev/null and b/bin/cpat_model/Fly_logitModel.RData differ diff --git a/bin/cpat_model/Human_Hexamer.tsv b/bin/cpat_model/Human_Hexamer.tsv new file mode 100755 index 0000000..4bc31f8 --- /dev/null +++ b/bin/cpat_model/Human_Hexamer.tsv @@ -0,0 +1,4097 @@ +hexamer coding noncoding +AAAAAA 0.000647111 0.002072893 +AAAAAC 0.000420924 0.000538157 +AAAAAG 0.000813362 0.000715402 +AAAAAT 0.000591729 0.001095402 +AAAACA 0.00049346 0.00071723 +AAAACC 0.000400381 0.000358763 +AAAACG 9.06442E-05 9.25067E-05 +AAAACT 0.00040684 0.00052149 +AAAAGA 0.000428654 0.000712875 +AAAAGC 0.000310583 0.00041253 +AAAAGG 0.000309525 0.000463018 +AAAAGT 0.000318843 0.000510709 +AAAATA 0.000363318 0.000998998 +AAAATC 0.000417429 0.000432128 +AAAATG 0.000541959 0.000788041 +AAAATT 0.000547677 0.000806376 +AAACAA 0.000453645 0.000607679 +AAACAC 0.000303912 0.000338976 +AAACAG 0.000608777 0.000446807 +AAACAT 0.000338221 0.000473502 +AAACCA 0.000454492 0.0004121 +AAACCC 0.000401016 0.000297844 +AAACCG 6.9148E-05 5.72891E-05 +AAACCT 0.000451421 0.000343815 +AAACGA 0.000140626 6.96018E-05 +AAACGC 0.000138825 6.02732E-05 +AAACGG 0.000153862 6.5865E-05 +AAACGT 0.000119977 8.7372E-05 +AAACTA 0.000228517 0.000309888 +AAACTC 0.000359082 0.000304431 +AAACTG 0.000583046 0.00045541 +AAACTT 0.000408958 0.000456243 +AAAGAA 0.001359981 0.000846137 +AAAGAC 0.000616296 0.000333169 +AAAGAG 0.000941281 0.000473153 +AAAGAT 0.000819716 0.000428741 +AAAGCA 0.000594906 0.000507671 +AAAGCC 0.000666171 0.000334299 +AAAGCG 0.000103563 7.55431E-05 +AAAGCT 0.000581351 0.000383953 +AAAGGA 0.000603589 0.000541007 +AAAGGC 0.00044189 0.000299054 +AAAGGG 0.000326256 0.000345643 +AAAGGT 0.000317467 0.000287628 +AAAGTA 0.000288134 0.000383334 +AAAGTC 0.000363742 0.000272762 +AAAGTG 0.000599141 0.000459415 +AAAGTT 0.000406522 0.000447506 +AAATAA 2.42494E-05 0.000959237 +AAATAC 
0.000370836 0.000411455 +AAATAG 1.21777E-05 0.000358709 +AAATAT 0.000398792 0.000731102 +AAATCA 0.000351035 0.000448205 +AAATCC 0.000303595 0.000283246 +AAATCG 5.99353E-05 5.30146E-05 +AAATCT 0.000382379 0.000420246 +AAATGA 2.4673E-05 0.000609023 +AAATGC 0.000225657 0.000402045 +AAATGG 0.000247048 0.000450651 +AAATGT 0.000410334 0.0006904 +AAATTA 0.000288664 0.000618083 +AAATTC 0.000293535 0.00038777 +AAATTG 0.000274686 0.000432505 +AAATTT 0.000408322 0.000697497 +AACAAA 0.000557631 0.000593108 +AACAAC 0.000568432 0.000229829 +AACAAG 0.000752898 0.000298167 +AACAAT 0.000365224 0.0002915 +AACACA 0.000381743 0.000355295 +AACACC 0.000448985 0.000198563 +AACACG 0.000165722 6.41713E-05 +AACACT 0.000292582 0.000291984 +AACAGA 0.000320855 0.000406831 +AACAGC 0.000586964 0.000280934 +AACAGG 0.000257214 0.000269455 +AACAGT 0.000327209 0.000283004 +AACATA 0.000178218 0.000265127 +AACATC 0.000699951 0.00023698 +AACATG 0.000558902 0.000338224 +AACATT 0.000417959 0.000435274 +AACCAA 0.000267273 0.000300801 +AACCAC 0.000363 0.000237571 +AACCAG 0.000719012 0.00029486 +AACCAT 0.000212103 0.000235367 +AACCCA 0.000341716 0.000289806 +AACCCC 0.000460422 0.000232598 +AACCCG 0.000142743 7.29354E-05 +AACCCT 0.000330809 0.000245475 +AACCGA 0.000135225 4.5998E-05 +AACCGC 0.000251495 4.78529E-05 +AACCGG 0.000221845 4.73959E-05 +AACCGT 9.22326E-05 4.71002E-05 +AACCTA 0.000149626 0.000166813 +AACCTC 0.000523746 0.000269805 +AACCTG 0.000896806 0.000316609 +AACCTT 0.000277757 0.000266498 +AACGAA 0.000130566 5.7316E-05 +AACGAC 0.000199926 3.22335E-05 +AACGAG 0.000309101 6.13217E-05 +AACGAT 0.000111399 3.71264E-05 +AACGCA 8.89499E-05 5.33641E-05 +AACGCC 0.0002408 5.15629E-05 +AACGCG 7.93137E-05 2.6588E-05 +AACGCT 8.37612E-05 5.40093E-05 +AACGGA 0.00010081 5.46276E-05 +AACGGC 0.000212103 4.71809E-05 +AACGGG 0.000184995 5.66439E-05 +AACGGT 6.17355E-05 3.90082E-05 +AACGTA 3.83332E-05 4.2288E-05 +AACGTC 0.000181288 4.72615E-05 +AACGTG 0.00036353 8.18339E-05 +AACGTT 6.92539E-05 6.87416E-05 +AACTAA 
1.30248E-05 0.000243405 +AACTAC 0.000417747 0.000162216 +AACTAG 9.63624E-06 0.000154501 +AACTAT 0.000283793 0.000220419 +AACTCA 0.000241435 0.000280316 +AACTCC 0.000366601 0.000281687 +AACTCG 0.000101869 5.11059E-05 +AACTCT 0.000266003 0.000297441 +AACTGA 2.38259E-05 0.000356047 +AACTGC 0.000351882 0.000261175 +AACTGG 0.000344893 0.000286768 +AACTGT 0.000266638 0.000337202 +AACTTA 0.000143802 0.000244803 +AACTTC 0.000552972 0.000282736 +AACTTG 0.000320643 0.000309996 +AACTTT 0.000366283 0.000456458 +AAGAAA 0.001381901 0.000933939 +AAGAAC 0.000735531 0.000321771 +AAGAAG 0.00180706 0.000545228 +AAGAAT 0.000622014 0.000461109 +AAGACA 0.000485624 0.000413579 +AAGACC 0.000597341 0.000261121 +AAGACG 0.000221845 7.62421E-05 +AAGACT 0.000394451 0.000325911 +AAGAGA 0.000475141 0.000525737 +AAGAGC 0.00054503 0.000303678 +AAGAGG 0.000535182 0.000417665 +AAGAGT 0.000349234 0.000291365 +AAGATA 0.000263249 0.000299188 +AAGATC 0.000718482 0.000217596 +AAGATG 0.000774817 0.000417477 +AAGATT 0.00048573 0.000356531 +AAGCAA 0.000423147 0.000414788 +AAGCAC 0.00045068 0.0002519 +AAGCAG 0.001078941 0.000432155 +AAGCAT 0.000324879 0.000311421 +AAGCCA 0.000556678 0.000400486 +AAGCCC 0.000635886 0.00028451 +AAGCCG 0.000204373 8.99259E-05 +AAGCCT 0.000475247 0.000330562 +AAGCGA 0.000166675 7.98983E-05 +AAGCGC 0.000306665 6.37143E-05 +AAGCGG 0.000375496 8.55707E-05 +AAGCGT 0.000124742 5.42244E-05 +AAGCTA 0.000219304 0.000208537 +AAGCTC 0.000539735 0.000247491 +AAGCTG 0.00126616 0.000414842 +AAGCTT 0.000355482 0.000307711 +AAGGAA 0.00113517 0.000623567 +AAGGAC 0.000910466 0.000276337 +AAGGAG 0.001627889 0.000445328 +AAGGAT 0.0007224 0.000282386 +AAGGCA 0.000492719 0.000357096 +AAGGCC 0.000903795 0.000270853 +AAGGCG 0.000224493 8.60009E-05 +AAGGCT 0.0005427 0.000301285 +AAGGGA 0.000342351 0.000388953 +AAGGGC 0.000578704 0.000250314 +AAGGGG 0.000366071 0.000300828 +AAGGGT 0.000256578 0.000201628 +AAGGTA 0.000170276 0.00018582 +AAGGTC 0.000425159 0.000190175 +AAGGTG 0.000875204 0.000296688 
+AAGGTT 0.000306348 0.000245851 +AAGTAA 2.11786E-05 0.000333169 +AAGTAC 0.000484248 0.000170684 +AAGTAG 1.84253E-05 0.000230232 +AAGTAT 0.000337798 0.000293489 +AAGTCA 0.000369672 0.000314566 +AAGTCC 0.000409276 0.000209505 +AAGTCG 0.000114682 4.54603E-05 +AAGTCT 0.000345105 0.000278972 +AAGTGA 4.2463E-05 0.000389625 +AAGTGC 0.000328691 0.000294107 +AAGTGG 0.000349658 0.000317469 +AAGTGT 0.000301265 0.000317093 +AAGTTA 0.000210621 0.000284833 +AAGTTC 0.00052724 0.000239883 +AAGTTG 0.000302959 0.000289349 +AAGTTT 0.00047747 0.000456458 +AATAAA 0.000497378 0.001106586 +AATAAC 0.000229576 0.000235125 +AATAAG 0.000336315 0.000289591 +AATAAT 0.000327526 0.000491891 +AATACA 0.000234658 0.000373549 +AATACC 0.000178747 0.000166948 +AATACG 4.05569E-05 4.2073E-05 +AATACT 0.000201937 0.000278354 +AATAGA 0.000153545 0.000284241 +AATAGC 0.000174299 0.00017547 +AATAGG 9.33974E-05 0.000172701 +AATAGT 0.000162651 0.000219667 +AATATA 0.000180018 0.000445812 +AATATC 0.000256896 0.000227732 +AATATG 0.000262085 0.000335186 +AATATT 0.000358447 0.000614023 +AATCAA 0.000259331 0.000298113 +AATCAC 0.000165828 0.000230097 +AATCAG 0.000372848 0.000287198 +AATCAT 0.000208079 0.000301312 +AATCCA 0.000321173 0.000258433 +AATCCC 0.000209985 0.000274536 +AATCCG 4.3416E-05 4.30945E-05 +AATCCT 0.000310372 0.000271014 +AATCGA 9.22326E-05 3.82017E-05 +AATCGC 7.59251E-05 4.54872E-05 +AATCGG 8.09021E-05 3.66425E-05 +AATCGT 6.45946E-05 4.1831E-05 +AATCTA 0.000125695 0.000212032 +AATCTC 0.000227246 0.000249534 +AATCTG 0.000334092 0.000307092 +AATCTT 0.000276698 0.000331261 +AATGAA 0.001003334 0.000563589 +AATGAC 0.000571715 0.000249507 +AATGAG 0.00080002 0.000330857 +AATGAT 0.000601471 0.000331906 +AATGCA 0.000437549 0.000342632 +AATGCC 0.000555937 0.000234506 +AATGCG 8.28081E-05 3.67231E-05 +AATGCT 0.000445173 0.000327336 +AATGGA 0.000594376 0.000373441 +AATGGC 0.000496531 0.000239829 +AATGGG 0.000420924 0.00026182 +AATGGT 0.000317255 0.000247169 +AATGTA 0.000230423 0.000399223 +AATGTC 0.000374437 
0.000262734 +AATGTG 0.00057108 0.000417557 +AATGTT 0.000345846 0.000498343 +AATTAA 1.35543E-05 0.000460491 +AATTAC 0.000208079 0.000232409 +AATTAG 6.77714E-06 0.000267923 +AATTAT 0.000241435 0.000447506 +AATTCA 0.000279557 0.00037043 +AATTCC 0.000230423 0.000263675 +AATTCG 2.84852E-05 3.70995E-05 +AATTCT 0.000318314 0.000415595 +AATTGA 7.94196E-06 0.000294887 +AATTGC 0.00010081 0.000221979 +AATTGG 0.000108646 0.000231952 +AATTGT 0.000125906 0.000359085 +AATTTA 0.000209562 0.000495923 +AATTTC 0.000234764 0.000388147 +AATTTG 0.000257531 0.000423525 +AATTTT 0.000328373 0.000844416 +ACAAAA 0.000346163 0.000617518 +ACAAAC 0.000198337 0.000273649 +ACAAAG 0.000329221 0.000355832 +ACAAAT 0.000295547 0.000383872 +ACAACA 0.000254566 0.000266014 +ACAACC 0.000177582 0.000163076 +ACAACG 5.08285E-05 4.30945E-05 +ACAACT 0.000221104 0.000219613 +ACAAGA 0.000196537 0.000315104 +ACAAGC 0.000173664 0.000200149 +ACAAGG 0.000148568 0.000240367 +ACAAGT 0.000189548 0.000220769 +ACAATA 0.000133531 0.000239991 +ACAATC 0.000138296 0.000152484 +ACAATG 0.000220786 0.000248244 +ACAATT 0.000206914 0.000266498 +ACACAA 0.000190713 0.000292037 +ACACAC 0.000192301 0.000484578 +ACACAG 0.000381426 0.000397179 +ACACAT 0.000170805 0.000303329 +ACACCA 0.000321808 0.000264643 +ACACCC 0.000264308 0.00022335 +ACACCG 7.33837E-05 6.39563E-05 +ACACCT 0.0002965 0.000272654 +ACACGA 5.64408E-05 5.16973E-05 +ACACGC 6.88303E-05 7.03546E-05 +ACACGG 9.50917E-05 7.62152E-05 +ACACGT 4.59575E-05 7.29354E-05 +ACACTA 0.000102398 0.000155495 +ACACTC 0.000182453 0.000200337 +ACACTG 0.000406946 0.000338412 +ACACTT 0.000176311 0.000280047 +ACAGAA 0.00070874 0.000496999 +ACAGAC 0.000509874 0.000268649 +ACAGAG 0.000736908 0.000470034 +ACAGAT 0.000555196 0.000311098 +ACAGCA 0.000441255 0.000375538 +ACAGCC 0.000536453 0.000340079 +ACAGCG 0.00010621 9.16733E-05 +ACAGCT 0.000443267 0.000339622 +ACAGGA 0.000424206 0.000350456 +ACAGGC 0.000357918 0.000329271 +ACAGGG 0.000304442 0.000311259 +ACAGGT 0.000231376 0.000241684 +ACAGTA 
0.000183406 0.000229022 +ACAGTC 0.000252448 0.000207811 +ACAGTG 0.00053137 0.000365215 +ACAGTT 0.000259967 0.000309915 +ACATAA 6.24767E-06 0.000260019 +ACATAC 0.000142638 0.000175954 +ACATAG 8.68321E-06 0.000182271 +ACATAT 0.000152274 0.000279536 +ACATCA 0.000237306 0.000262277 +ACATCC 0.00022481 0.000212193 +ACATCG 4.97696E-05 4.64819E-05 +ACATCT 0.000261449 0.000294134 +ACATGA 9.63624E-06 0.000276902 +ACATGC 0.000108646 0.000216172 +ACATGG 0.000144332 0.000296177 +ACATGT 0.000135649 0.000297549 +ACATTA 0.000147191 0.000257599 +ACATTC 0.000183406 0.0002676 +ACATTG 0.000159792 0.000269778 +ACATTT 0.000293535 0.000619239 +ACCAAA 0.000481812 0.000332712 +ACCAAC 0.000561761 0.000173453 +ACCAAG 0.000864085 0.000276633 +ACCAAT 0.000332397 0.000175712 +ACCACA 0.000396357 0.000311179 +ACCACC 0.000629109 0.000278219 +ACCACG 0.000199396 9.38509E-05 +ACCACT 0.000320749 0.000249749 +ACCAGA 0.000217186 0.000294054 +ACCAGC 0.000595329 0.000341745 +ACCAGG 0.00027278 0.000304135 +ACCAGT 0.000309313 0.000209316 +ACCATA 0.000143802 0.000158587 +ACCATC 0.00078403 0.00021386 +ACCATG 0.00056875 0.00028908 +ACCATT 0.000385238 0.000249427 +ACCCAA 0.000193784 0.000223323 +ACCCAC 0.000333245 0.000267251 +ACCCAG 0.000702598 0.000447694 +ACCCAT 0.000178641 0.000196197 +ACCCCA 0.000389262 0.000360725 +ACCCCC 0.000320432 0.000265584 +ACCCCG 0.000165934 0.00013251 +ACCCCT 0.000318843 0.000282198 +ACCCGA 9.7739E-05 5.58912E-05 +ACCCGC 0.00019929 0.000100679 +ACCCGG 0.00021888 0.000125977 +ACCCGT 6.99951E-05 5.21006E-05 +ACCCTA 0.000111717 0.000135225 +ACCCTC 0.000408852 0.000270934 +ACCCTG 0.000840789 0.000374275 +ACCCTT 0.000212633 0.000235259 +ACCGAA 0.000105893 4.82562E-05 +ACCGAC 0.000196431 3.87125E-05 +ACCGAG 0.000316302 8.11081E-05 +ACCGAT 9.33974E-05 2.86042E-05 +ACCGCA 7.52898E-05 6.72092E-05 +ACCGCC 0.000206703 9.55715E-05 +ACCGCG 7.29601E-05 6.05958E-05 +ACCGCT 7.22189E-05 6.11066E-05 +ACCGGA 7.61369E-05 6.09184E-05 +ACCGGC 0.000176523 7.01664E-05 +ACCGGG 0.000145497 7.96295E-05 
+ACCGGT 4.82871E-05 3.3282E-05 +ACCGTA 3.8545E-05 3.1669E-05 +ACCGTC 0.000171017 5.46008E-05 +ACCGTG 0.000343622 9.57597E-05 +ACCGTT 6.42769E-05 4.5756E-05 +ACCTAA 8.7891E-06 0.000178131 +ACCTAC 0.000466669 0.000133531 +ACCTAG 8.36553E-06 0.000148317 +ACCTAT 0.000233811 0.000142215 +ACCTCA 0.000274474 0.000335589 +ACCTCC 0.000399851 0.000362957 +ACCTCG 0.000136602 9.33401E-05 +ACCTCT 0.000284216 0.000330293 +ACCTGA 1.75782E-05 0.000294538 +ACCTGC 0.000405993 0.000339218 +ACCTGG 0.000323714 0.000396507 +ACCTGT 0.000253295 0.000317953 +ACCTTA 0.000123577 0.0001759 +ACCTTC 0.000650393 0.000263702 +ACCTTG 0.000306348 0.000284483 +ACCTTT 0.000326467 0.000328787 +ACGAAA 7.12658E-05 5.71547E-05 +ACGAAC 4.37337E-05 3.26368E-05 +ACGAAG 9.68919E-05 6.21819E-05 +ACGAAT 4.08746E-05 4.0218E-05 +ACGACA 5.87705E-05 4.05675E-05 +ACGACC 7.07364E-05 3.63736E-05 +ACGACG 3.32503E-05 1.97595E-05 +ACGACT 4.60634E-05 3.91158E-05 +ACGAGA 4.36278E-05 5.97624E-05 +ACGAGC 5.95117E-05 4.75303E-05 +ACGAGG 5.53819E-05 8.42266E-05 +ACGAGT 4.01334E-05 3.93846E-05 +ACGATA 2.31905E-05 2.39534E-05 +ACGATC 6.0253E-05 3.24217E-05 +ACGATG 8.53496E-05 5.06757E-05 +ACGATT 5.17816E-05 3.86587E-05 +ACGCAA 3.75919E-05 3.89813E-05 +ACGCAC 9.54094E-05 6.09991E-05 +ACGCAG 0.000193572 9.00334E-05 +ACGCAT 3.86509E-05 4.23149E-05 +ACGCCA 9.7739E-05 7.35537E-05 +ACGCCC 0.000193678 9.65124E-05 +ACGCCG 8.40789E-05 5.55417E-05 +ACGCCT 9.53035E-05 9.78566E-05 +ACGCGA 2.12844E-05 2.15338E-05 +ACGCGC 7.32778E-05 5.20737E-05 +ACGCGG 9.12796E-05 5.32835E-05 +ACGCGT 1.69428E-05 2.29587E-05 +ACGCTA 4.25689E-05 2.69374E-05 +ACGCTC 0.000133954 5.45739E-05 +ACGCTG 0.000420924 0.000103879 +ACGCTT 5.92999E-05 5.09177E-05 +ACGGAA 0.00013046 6.79082E-05 +ACGGAC 0.000175676 4.95466E-05 +ACGGAG 0.000332821 0.000105169 +ACGGAT 0.000113729 4.08094E-05 +ACGGCA 0.000108434 6.2854E-05 +ACGGCC 0.000273203 8.03553E-05 +ACGGCG 9.2868E-05 5.16704E-05 +ACGGCT 0.000106634 6.65102E-05 +ACGGGA 8.83146E-05 7.34193E-05 +ACGGGC 0.00020162 
7.09998E-05 +ACGGGG 0.000143908 0.000103126 +ACGGGT 6.50182E-05 4.35247E-05 +ACGGTA 3.8545E-05 2.64535E-05 +ACGGTC 0.000112035 3.87394E-05 +ACGGTG 0.000300418 8.67536E-05 +ACGGTT 5.96176E-05 4.38473E-05 +ACGTAA 2.64732E-06 4.21805E-05 +ACGTAC 8.01608E-05 2.91688E-05 +ACGTAG 3.07089E-06 4.03255E-05 +ACGTAT 5.12521E-05 4.46E-05 +ACGTCA 6.25826E-05 5.73698E-05 +ACGTCC 0.000105893 5.77193E-05 +ACGTCG 5.06167E-05 2.16951E-05 +ACGTCT 7.03128E-05 6.55155E-05 +ACGTGA 4.4475E-06 8.07586E-05 +ACGTGC 8.52437E-05 7.79896E-05 +ACGTGG 9.99628E-05 0.000116406 +ACGTGT 6.42769E-05 8.74526E-05 +ACGTTA 3.16619E-05 4.10514E-05 +ACGTTC 8.60908E-05 5.1079E-05 +ACGTTG 6.59712E-05 6.05152E-05 +ACGTTT 8.87381E-05 9.31251E-05 +ACTAAA 0.000229152 0.000316717 +ACTAAC 0.000109493 0.000119821 +ACTAAG 0.000158627 0.000158883 +ACTAAT 0.000143908 0.000187836 +ACTACA 0.000178112 0.000218806 +ACTACC 0.000125271 0.000119713 +ACTACG 3.53682E-05 2.9572E-05 +ACTACT 0.000144014 0.000168937 +ACTAGA 8.8844E-05 0.000165872 +ACTAGC 8.4926E-05 9.71038E-05 +ACTAGG 5.22051E-05 0.000124687 +ACTAGT 9.63624E-05 0.000113664 +ACTATA 8.81028E-05 0.000164663 +ACTATC 0.0001204 0.000105922 +ACTATG 0.000140414 0.00016977 +ACTATT 0.000181712 0.000236039 +ACTCAA 0.000192089 0.000228027 +ACTCAC 0.000190819 0.000205176 +ACTCAG 0.000379096 0.00032669 +ACTCAT 0.000187854 0.000215526 +ACTCCA 0.000330597 0.000341692 +ACTCCC 0.000219939 0.000250502 +ACTCCG 7.03128E-05 8.23985E-05 +ACTCCT 0.000304971 0.000332363 +ACTCGA 9.32915E-05 3.89276E-05 +ACTCGC 8.09021E-05 5.91172E-05 +ACTCGG 0.000109175 8.82322E-05 +ACTCGT 5.83469E-05 4.47076E-05 +ACTCTA 0.00012273 0.000164017 +ACTCTC 0.000195372 0.000217408 +ACTCTG 0.000408005 0.000373387 +ACTCTT 0.000194207 0.000299968 +ACTGAA 0.000540159 0.00041374 +ACTGAC 0.000344151 0.000212489 +ACTGAG 0.000568326 0.000347848 +ACTGAT 0.000401439 0.000245797 +ACTGCA 0.000352411 0.00042444 +ACTGCC 0.000392968 0.000288865 +ACTGCG 8.12197E-05 7.93875E-05 +ACTGCT 0.000362894 0.000319405 +ACTGGA 0.000557102 
0.000342794 +ACTGGC 0.000333033 0.000242356 +ACTGGG 0.000310372 0.000326717 +ACTGGT 0.000240059 0.000211548 +ACTGTA 0.000191348 0.000291043 +ACTGTC 0.000308889 0.000239614 +ACTGTG 0.000583363 0.000405003 +ACTGTT 0.00028824 0.000332524 +ACTTAA 6.67124E-06 0.000288838 +ACTTAC 0.000174511 0.000158291 +ACTTAG 3.49446E-06 0.00017547 +ACTTAT 0.000126118 0.00019703 +ACTTCA 0.000245565 0.000323706 +ACTTCC 0.00020935 0.000308436 +ACTTCG 4.97696E-05 5.21543E-05 +ACTTCT 0.000266956 0.000361559 +ACTTGA 6.24767E-06 0.000313168 +ACTTGC 0.000107163 0.000215715 +ACTTGG 0.00012633 0.000300318 +ACTTGT 0.000135437 0.000264804 +ACTTTA 0.00016011 0.000335159 +ACTTTC 0.000238576 0.000307065 +ACTTTG 0.00025319 0.000438392 +ACTTTT 0.000257319 0.000579316 +AGAAAA 0.000467517 0.000933912 +AGAAAC 0.00026452 0.000449791 +AGAAAG 0.000399639 0.00059257 +AGAAAT 0.00037486 0.000630637 +AGAACA 0.000221422 0.000384437 +AGAACC 0.000148462 0.000257465 +AGAACG 4.65928E-05 6.64565E-05 +AGAACT 0.000208503 0.000349138 +AGAAGA 0.000317572 0.000618298 +AGAAGC 0.000218774 0.000402395 +AGAAGG 0.000184359 0.000459281 +AGAAGT 0.00021761 0.000375807 +AGAATA 0.000163075 0.000350563 +AGAATC 0.000181077 0.000290075 +AGAATG 0.000218033 0.000405621 +AGAATT 0.000320643 0.000447559 +AGACAA 0.000204055 0.00031712 +AGACAC 0.000156615 0.000288811 +AGACAG 0.000282628 0.000427477 +AGACAT 0.000174935 0.000294618 +AGACCA 0.000187113 0.000330266 +AGACCC 0.000176629 0.000281822 +AGACCG 3.64271E-05 6.53811E-05 +AGACCT 0.00019749 0.000280854 +AGACGA 7.73017E-05 5.91172E-05 +AGACGC 6.6183E-05 8.19683E-05 +AGACGG 7.42308E-05 0.000117966 +AGACGT 4.94519E-05 6.91179E-05 +AGACTA 8.6938E-05 0.000166894 +AGACTC 0.000151533 0.000261713 +AGACTG 0.000231482 0.000347714 +AGACTT 0.000169534 0.000322443 +AGAGAA 0.000565679 0.000631632 +AGAGAC 0.000322655 0.000371721 +AGAGAG 0.000490072 0.000554315 +AGAGAT 0.000404722 0.000365941 +AGAGCA 0.000262826 0.000394787 +AGAGCC 0.000287287 0.00036543 +AGAGCG 4.64869E-05 0.00011275 +AGAGCT 0.000267591 
0.000380485 +AGAGGA 0.000309101 0.000527485 +AGAGGC 0.000227775 0.000418149 +AGAGGG 0.00016551 0.000405728 +AGAGGT 0.000161486 0.000294161 +AGAGTA 0.000121247 0.000203214 +AGAGTC 0.000157674 0.000232141 +AGAGTG 0.000232858 0.00032618 +AGAGTT 0.000193043 0.000307845 +AGATAA 9.95392E-06 0.000304619 +AGATAC 0.000168793 0.000181734 +AGATAG 4.87107E-06 0.000171518 +AGATAT 0.000139461 0.000257922 +AGATCA 0.00013999 0.000262492 +AGATCC 0.000111082 0.000183911 +AGATCG 2.52025E-05 6.08378E-05 +AGATCT 0.000150685 0.000242141 +AGATGA 1.26012E-05 0.000382716 +AGATGC 8.87381E-05 0.000267116 +AGATGG 0.00011034 0.000392367 +AGATGT 0.000114576 0.000331099 +AGATTA 0.000113517 0.000234022 +AGATTC 0.000120188 0.000240582 +AGATTG 0.000121035 0.000253836 +AGATTT 0.000200773 0.000464711 +AGCAAA 0.000502991 0.000422692 +AGCAAC 0.00048986 0.000218403 +AGCAAG 0.000688091 0.00034188 +AGCAAT 0.000326467 0.000262331 +AGCACA 0.000372637 0.000321878 +AGCACC 0.000547995 0.000263648 +AGCACG 0.000146556 7.33387E-05 +AGCACT 0.000285381 0.000328384 +AGCAGA 0.000264626 0.000446726 +AGCAGC 0.000949964 0.00047568 +AGCAGG 0.00028591 0.000431214 +AGCAGT 0.00044655 0.000330938 +AGCATA 0.000124636 0.000179906 +AGCATC 0.000583681 0.000247088 +AGCATG 0.000446126 0.000277332 +AGCATT 0.000320008 0.000339783 +AGCCAA 0.000259967 0.000324137 +AGCCAC 0.000404193 0.000390459 +AGCCAG 0.000836659 0.000522861 +AGCCAT 0.000219304 0.000303248 +AGCCCA 0.000357812 0.000401239 +AGCCCC 0.000633239 0.000435946 +AGCCCG 0.000192407 0.000156866 +AGCCCT 0.000417641 0.000396023 +AGCCGA 0.000145708 0.000105949 +AGCCGC 0.000362683 0.0001652 +AGCCGG 0.0003001 0.000187191 +AGCCGT 0.000114364 7.62421E-05 +AGCCTA 0.000115847 0.000171599 +AGCCTC 0.000553607 0.000502214 +AGCCTG 0.000931009 0.000572972 +AGCCTT 0.000264944 0.000351155 +AGCGAA 0.000107587 5.57568E-05 +AGCGAC 0.000272462 5.72891E-05 +AGCGAG 0.000387568 0.000125816 +AGCGAT 0.000131307 6.22357E-05 +AGCGCA 8.1961E-05 7.8124E-05 +AGCGCC 0.000300206 0.000132483 +AGCGCG 
9.46681E-05 8.68074E-05 +AGCGCT 9.29738E-05 8.54094E-05 +AGCGGA 8.31258E-05 9.05711E-05 +AGCGGC 0.000302959 0.000159743 +AGCGGG 0.000196219 0.000137618 +AGCGGT 7.35955E-05 6.44939E-05 +AGCGTA 2.74262E-05 2.82547E-05 +AGCGTC 0.000172923 6.89835E-05 +AGCGTG 0.00029057 9.52489E-05 +AGCGTT 5.69703E-05 6.04883E-05 +AGCTAA 1.23895E-05 0.000222812 +AGCTAC 0.000366707 0.0002123 +AGCTAG 9.63624E-06 0.000164824 +AGCTAT 0.000200879 0.00019281 +AGCTCA 0.000278392 0.000320588 +AGCTCC 0.000526816 0.000360107 +AGCTCG 0.00012813 7.49786E-05 +AGCTCT 0.000335362 0.000376264 +AGCTGA 2.8591E-05 0.000399034 +AGCTGC 0.000401439 0.000444737 +AGCTGG 0.000311113 0.000548239 +AGCTGT 0.000248954 0.000394061 +AGCTTA 0.000104516 0.000181572 +AGCTTC 0.000531582 0.000327201 +AGCTTG 0.000239424 0.000273703 +AGCTTT 0.000315666 0.000405621 +AGGAAA 0.000492825 0.000683867 +AGGAAC 0.000312913 0.000286526 +AGGAAG 0.000656747 0.00063838 +AGGAAT 0.000248107 0.000362419 +AGGACA 0.00021168 0.000360403 +AGGACC 0.000246518 0.000259777 +AGGACG 8.47142E-05 0.000100518 +AGGACT 0.000186583 0.000303624 +AGGAGA 0.000242389 0.000550228 +AGGAGC 0.000269179 0.000403013 +AGGAGG 0.000259331 0.000620368 +AGGAGT 0.000143802 0.000324862 +AGGATA 0.000105575 0.000196197 +AGGATC 0.00021761 0.000199369 +AGGATG 0.000272886 0.000342175 +AGGATT 0.000186583 0.000298355 +AGGCAA 0.000159369 0.000277466 +AGGCAC 0.000188383 0.000272762 +AGGCAG 0.000431513 0.000585446 +AGGCAT 0.000131201 0.000278004 +AGGCCA 0.000202785 0.000394276 +AGGCCC 0.000272674 0.000371882 +AGGCCG 8.50319E-05 0.000173884 +AGGCCT 0.000239424 0.000369919 +AGGCGA 7.75135E-05 8.00058E-05 +AGGCGC 0.000117329 0.000138075 +AGGCGG 0.000151744 0.000216091 +AGGCGT 5.15698E-05 0.000101136 +AGGCTA 8.42906E-05 0.000162727 +AGGCTC 0.000206597 0.000295828 +AGGCTG 0.000472176 0.000684136 +AGGCTT 0.000151638 0.000304995 +AGGGAA 0.00032795 0.000454119 +AGGGAC 0.000323502 0.000278542 +AGGGAG 0.000477259 0.000517753 +AGGGAT 0.000223751 0.000242249 +AGGGCA 0.000173982 0.000345939 
+AGGGCC 0.000298088 0.000329594 +AGGGCG 6.89362E-05 0.000126918 +AGGGCT 0.000177582 0.00034188 +AGGGGA 0.000127389 0.000391265 +AGGGGC 0.000232223 0.00035188 +AGGGGG 9.29738E-05 0.000288946 +AGGGGT 0.000105152 0.000230393 +AGGGTA 6.65007E-05 0.000146167 +AGGGTC 0.000162757 0.00021558 +AGGGTG 0.000203314 0.000303302 +AGGGTT 9.03265E-05 0.000228162 +AGGTAA 5.08285E-06 0.000191654 +AGGTAC 0.000128342 0.000119686 +AGGTAG 5.8241E-06 0.000176007 +AGGTAT 0.00010314 0.000162055 +AGGTCA 0.000118176 0.000266821 +AGGTCC 0.00013586 0.000187137 +AGGTCG 3.99216E-05 5.53535E-05 +AGGTCT 0.000123471 0.000235851 +AGGTGA 1.05893E-05 0.000309216 +AGGTGC 9.73154E-05 0.000253164 +AGGTGG 0.000119977 0.000418794 +AGGTGT 9.35033E-05 0.000269563 +AGGTTA 7.6984E-05 0.00016348 +AGGTTC 0.000160428 0.000193804 +AGGTTG 0.00010261 0.000248244 +AGGTTT 0.000173241 0.000327416 +AGTAAA 0.000287605 0.000352418 +AGTAAC 0.000182136 0.000167835 +AGTAAG 0.000177476 0.000186976 +AGTAAT 0.000193996 0.000244749 +AGTACA 0.000149732 0.000186976 +AGTACC 0.000131307 0.000118449 +AGTACG 2.63673E-05 2.47868E-05 +AGTACT 0.00013586 0.000164985 +AGTAGA 9.41387E-05 0.000239345 +AGTAGC 0.000146026 0.000209693 +AGTAGG 6.25826E-05 0.000163587 +AGTAGT 0.000144332 0.00016305 +AGTATA 8.6938E-05 0.000209988 +AGTATC 0.000126965 0.000140575 +AGTATG 0.000131731 0.000181438 +AGTATT 0.000178218 0.000330589 +AGTCAA 0.000187113 0.000212032 +AGTCAC 0.000149944 0.000232813 +AGTCAG 0.000308042 0.000275853 +AGTCAT 0.000159792 0.000245717 +AGTCCA 0.000195478 0.000224344 +AGTCCC 0.000210091 0.000264509 +AGTCCG 4.12982E-05 5.39287E-05 +AGTCCT 0.000274368 0.000275128 +AGTCGA 7.15835E-05 3.45724E-05 +AGTCGC 8.1008E-05 4.93316E-05 +AGTCGG 8.55614E-05 6.95481E-05 +AGTCGT 6.26885E-05 4.10783E-05 +AGTCTA 7.94196E-05 0.000144795 +AGTCTC 0.000166569 0.000276875 +AGTCTG 0.000245459 0.000293193 +AGTCTT 0.000173982 0.000302065 +AGTGAA 0.000643087 0.000376049 +AGTGAC 0.000554666 0.000254588 +AGTGAG 0.00062032 0.000393093 +AGTGAT 0.000513898 
0.000293543 +AGTGCA 0.000271509 0.000293758 +AGTGCC 0.000458516 0.000267062 +AGTGCG 7.4019E-05 6.32573E-05 +AGTGCT 0.00032975 0.000342471 +AGTGGA 0.000388203 0.000324378 +AGTGGC 0.000439561 0.000322174 +AGTGGG 0.000344151 0.000321529 +AGTGGT 0.000272356 0.000257573 +AGTGTA 0.000130883 0.00020601 +AGTGTC 0.000249272 0.000229775 +AGTGTG 0.00040504 0.000332309 +AGTGTT 0.0002372 0.000344326 +AGTTAA 8.25964E-06 0.000275531 +AGTTAC 0.000146767 0.000173749 +AGTTAG 3.91803E-06 0.000157296 +AGTTAT 0.000143591 0.000249158 +AGTTCA 0.000210197 0.000285989 +AGTTCC 0.000202679 0.000235044 +AGTTCG 3.44151E-05 4.97348E-05 +AGTTCT 0.000259014 0.000335293 +AGTTGA 6.67124E-06 0.000244641 +AGTTGC 6.53358E-05 0.000196439 +AGTTGG 9.10678E-05 0.000245045 +AGTTGT 0.00010314 0.000266337 +AGTTTA 0.000149732 0.000312738 +AGTTTC 0.000193148 0.000345993 +AGTTTG 0.000200031 0.000364946 +AGTTTT 0.000230529 0.000564046 +ATAAAA 0.000320114 0.00082963 +ATAAAC 0.000135225 0.000319728 +ATAAAG 0.000258167 0.000440758 +ATAAAT 0.000223857 0.000644509 +ATAACA 0.000147403 0.000241496 +ATAACC 9.65742E-05 0.000137376 +ATAACG 2.61555E-05 3.19109E-05 +ATAACT 0.000139249 0.000233619 +ATAAGA 0.000119977 0.000257841 +ATAAGC 8.37612E-05 0.00016227 +ATAAGG 7.43367E-05 0.000176303 +ATAAGT 0.000102187 0.000196493 +ATAATA 9.5833E-05 0.000375699 +ATAATC 0.000101233 0.00019125 +ATAATG 0.000163922 0.000297011 +ATAATT 0.000176629 0.000439064 +ATACAA 0.000128766 0.000292817 +ATACAC 7.5078E-05 0.000174287 +ATACAG 0.000206173 0.000242787 +ATACAT 0.00011267 0.000297791 +ATACCA 0.000131413 0.000181277 +ATACCC 8.60908E-05 0.000115385 +ATACCG 1.8743E-05 2.48674E-05 +ATACCT 0.000132048 0.000193132 +ATACGA 3.90744E-05 3.09162E-05 +ATACGC 2.38259E-05 2.29855E-05 +ATACGG 4.27807E-05 2.9814E-05 +ATACGT 2.41435E-05 4.35247E-05 +ATACTA 5.48525E-05 0.000164851 +ATACTC 5.85587E-05 0.000131864 +ATACTG 0.000132154 0.000237571 +ATACTT 0.000102187 0.000276122 +ATAGAA 0.000398686 0.000320749 +ATAGAC 0.000186477 0.000126004 +ATAGAG 
0.000251284 0.000195337 +ATAGAT 0.000255519 0.000207488 +ATAGCA 0.00018976 0.00020316 +ATAGCC 0.000155556 0.000125574 +ATAGCG 2.61555E-05 2.54588E-05 +ATAGCT 0.000166252 0.000186008 +ATAGGA 0.000144861 0.000194127 +ATAGGC 9.06442E-05 0.000112078 +ATAGGG 7.76194E-05 0.000117455 +ATAGGT 7.59251E-05 0.000130574 +ATAGTA 9.19149E-05 0.000176411 +ATAGTC 7.68781E-05 0.000111944 +ATAGTG 0.000156192 0.000167727 +ATAGTT 0.000119447 0.000226119 +ATATAA 6.56535E-06 0.000384921 +ATATAC 7.03128E-05 0.000211628 +ATATAG 2.965E-06 0.000195928 +ATATAT 0.000124636 0.00059999 +ATATCA 0.000119871 0.00022671 +ATATCC 9.45622E-05 0.000151113 +ATATCG 1.78959E-05 2.45179E-05 +ATATCT 0.000126436 0.000246093 +ATATGA 7.83606E-06 0.000262573 +ATATGC 6.20532E-05 0.000184987 +ATATGG 8.30199E-05 0.000190417 +ATATGT 0.000106105 0.000350025 +ATATTA 0.000107058 0.000351343 +ATATTC 0.000124106 0.000273972 +ATATTG 9.48799E-05 0.000290639 +ATATTT 0.000244295 0.000808634 +ATCAAA 0.00048393 0.000314647 +ATCAAC 0.000584846 0.000148263 +ATCAAG 0.00076857 0.000225635 +ATCAAT 0.000378884 0.000201063 +ATCACA 0.000389685 0.000272493 +ATCACC 0.000614707 0.000207085 +ATCACG 0.000172499 6.22895E-05 +ATCACT 0.000352941 0.000264293 +ATCAGA 0.000231587 0.000294403 +ATCAGC 0.000525546 0.000217059 +ATCAGG 0.000213268 0.00020937 +ATCAGT 0.000301477 0.000233243 +ATCATA 0.000156298 0.000198536 +ATCATC 0.00076624 0.000199154 +ATCATG 0.000570233 0.000245448 +ATCATT 0.000411817 0.000322389 +ATCCAA 0.000230634 0.000208295 +ATCCAC 0.000436172 0.000195122 +ATCCAG 0.000905383 0.000292736 +ATCCAT 0.000257743 0.000231307 +ATCCCA 0.000325408 0.000348197 +ATCCCC 0.000430984 0.000207273 +ATCCCG 0.000136813 6.57037E-05 +ATCCCT 0.000325091 0.000257223 +ATCCGA 0.000183618 3.54327E-05 +ATCCGC 0.000331974 5.13747E-05 +ATCCGG 0.000311748 4.95197E-05 +ATCCGT 0.000118494 4.30139E-05 +ATCCTA 0.000154815 0.000164555 +ATCCTC 0.000515592 0.000250663 +ATCCTG 0.001014559 0.000331987 +ATCCTT 0.000261343 0.000280719 +ATCGAA 0.000129825 3.5352E-05 
+ATCGAC 0.00028231 2.32275E-05 +ATCGAG 0.000379308 4.57291E-05 +ATCGAT 0.000144014 3.15346E-05 +ATCGCA 0.00010314 4.09976E-05 +ATCGCC 0.000327738 4.98961E-05 +ATCGCG 8.41847E-05 2.57277E-05 +ATCGCT 0.000117753 6.51123E-05 +ATCGGA 7.82548E-05 3.80135E-05 +ATCGGC 0.000202361 3.79329E-05 +ATCGGG 0.000155662 4.31214E-05 +ATCGGT 5.97235E-05 2.70719E-05 +ATCGTA 4.61692E-05 2.83892E-05 +ATCGTC 0.00021295 3.71801E-05 +ATCGTG 0.000384603 5.79343E-05 +ATCGTT 7.42308E-05 4.25569E-05 +ATCTAA 9.95392E-06 0.000196412 +ATCTAC 0.000451844 0.000141596 +ATCTAG 9.84803E-06 0.000147591 +ATCTAT 0.000278922 0.000204074 +ATCTCA 0.000236988 0.00032091 +ATCTCC 0.000491025 0.000256632 +ATCTCG 9.74213E-05 7.27472E-05 +ATCTCT 0.000298618 0.000357015 +ATCTGA 1.8743E-05 0.000297737 +ATCTGC 0.000401016 0.000271256 +ATCTGG 0.000308677 0.000276472 +ATCTGT 0.000279663 0.000353682 +ATCTTA 0.00014645 0.000235286 +ATCTTC 0.000623391 0.000285747 +ATCTTG 0.00029004 0.000296742 +ATCTTT 0.000361836 0.000431994 +ATGAAA 0.00058654 0.0005541 +ATGAAC 0.000458092 0.000228834 +ATGAAG 0.000847777 0.000391695 +ATGAAT 0.000436914 0.000381668 +ATGACA 0.000318843 0.000271633 +ATGACC 0.000381002 0.000175819 +ATGACG 0.000111293 4.41968E-05 +ATGACT 0.000294382 0.000256847 +ATGAGA 0.000269603 0.000333707 +ATGAGC 0.000331338 0.000210177 +ATGAGG 0.000280086 0.000284537 +ATGAGT 0.000240377 0.000224936 +ATGATA 0.000144014 0.000226361 +ATGATC 0.000369778 0.000179583 +ATGATG 0.000524699 0.000293758 +ATGATT 0.000321702 0.000319808 +ATGCAA 0.000250754 0.000269697 +ATGCAC 0.000263567 0.000191089 +ATGCAG 0.000657382 0.000309135 +ATGCAT 0.000208397 0.000275988 +ATGCCA 0.00032202 0.000287171 +ATGCCC 0.00035707 0.000216521 +ATGCCG 0.000109281 5.19662E-05 +ATGCCT 0.000330385 0.000319862 +ATGCGA 8.00549E-05 3.33895E-05 +ATGCGC 0.000150474 4.41699E-05 +ATGCGG 0.000203844 4.86057E-05 +ATGCGT 7.25365E-05 4.25569E-05 +ATGCTA 0.000145603 0.000185551 +ATGCTC 0.000299782 0.000190094 +ATGCTG 0.000857414 0.000370376 +ATGCTT 0.000246518 
0.000309861 +ATGGAA 0.000771005 0.000427504 +ATGGAC 0.000632074 0.000193804 +ATGGAG 0.001106791 0.000357042 +ATGGAT 0.000603377 0.000258218 +ATGGCA 0.000487848 0.000287897 +ATGGCC 0.000696774 0.00021058 +ATGGCG 0.000296182 7.33387E-05 +ATGGCT 0.000546407 0.000278327 +ATGGGA 0.000343516 0.000310211 +ATGGGC 0.000454492 0.000185067 +ATGGGG 0.000320643 0.000289134 +ATGGGT 0.000226505 0.000177782 +ATGGTA 0.000157886 0.000191062 +ATGGTC 0.000277651 0.000155468 +ATGGTG 0.000652405 0.000317254 +ATGGTT 0.000243236 0.000245125 +ATGTAA 1.32366E-05 0.000375189 +ATGTAC 0.000299465 0.00019488 +ATGTAG 8.57731E-06 0.000206413 +ATGTAT 0.000242283 0.000387797 +ATGTCA 0.000238788 0.000287225 +ATGTCC 0.000314925 0.00019453 +ATGTCG 0.000110976 3.4196E-05 +ATGTCT 0.000318525 0.000294054 +ATGTGA 2.0755E-05 0.000377904 +ATGTGC 0.00019929 0.000247061 +ATGTGG 0.000252342 0.000323088 +ATGTGT 0.000190078 0.000404976 +ATGTTA 0.000155133 0.000312469 +ATGTTC 0.000340975 0.000263299 +ATGTTG 0.000256049 0.000341073 +ATGTTT 0.00037359 0.000580499 +ATTAAA 0.000411076 0.000617169 +ATTAAC 0.00017737 0.00018703 +ATTAAG 0.0002965 0.000241173 +ATTAAT 0.000257637 0.000347445 +ATTACA 0.000203738 0.000346907 +ATTACC 0.00013226 0.000145091 +ATTACG 3.12384E-05 3.0056E-05 +ATTACT 0.000174088 0.000240743 +ATTAGA 0.0001186 0.000229963 +ATTAGC 9.93274E-05 0.000181062 +ATTAGG 6.62889E-05 0.000161141 +ATTAGT 0.000119447 0.000179422 +ATTATA 0.000123153 0.00034895 +ATTATC 0.000162863 0.000190148 +ATTATG 0.000185101 0.000261041 +ATTATT 0.000273945 0.000539098 +ATTCAA 0.000276168 0.000307388 +ATTCAC 0.000259755 0.00022757 +ATTCAG 0.00051951 0.000337498 +ATTCAT 0.000357918 0.00035594 +ATTCCA 0.000347011 0.000334756 +ATTCCC 0.000237306 0.00024155 +ATTCCG 5.30523E-05 4.68851E-05 +ATTCCT 0.000376978 0.000363199 +ATTCGA 0.000142108 3.52983E-05 +ATTCGC 0.000101869 2.89806E-05 +ATTCGG 0.000119023 3.99223E-05 +ATTCGT 9.55153E-05 4.12933E-05 +ATTCTA 0.000129083 0.000276687 +ATTCTC 0.000257214 0.000338815 +ATTCTG 0.000401122 
0.000413068 +ATTCTT 0.000302112 0.000445328 +ATTGAA 0.00073066 0.000340428 +ATTGAC 0.000496425 0.000158291 +ATTGAG 0.00062625 0.000228458 +ATTGAT 0.000567585 0.000243028 +ATTGCA 0.000376661 0.000267977 +ATTGCC 0.000448562 0.000192326 +ATTGCG 6.04648E-05 3.31207E-05 +ATTGCT 0.000463069 0.000310668 +ATTGGA 0.000393709 0.000251604 +ATTGGC 0.000335786 0.000176814 +ATTGGG 0.000285275 0.00018504 +ATTGGT 0.000268544 0.000198966 +ATTGTA 0.000203314 0.000312496 +ATTGTC 0.000291205 0.000209236 +ATTGTG 0.000493778 0.000313276 +ATTGTT 0.00031429 0.000386587 +ATTTAA 1.30248E-05 0.000600393 +ATTTAC 0.000182241 0.000275128 +ATTTAG 7.09482E-06 0.000287602 +ATTTAT 0.000255519 0.000643165 +ATTTCA 0.000252025 0.000519796 +ATTTCC 0.000227775 0.000398013 +ATTTCG 3.22973E-05 4.33903E-05 +ATTTCT 0.000327103 0.000628675 +ATTTGA 1.00598E-05 0.000439978 +ATTTGC 0.00011214 0.000305936 +ATTTGG 0.000139461 0.000380673 +ATTTGT 0.000154921 0.000506435 +ATTTTA 0.000236353 0.000823474 +ATTTTC 0.000243765 0.000609749 +ATTTTG 0.000305183 0.000655343 +ATTTTT 0.000280616 0.00129582 +CAAAAA 0.000373801 0.00062803 +CAAAAC 0.000229681 0.000338519 +CAAAAG 0.000388415 0.000377259 +CAAAAT 0.000329009 0.000487831 +CAAACA 0.00023667 0.000346907 +CAAACC 0.00016371 0.000233108 +CAAACG 4.76517E-05 5.81225E-05 +CAAACT 0.00019156 0.000305829 +CAAAGA 0.000273945 0.000407288 +CAAAGC 0.000201196 0.000307711 +CAAAGG 0.000190078 0.000331099 +CAAAGT 0.000190819 0.000362903 +CAAATA 0.000158733 0.000382474 +CAAATC 0.000170699 0.000232383 +CAAATG 0.000265791 0.000378737 +CAAATT 0.000233917 0.000364247 +CAACAA 0.000264626 0.000269321 +CAACAC 0.00013459 0.000194638 +CAACAG 0.000421241 0.000280746 +CAACAT 0.000191772 0.000277843 +CAACCA 0.000185842 0.000217677 +CAACCC 0.000142955 0.000193993 +CAACCG 3.53682E-05 4.16966E-05 +CAACCT 0.000178323 0.00026475 +CAACGA 5.85587E-05 4.03255E-05 +CAACGC 5.17816E-05 4.57023E-05 +CAACGG 5.88764E-05 4.42774E-05 +CAACGT 4.73341E-05 5.24769E-05 +CAACTA 0.000106528 0.000135548 +CAACTC 
0.000162016 0.000202649 +CAACTG 0.000289511 0.000236953 +CAACTT 0.000199714 0.000257788 +CAAGAA 0.000737331 0.000426536 +CAAGAC 0.000346163 0.000256201 +CAAGAG 0.000571927 0.000339837 +CAAGAT 0.000460739 0.000278972 +CAAGCA 0.000318737 0.000300479 +CAAGCC 0.000338221 0.000257357 +CAAGCG 5.62291E-05 8.00865E-05 +CAAGCT 0.000312913 0.000259508 +CAAGGA 0.000370095 0.000361048 +CAAGGC 0.000293641 0.000276095 +CAAGGG 0.0002408 0.000260583 +CAAGGT 0.000207444 0.00022835 +CAAGTA 0.000155662 0.000211091 +CAAGTC 0.000202679 0.000201628 +CAAGTG 0.000360459 0.000297011 +CAAGTT 0.0002354 0.000268138 +CAATAA 1.00598E-05 0.000339487 +CAATAC 9.98569E-05 0.00011724 +CAATAG 4.55339E-06 0.000134902 +CAATAT 0.000154074 0.000227651 +CAATCA 0.00010314 0.000194261 +CAATCC 8.70439E-05 0.000141919 +CAATCG 1.32366E-05 2.70987E-05 +CAATCT 0.000106634 0.000189933 +CAATGA 7.09482E-06 0.000237598 +CAATGC 7.24306E-05 0.000159689 +CAATGG 9.59388E-05 0.000200149 +CAATGT 0.000120506 0.000230877 +CAATTA 9.66801E-05 0.000198294 +CAATTC 0.000110764 0.000193536 +CAATTG 8.79969E-05 0.00015614 +CAATTT 0.000155662 0.000330427 +CACAAA 0.000318949 0.000353977 +CACAAC 0.000350187 0.000173857 +CACAAG 0.000526393 0.000249077 +CACAAT 0.000241965 0.000222839 +CACACA 0.000316619 0.000580956 +CACACC 0.000344893 0.000305291 +CACACG 0.000198655 9.84749E-05 +CACACT 0.000297029 0.000282171 +CACAGA 0.000231058 0.000443796 +CACAGC 0.000484248 0.000405863 +CACAGG 0.000264097 0.000385861 +CACAGT 0.000275851 0.000326207 +CACATA 0.000119341 0.000221414 +CACATC 0.000485412 0.000242975 +CACATG 0.000410758 0.000294269 +CACATT 0.000261767 0.000344756 +CACCAA 0.000166781 0.000263702 +CACCAC 0.000363953 0.000352364 +CACCAG 0.000653147 0.000364139 +CACCAT 0.000194737 0.00031306 +CACCCA 0.000258696 0.000394867 +CACCCC 0.000328479 0.000401723 +CACCCG 0.000167522 0.000124014 +CACCCT 0.000254884 0.000339218 +CACCGA 0.000112035 6.85534E-05 +CACCGC 0.000232858 0.000139311 +CACCGG 0.000226822 9.42004E-05 +CACCGT 7.8996E-05 9.80448E-05 
+CACCTA 0.000109493 0.000167378 +CACCTC 0.000376978 0.000396427 +CACCTG 0.000816327 0.00048767 +CACCTT 0.000221634 0.000318357 +CACGAA 9.64683E-05 5.6617E-05 +CACGAC 0.000169958 4.37129E-05 +CACGAG 0.00029004 7.74788E-05 +CACGAT 9.15972E-05 4.82562E-05 +CACGCA 8.01608E-05 8.86086E-05 +CACGCC 0.000209879 0.00014985 +CACGCG 8.23846E-05 4.93584E-05 +CACGCT 7.57133E-05 8.506E-05 +CACGGA 8.79969E-05 8.93613E-05 +CACGGC 0.000215704 0.000100787 +CACGGG 0.000164663 9.5222E-05 +CACGGT 5.7288E-05 7.75863E-05 +CACGTA 3.87568E-05 4.58367E-05 +CACGTC 0.000150474 7.20483E-05 +CACGTG 0.00033208 0.000139473 +CACGTT 5.66526E-05 7.76669E-05 +CACTAA 8.04785E-06 0.000181008 +CACTAC 0.000305924 0.000129794 +CACTAG 8.68321E-06 0.000128692 +CACTAT 0.0001833 0.000154742 +CACTCA 0.000183618 0.000268299 +CACTCC 0.000244506 0.000328949 +CACTCG 0.000109175 6.82308E-05 +CACTCT 0.000168264 0.000292226 +CACTGA 2.6685E-05 0.000349595 +CACTGC 0.00034214 0.000447183 +CACTGG 0.000285169 0.000362849 +CACTGT 0.000218774 0.000381237 +CACTTA 0.000104198 0.000206951 +CACTTC 0.000401439 0.000290908 +CACTTG 0.000226293 0.000293059 +CACTTT 0.000246307 0.000446995 +CAGAAA 0.000882616 0.000611308 +CAGAAC 0.00069561 0.000303651 +CAGAAG 0.001308093 0.000517242 +CAGAAT 0.000596706 0.000389141 +CAGACA 0.000456186 0.000361128 +CAGACC 0.000563985 0.000279617 +CAGACG 0.000218457 9.04905E-05 +CAGACT 0.000369778 0.000297065 +CAGAGA 0.00052957 0.000542324 +CAGAGC 0.000701857 0.000484229 +CAGAGG 0.000591517 0.00054141 +CAGAGT 0.000423571 0.000354676 +CAGATA 0.000246095 0.00023499 +CAGATC 0.000675702 0.000220903 +CAGATG 0.000750991 0.000380861 +CAGATT 0.000467411 0.000320238 +CAGCAA 0.000534653 0.000356531 +CAGCAC 0.000604436 0.000381936 +CAGCAG 0.001754749 0.000593215 +CAGCAT 0.000370942 0.00033411 +CAGCCA 0.000576057 0.00050087 +CAGCCC 0.000809338 0.000587301 +CAGCCG 0.000286758 0.000188508 +CAGCCT 0.000615661 0.000743253 +CAGCGA 0.000200031 9.93621E-05 +CAGCGC 0.000418594 0.000152699 +CAGCGG 0.000465187 0.000176706 
+CAGCGT 0.000157145 0.000102723 +CAGCTA 0.000263991 0.000278999 +CAGCTC 0.000791443 0.000445651 +CAGCTG 0.001692166 0.000580096 +CAGCTT 0.000463916 0.000404438 +CAGGAA 0.000947211 0.000534824 +CAGGAC 0.000887064 0.000330965 +CAGGAG 0.001733041 0.000648515 +CAGGAT 0.000633451 0.000303732 +CAGGCA 0.000539418 0.000461163 +CAGGCC 0.001043361 0.000450167 +CAGGCG 0.000304865 0.000192568 +CAGGCT 0.000618837 0.000518774 +CAGGGA 0.00038132 0.000473207 +CAGGGC 0.000751309 0.00044393 +CAGGGG 0.000370095 0.000380485 +CAGGGT 0.000317149 0.000315507 +CAGGTA 0.000162969 0.000177728 +CAGGTC 0.000429501 0.000229076 +CAGGTG 0.000948376 0.000429064 +CAGGTT 0.000287075 0.000268434 +CAGTAA 1.32366E-05 0.000251443 +CAGTAC 0.000486895 0.000134203 +CAGTAG 1.23895E-05 0.000194208 +CAGTAT 0.000392862 0.000240582 +CAGTCA 0.000326467 0.000270934 +CAGTCC 0.000421136 0.000259454 +CAGTCG 0.000117859 6.13486E-05 +CAGTCT 0.000383861 0.000317335 +CAGTGA 4.20394E-05 0.000447237 +CAGTGC 0.000402181 0.000310291 +CAGTGG 0.000451527 0.000463797 +CAGTGT 0.00037899 0.000378952 +CAGTTA 0.000210515 0.000226683 +CAGTTC 0.000566314 0.000271283 +CAGTTG 0.000356753 0.000267331 +CAGTTT 0.000542489 0.000449925 +CATAAA 0.000238047 0.000313706 +CATAAC 0.000109705 0.000128827 +CATAAG 0.000214115 0.000160334 +CATAAT 0.000163604 0.000233619 +CATACA 0.000140414 0.000201305 +CATACC 0.00011267 0.000120358 +CATACG 3.03912E-05 3.13195E-05 +CATACT 0.00024133 0.00017262 +CATAGA 9.3821E-05 0.000188051 +CATAGC 0.000100386 0.000142188 +CATAGG 6.07824E-05 0.00013786 +CATAGT 0.000110658 0.000160092 +CATATA 7.52898E-05 0.000234291 +CATATC 0.000112776 0.000141274 +CATATG 0.000134696 0.000205096 +CATATT 0.0001761 0.000322792 +CATCAA 0.000183089 0.000227248 +CATCAC 0.000177053 0.000236765 +CATCAG 0.000442632 0.000288946 +CATCAT 0.000184677 0.00026838 +CATCCA 0.000208291 0.000290021 +CATCCC 0.000170593 0.000302441 +CATCCG 4.28866E-05 5.9171E-05 +CATCCT 0.000237835 0.0003436 +CATCGA 8.79969E-05 3.77716E-05 +CATCGC 7.15835E-05 
6.34186E-05 +CATCGG 0.000103034 5.16973E-05 +CATCGT 6.52299E-05 5.78537E-05 +CATCTA 9.31856E-05 0.000185471 +CATCTC 0.000191242 0.000366236 +CATCTG 0.000312913 0.000392098 +CATCTT 0.000192089 0.000388173 +CATGAA 0.000491342 0.000339514 +CATGAC 0.00030836 0.000180228 +CATGAG 0.000446232 0.000274375 +CATGAT 0.000305289 0.000241335 +CATGCA 0.000236988 0.000259024 +CATGCC 0.00032615 0.000286795 +CATGCG 5.59114E-05 5.80956E-05 +CATGCT 0.000284852 0.000266014 +CATGGA 0.000315137 0.000322255 +CATGGC 0.000315772 0.000259616 +CATGGG 0.000244824 0.000260422 +CATGGT 0.000201937 0.000271014 +CATGTA 0.000120718 0.000246604 +CATGTC 0.000212633 0.000201063 +CATGTG 0.000376661 0.000350187 +CATGTT 0.000203844 0.000372688 +CATTAA 9.00088E-06 0.000301124 +CATTAC 0.000145497 0.00014614 +CATTAG 5.71821E-06 0.000172432 +CATTAT 0.000172923 0.000269993 +CATTCA 0.000185101 0.000324862 +CATTCC 0.000169852 0.000295774 +CATTCG 3.25091E-05 4.31483E-05 +CATTCT 0.000198125 0.000386372 +CATTGA 5.18875E-06 0.000236576 +CATTGC 8.47142E-05 0.000230312 +CATTGG 7.59251E-05 0.000209827 +CATTGT 9.02206E-05 0.000320695 +CATTTA 0.000137449 0.000421778 +CATTTC 0.000186371 0.000451888 +CATTTG 0.000194207 0.000436403 +CATTTT 0.000237623 0.000797719 +CCAAAA 0.000325408 0.000388415 +CCAAAC 0.000230634 0.000245609 +CCAAAG 0.000425054 0.000422934 +CCAAAT 0.000323608 0.000333385 +CCAACA 0.000267909 0.000269401 +CCAACC 0.000211997 0.000221602 +CCAACG 6.23708E-05 5.23694E-05 +CCAACT 0.00021168 0.000223269 +CCAAGA 0.000218033 0.000367796 +CCAAGC 0.000226505 0.000272869 +CCAAGG 0.000184253 0.000383254 +CCAAGT 0.000210621 0.000286177 +CCAATA 0.000105363 0.000168023 +CCAATC 0.000119871 0.000136354 +CCAATG 0.000212527 0.000178642 +CCAATT 0.000179171 0.000184852 +CCACAA 0.00018383 0.000254266 +CCACAC 0.0001797 0.000303893 +CCACAG 0.000492719 0.000447049 +CCACAT 0.000159475 0.00027873 +CCACCA 0.000530523 0.000449011 +CCACCC 0.00049399 0.000500601 +CCACCG 0.000166569 0.000153613 +CCACCT 0.000526711 0.000472588 +CCACGA 
8.09021E-05 7.24515E-05 +CCACGC 9.09619E-05 0.000129606 +CCACGG 0.000132578 0.000126972 +CCACGT 5.77116E-05 0.000112401 +CCACTA 9.57271E-05 0.000150898 +CCACTC 0.00020702 0.000280182 +CCACTG 0.000450256 0.000449173 +CCACTT 0.000187536 0.000293865 +CCAGAA 0.000764546 0.000453393 +CCAGAC 0.00052544 0.00029529 +CCAGAG 0.000940116 0.000522431 +CCAGAT 0.000562396 0.000281284 +CCAGCA 0.000454492 0.000514258 +CCAGCC 0.000677502 0.00073664 +CCAGCG 0.000138614 0.00016641 +CCAGCT 0.000494731 0.000568375 +CCAGGA 0.000514639 0.000585338 +CCAGGC 0.000532323 0.000648784 +CCAGGG 0.00047387 0.000522807 +CCAGGT 0.000286863 0.000350617 +CCAGTA 0.000174405 0.000177728 +CCAGTC 0.000241435 0.000261229 +CCAGTG 0.000497272 0.000388469 +CCAGTT 0.000284852 0.000299081 +CCATAA 6.45946E-06 0.000179717 +CCATAC 0.000148568 0.000113557 +CCATAG 6.56535E-06 0.000145978 +CCATAT 0.000196113 0.000183777 +CCATCA 0.00025446 0.000277063 +CCATCC 0.000275427 0.000326422 +CCATCG 6.54417E-05 6.48703E-05 +CCATCT 0.000309736 0.000397556 +CCATGA 1.27071E-05 0.000260557 +CCATGC 9.51976E-05 0.000256443 +CCATGG 0.000144014 0.000304458 +CCATGT 0.000112246 0.000328761 +CCATTA 0.000102928 0.000185229 +CCATTC 0.000172393 0.000266901 +CCATTG 0.000164028 0.000234802 +CCATTT 0.000271191 0.000436698 +CCCAAA 0.000517922 0.000409196 +CCCAAC 0.000556572 0.000230124 +CCCAAG 0.000875627 0.000372446 +CCCAAT 0.000310901 0.000158506 +CCCACA 0.000388521 0.000356182 +CCCACC 0.000621591 0.000554073 +CCCACG 0.000245142 0.000134956 +CCCACT 0.000311748 0.000337874 +CCCAGA 0.000252025 0.000496999 +CCCAGC 0.000863767 0.000778524 +CCCAGG 0.000404404 0.000770379 +CCCAGT 0.000375708 0.000345912 +CCCATA 0.000101339 0.000138317 +CCCATC 0.000611319 0.00032091 +CCCATG 0.000510721 0.000277923 +CCCATT 0.000280086 0.000268944 +CCCCAA 0.00021168 0.000343923 +CCCCAC 0.000333456 0.000471728 +CCCCAG 0.000885475 0.000627438 +CCCCAT 0.000178112 0.00031091 +CCCCCA 0.000567585 0.000472884 +CCCCCC 0.000311325 0.000270423 +CCCCCG 0.000320008 0.000194799 
+CCCCCT 0.000421665 0.000349327 +CCCCGA 0.000156827 0.000129095 +CCCCGC 0.000252025 0.00029685 +CCCCGG 0.00035707 0.00024077 +CCCCGT 8.6938E-05 0.000124176 +CCCCTA 8.84205E-05 0.000158103 +CCCCTC 0.00035347 0.00051208 +CCCCTG 0.000796314 0.000458662 +CCCCTT 0.000173241 0.000359704 +CCCGAA 0.000131731 7.06234E-05 +CCCGAC 0.000281992 8.89312E-05 +CCCGAG 0.000525652 0.000178696 +CCCGAT 0.000105257 4.13471E-05 +CCCGCA 0.000114364 0.000121326 +CCCGCC 0.000339069 0.000353305 +CCCGCG 0.000177159 0.00016891 +CCCGCT 0.00010261 0.000143156 +CCCGGA 0.000122518 0.000149527 +CCCGGC 0.000317361 0.000303947 +CCCGGG 0.000269815 0.000300264 +CCCGGT 7.16894E-05 8.73182E-05 +CCCGTA 3.12384E-05 3.45186E-05 +CCCGTC 0.000142743 0.000122482 +CCCGTG 0.000333456 0.000122858 +CCCGTT 4.79694E-05 6.36337E-05 +CCCTAA 1.02716E-05 0.000172163 +CCCTAC 0.000454809 0.000154931 +CCCTAG 1.0907E-05 0.000169233 +CCCTAT 0.000297876 0.000130574 +CCCTCA 0.000272992 0.00035723 +CCCTCC 0.000455551 0.000650397 +CCCTCG 0.000187642 0.000141059 +CCCTCT 0.000305183 0.000459361 +CCCTGA 3.08148E-05 0.00038285 +CCCTGC 0.000375813 0.000563132 +CCCTGG 0.000364165 0.000580123 +CCCTGT 0.00020342 0.00039925 +CCCTTA 9.11737E-05 0.000165227 +CCCTTC 0.000547148 0.000436107 +CCCTTG 0.000282734 0.000304243 +CCCTTT 0.000294064 0.000406051 +CCGAAA 6.21591E-05 5.987E-05 +CCGAAC 5.54878E-05 4.00298E-05 +CCGAAG 0.000129189 8.65923E-05 +CCGAAT 4.92401E-05 4.16428E-05 +CCGACA 4.73341E-05 5.09446E-05 +CCGACC 8.61967E-05 7.93875E-05 +CCGACG 4.0451E-05 3.80404E-05 +CCGACT 4.56398E-05 6.29347E-05 +CCGAGA 4.53221E-05 0.000123235 +CCGAGC 0.000113623 0.000147403 +CCGAGG 8.06903E-05 0.000190148 +CCGAGT 5.62291E-05 9.56521E-05 +CCGATA 1.8743E-05 1.81196E-05 +CCGATC 4.59575E-05 3.34971E-05 +CCGATG 6.85126E-05 4.6912E-05 +CCGATT 3.78037E-05 3.91964E-05 +CCGCAA 3.86509E-05 5.14285E-05 +CCGCAC 0.000121671 9.12432E-05 +CCGCAG 0.000283793 0.000176411 +CCGCAT 3.98157E-05 5.20199E-05 +CCGCCA 0.000162016 0.00015407 +CCGCCC 0.000337268 0.000328787 
+CCGCCG 0.000271509 0.000248728 +CCGCCT 0.000165299 0.000231684 +CCGCGA 3.01794E-05 5.86333E-05 +CCGCGC 0.000133954 0.000192138 +CCGCGG 0.000145708 0.000187567 +CCGCGT 2.35082E-05 5.76924E-05 +CCGCTA 3.11325E-05 3.6293E-05 +CCGCTC 0.000159686 0.000135682 +CCGCTG 0.000429289 0.00017762 +CCGCTT 5.18875E-05 8.32588E-05 +CCGGAA 0.000131519 0.000100249 +CCGGAC 0.000207656 9.0141E-05 +CCGGAG 0.00036893 0.00018297 +CCGGAT 0.000121353 5.24769E-05 +CCGGCA 0.000113411 0.000110599 +CCGGCC 0.000352623 0.000267197 +CCGGCG 0.00014052 0.000153775 +CCGGCT 0.000130672 0.000165039 +CCGGGA 0.000117647 0.000189422 +CCGGGC 0.000286016 0.000268165 +CCGGGG 0.000215915 0.000222893 +CCGGGT 8.84205E-05 0.000127644 +CCGGTA 2.54143E-05 3.03786E-05 +CCGGTC 9.91156E-05 6.98707E-05 +CCGGTG 0.000240377 0.000102696 +CCGGTT 4.84989E-05 5.44932E-05 +CCGTAA 1.37661E-06 3.41423E-05 +CCGTAC 8.05844E-05 2.51362E-05 +CCGTAG 2.22375E-06 4.16966E-05 +CCGTAT 5.7288E-05 3.29863E-05 +CCGTCA 5.5276E-05 6.1698E-05 +CCGTCC 0.000142532 0.000120089 +CCGTCG 6.13119E-05 4.51646E-05 +CCGTCT 6.53358E-05 0.000122455 +CCGTGA 4.87107E-06 8.6216E-05 +CCGTGC 7.65605E-05 0.000106056 +CCGTGG 8.81028E-05 0.000151516 +CCGTGT 4.6381E-05 0.000104873 +CCGTTA 2.20257E-05 3.25561E-05 +CCGTTC 7.85724E-05 5.88484E-05 +CCGTTG 5.24169E-05 5.6402E-05 +CCGTTT 5.77116E-05 8.08661E-05 +CCTAAA 0.000251178 0.000246443 +CCTAAC 0.000127813 0.000129526 +CCTAAG 0.000224704 0.000176653 +CCTAAT 0.000169428 0.00017426 +CCTACA 0.000165193 0.000178427 +CCTACC 0.000172817 0.000182782 +CCTACG 4.14041E-05 3.74221E-05 +CCTACT 0.000180759 0.000173937 +CCTAGA 0.000101445 0.000207891 +CCTAGC 0.000123047 0.000151678 +CCTAGG 8.12197E-05 0.000200176 +CCTAGT 0.000122094 0.000134902 +CCTATA 8.25964E-05 0.000129284 +CCTATC 0.00012866 0.000115869 +CCTATG 0.000157039 0.000152108 +CCTATT 0.000161592 0.000189046 +CCTCAA 0.000222798 0.000284806 +CCTCAC 0.000212844 0.000336234 +CCTCAG 0.000565997 0.000589935 +CCTCAT 0.000218245 0.000302038 +CCTCCA 0.000593847 
0.000501407 +CCTCCC 0.000464446 0.00086345 +CCTCCG 0.000147509 0.000189987 +CCTCCT 0.000590458 0.000677092 +CCTCGA 0.00013999 6.98438E-05 +CCTCGC 0.000116058 0.000152215 +CCTCGG 0.000198655 0.000192245 +CCTCGT 8.22787E-05 8.98453E-05 +CCTCTA 0.000115105 0.000202864 +CCTCTC 0.000248424 0.000493154 +CCTCTG 0.000520463 0.000591952 +CCTCTT 0.00023667 0.000433795 +CCTGAA 0.000678137 0.00038191 +CCTGAC 0.000494943 0.000320077 +CCTGAG 0.000976649 0.000522189 +CCTGAT 0.000524381 0.000249185 +CCTGCA 0.00043416 0.000457506 +CCTGCC 0.000667866 0.00071766 +CCTGCG 0.000143908 0.00017176 +CCTGCT 0.00052131 0.000521409 +CCTGGA 0.000566314 0.000543749 +CCTGGC 0.000604542 0.000596388 +CCTGGG 0.000587493 0.00077218 +CCTGGT 0.000354635 0.000335051 +CCTGTA 0.000210621 0.000323169 +CCTGTC 0.000355482 0.000374382 +CCTGTG 0.00074845 0.000502268 +CCTGTT 0.00032382 0.000360994 +CCTTAA 1.03775E-05 0.000232141 +CCTTAC 0.00021295 0.000146059 +CCTTAG 6.77714E-06 0.000181922 +CCTTAT 0.000239847 0.000203993 +CCTTCA 0.000320749 0.000362043 +CCTTCC 0.000345634 0.000578537 +CCTTCG 6.95715E-05 8.44685E-05 +CCTTCT 0.000373378 0.000482508 +CCTTGA 1.12246E-05 0.000295882 +CCTTGC 0.000119447 0.000278972 +CCTTGG 0.000138084 0.000399545 +CCTTGT 0.000126754 0.000306877 +CCTTTA 0.000149626 0.000279483 +CCTTTC 0.000266638 0.000430461 +CCTTTG 0.000259967 0.000402718 +CCTTTT 0.000255837 0.000547513 +CGAAAA 0.000152486 5.69665E-05 +CGAAAC 0.000108117 4.56485E-05 +CGAAAG 0.000171334 5.65633E-05 +CGAAAT 0.000122836 4.32558E-05 +CGAACA 8.96912E-05 3.87394E-05 +CGAACC 7.94196E-05 3.78522E-05 +CGAACG 2.1708E-05 1.69367E-05 +CGAACT 7.05246E-05 5.20468E-05 +CGAAGA 0.00012273 6.98707E-05 +CGAAGC 0.00012093 6.91448E-05 +CGAAGG 0.000105257 7.73175E-05 +CGAAGT 9.14913E-05 5.02456E-05 +CGAATA 5.87705E-05 3.11582E-05 +CGAATC 0.000109175 3.65618E-05 +CGAATG 0.000124848 4.44925E-05 +CGAATT 0.000111611 3.93577E-05 +CGACAA 6.78773E-05 3.35777E-05 +CGACAC 7.87842E-05 4.30945E-05 +CGACAG 0.000162228 6.71823E-05 +CGACAT 
7.61369E-05 3.52983E-05 +CGACCA 7.53956E-05 4.47882E-05 +CGACCC 8.81028E-05 7.68604E-05 +CGACCG 2.45671E-05 3.08356E-05 +CGACCT 7.52898E-05 6.09184E-05 +CGACGA 3.70625E-05 1.8711E-05 +CGACGC 5.09344E-05 3.81479E-05 +CGACGG 6.04648E-05 3.83361E-05 +CGACGT 3.08148E-05 2.10499E-05 +CGACTA 4.80753E-05 2.04047E-05 +CGACTC 8.79969E-05 5.98431E-05 +CGACTG 0.000155768 5.97355E-05 +CGACTT 7.58192E-05 4.98961E-05 +CGAGAA 0.000240271 7.78551E-05 +CGAGAC 0.000228728 9.91739E-05 +CGAGAG 0.000319796 0.000104981 +CGAGAT 0.000198867 6.73974E-05 +CGAGCA 0.000131307 7.19676E-05 +CGAGCC 0.00020342 0.000120923 +CGAGCG 4.36278E-05 7.69411E-05 +CGAGCT 0.000144861 7.74788E-05 +CGAGGA 0.000160533 0.000120546 +CGAGGC 0.000212844 0.000156947 +CGAGGG 0.000158098 0.000129203 +CGAGGT 0.000100598 8.33663E-05 +CGAGTA 7.51839E-05 4.42237E-05 +CGAGTC 0.000117012 5.90097E-05 +CGAGTG 0.000218774 7.56507E-05 +CGAGTT 9.25503E-05 5.46276E-05 +CGATAA 4.4475E-06 2.34157E-05 +CGATAC 7.96314E-05 1.68292E-05 +CGATAG 1.58839E-06 1.92756E-05 +CGATAT 6.68183E-05 2.07811E-05 +CGATCA 5.88764E-05 3.12657E-05 +CGATCC 6.38533E-05 4.14815E-05 +CGATCG 1.36602E-05 1.30924E-05 +CGATCT 6.49123E-05 5.98968E-05 +CGATGA 3.17678E-06 4.29601E-05 +CGATGC 5.16757E-05 4.46269E-05 +CGATGG 5.74998E-05 5.52728E-05 +CGATGT 5.46407E-05 4.21536E-05 +CGATTA 4.43691E-05 2.41147E-05 +CGATTC 8.73615E-05 5.66439E-05 +CGATTG 5.15698E-05 3.25024E-05 +CGATTT 0.000105893 5.76386E-05 +CGCAAA 0.000154498 4.36322E-05 +CGCAAC 0.000204055 3.56746E-05 +CGCAAG 0.000383226 4.81218E-05 +CGCAAT 9.24444E-05 3.12388E-05 +CGCACA 0.000126436 7.55163E-05 +CGCACC 0.000232435 9.61092E-05 +CGCACG 0.000112035 4.65356E-05 +CGCACT 7.92078E-05 5.93592E-05 +CGCAGA 9.71037E-05 9.85287E-05 +CGCAGC 0.0003612 0.000176223 +CGCAGG 0.000203314 0.000149796 +CGCAGT 0.000109917 7.55163E-05 +CGCATA 3.69566E-05 2.19908E-05 +CGCATC 0.000368295 5.15091E-05 +CGCATG 0.000238788 5.69934E-05 +CGCATT 0.000112246 4.25031E-05 +CGCCAA 8.33376E-05 6.15099E-05 +CGCCAC 0.000241647 
0.000132617 +CGCCAG 0.000402392 0.00013786 +CGCCAT 0.000101869 8.17533E-05 +CGCCCA 0.000126754 0.000159393 +CGCCCC 0.000181394 0.000260691 +CGCCCG 9.41387E-05 0.000188347 +CGCCCT 0.000112988 0.000148156 +CGCCGA 7.09482E-05 6.20475E-05 +CGCCGC 0.000291205 0.000274375 +CGCCGG 0.000225552 0.00014985 +CGCCGT 6.89362E-05 5.59718E-05 +CGCCTA 6.93598E-05 4.00298E-05 +CGCCTC 0.000333986 0.000210123 +CGCCTG 0.000622226 0.000218968 +CGCCTT 0.00011807 9.94427E-05 +CGCGAA 2.97559E-05 2.45179E-05 +CGCGAC 0.000131836 3.4438E-05 +CGCGAG 0.000226505 7.26397E-05 +CGCGAT 4.07687E-05 2.45717E-05 +CGCGCA 3.90744E-05 6.8177E-05 +CGCGCC 0.000213162 0.000179583 +CGCGCG 9.83744E-05 0.000140655 +CGCGCT 4.18276E-05 9.05442E-05 +CGCGGA 3.72743E-05 8.53826E-05 +CGCGGC 0.000185101 0.000184153 +CGCGGG 0.000119447 0.000168265 +CGCGGT 3.01794E-05 6.53811E-05 +CGCGTA 1.45073E-05 1.21514E-05 +CGCGTC 0.000102504 5.8015E-05 +CGCGTG 0.000228728 6.19131E-05 +CGCGTT 2.4673E-05 2.87924E-05 +CGCTAA 3.91803E-06 3.54865E-05 +CGCTAC 0.000335045 3.0513E-05 +CGCTAG 3.70625E-06 3.40079E-05 +CGCTAT 0.00015185 2.45986E-05 +CGCTCA 0.000112882 6.64027E-05 +CGCTCC 0.000286546 0.000155011 +CGCTCG 0.000100069 6.87416E-05 +CGCTCT 0.000126012 0.000110815 +CGCTGA 1.30248E-05 9.22379E-05 +CGCTGC 0.000354317 0.000193778 +CGCTGG 0.000288028 0.000159555 +CGCTGT 0.000172288 0.000109793 +CGCTTA 4.09805E-05 2.97602E-05 +CGCTTC 0.000428972 9.27487E-05 +CGCTTG 0.000110552 7.84735E-05 +CGCTTT 0.000173135 7.76401E-05 +CGGAAA 0.000229787 8.07855E-05 +CGGAAC 0.00021168 5.76117E-05 +CGGAAG 0.000437337 0.00011939 +CGGAAT 0.000127177 4.73959E-05 +CGGACA 0.000130354 6.51123E-05 +CGGACC 0.0001833 8.12694E-05 +CGGACG 7.70899E-05 5.32566E-05 +CGGACT 9.42446E-05 6.0542E-05 +CGGAGA 0.000147509 0.000119525 +CGGAGC 0.000247895 0.00019445 +CGGAGG 0.000217716 0.000191627 +CGGAGT 0.000110234 8.65117E-05 +CGGATA 6.16296E-05 2.59428E-05 +CGGATC 0.000201726 6.42251E-05 +CGGATG 0.000190289 5.987E-05 +CGGATT 0.000104093 4.35784E-05 +CGGCAA 0.000106952 
5.00036E-05 +CGGCAC 0.000222904 7.98176E-05 +CGGCAG 0.00052724 0.000168426 +CGGCAT 9.82685E-05 5.12134E-05 +CGGCCA 0.000164346 0.00012923 +CGGCCC 0.000399428 0.000226172 +CGGCCG 0.000153545 0.000178104 +CGGCCT 0.000189866 0.000176733 +CGGCGA 9.2868E-05 6.56768E-05 +CGGCGC 0.00026219 0.000174179 +CGGCGG 0.000309525 0.00029107 +CGGCGT 6.81949E-05 5.68052E-05 +CGGCTA 7.67723E-05 4.74766E-05 +CGGCTC 0.000298406 0.000171733 +CGGCTG 0.000661406 0.000189664 +CGGCTT 0.000118176 9.2453E-05 +CGGGAA 0.000289934 0.000114148 +CGGGAC 0.000428336 0.00011318 +CGGGAG 0.000715517 0.000233512 +CGGGAT 0.000214115 6.62414E-05 +CGGGCA 0.00021941 0.000132214 +CGGGCC 0.000457457 0.000199719 +CGGGCG 0.000154392 0.000186008 +CGGGCT 0.000202573 0.000137618 +CGGGGA 0.000141685 0.000169475 +CGGGGC 0.000385132 0.000255368 +CGGGGG 0.00015831 0.000175846 +CGGGGT 0.000124 0.000120627 +CGGGTA 6.21591E-05 3.52445E-05 +CGGGTC 0.000169852 8.83667E-05 +CGGGTG 0.000315984 0.000127025 +CGGGTT 7.12658E-05 7.6027E-05 +CGGTAA 2.01196E-06 2.80666E-05 +CGGTAC 0.000103881 2.47061E-05 +CGGTAG 2.8591E-06 3.91426E-05 +CGGTAT 6.11001E-05 2.35501E-05 +CGGTCA 7.23248E-05 5.12672E-05 +CGGTCC 0.000117435 7.91724E-05 +CGGTCG 3.10266E-05 3.36584E-05 +CGGTCT 7.38073E-05 5.72354E-05 +CGGTGA 6.14178E-06 8.13769E-05 +CGGTGC 8.40789E-05 0.000105975 +CGGTGG 9.9751E-05 0.000152941 +CGGTGT 5.73939E-05 6.87685E-05 +CGGTTA 3.97098E-05 2.85505E-05 +CGGTTC 0.00012273 6.17787E-05 +CGGTTG 7.47603E-05 5.27458E-05 +CGGTTT 0.000100704 6.60263E-05 +CGTAAA 7.08423E-05 4.04062E-05 +CGTAAC 4.16159E-05 2.72332E-05 +CGTAAG 6.60771E-05 3.23411E-05 +CGTAAT 4.79694E-05 3.34433E-05 +CGTACA 4.35219E-05 3.11044E-05 +CGTACC 4.80753E-05 2.57546E-05 +CGTACG 9.84803E-06 8.14575E-06 +CGTACT 4.69105E-05 2.78784E-05 +CGTAGA 4.28866E-05 3.6051E-05 +CGTAGC 4.72282E-05 3.88469E-05 +CGTAGG 3.02853E-05 4.1831E-05 +CGTAGT 3.72743E-05 3.15883E-05 +CGTATA 2.06491E-05 2.7045E-05 +CGTATC 5.89823E-05 2.76633E-05 +CGTATG 3.99216E-05 3.27981E-05 +CGTATT 5.38994E-05 
4.64281E-05 +CGTCAA 4.8393E-05 3.51639E-05 +CGTCAC 6.23708E-05 6.03539E-05 +CGTCAG 0.000130989 6.98169E-05 +CGTCAT 6.29003E-05 4.77185E-05 +CGTCCA 6.86185E-05 7.01395E-05 +CGTCCC 7.83606E-05 0.000117885 +CGTCCG 2.00137E-05 5.29877E-05 +CGTCCT 8.46083E-05 9.87975E-05 +CGTCGA 3.89685E-05 1.57001E-05 +CGTCGC 5.85587E-05 4.68314E-05 +CGTCGG 6.06766E-05 4.60517E-05 +CGTCGT 3.0603E-05 2.35501E-05 +CGTCTA 3.75919E-05 2.83623E-05 +CGTCTC 9.72096E-05 0.000121998 +CGTCTG 0.000167946 0.000107804 +CGTCTT 7.1054E-05 7.79627E-05 +CGTGAA 0.00012453 7.72099E-05 +CGTGAC 0.000129189 6.88222E-05 +CGTGAG 0.000198231 0.000113799 +CGTGAT 0.000108752 7.05697E-05 +CGTGCA 7.77253E-05 8.0651E-05 +CGTGCC 0.000174935 0.000113584 +CGTGCG 3.07089E-05 5.28264E-05 +CGTGCT 9.90097E-05 8.95495E-05 +CGTGGA 0.000110023 0.000117105 +CGTGGC 0.000180865 0.000128504 +CGTGGG 0.000129189 0.000143855 +CGTGGT 8.71497E-05 0.000108395 +CGTGTA 4.98755E-05 4.88208E-05 +CGTGTC 0.000111187 8.00327E-05 +CGTGTG 0.000229152 0.000130278 +CGTGTT 7.65605E-05 8.8286E-05 +CGTTAA 2.22375E-06 3.48681E-05 +CGTTAC 7.4019E-05 2.94914E-05 +CGTTAG 1.58839E-06 3.35777E-05 +CGTTAT 6.33239E-05 3.57822E-05 +CGTTCA 6.35357E-05 5.21812E-05 +CGTTCC 9.26562E-05 7.08385E-05 +CGTTCG 1.58839E-05 1.99208E-05 +CGTTCT 9.71037E-05 7.12417E-05 +CGTTGA 2.43553E-06 4.30945E-05 +CGTTGC 4.58516E-05 5.50578E-05 +CGTTGG 5.00873E-05 6.11873E-05 +CGTTGT 4.20394E-05 5.53535E-05 +CGTTTA 4.96637E-05 5.37405E-05 +CGTTTC 0.000104622 8.56245E-05 +CGTTTG 7.25365E-05 7.40108E-05 +CGTTTT 9.62565E-05 0.000113852 +CTAAAA 0.000237729 0.000409627 +CTAAAC 0.000120824 0.000159689 +CTAAAG 0.000272886 0.000241308 +CTAAAT 0.000184148 0.000290908 +CTAACA 0.0001222 0.000183481 +CTAACC 8.54555E-05 0.000124902 +CTAACG 2.372E-05 3.32551E-05 +CTAACT 0.00010674 0.000165308 +CTAAGA 0.000108117 0.000223726 +CTAAGC 0.000103351 0.000141301 +CTAAGG 9.39269E-05 0.000171733 +CTAAGT 0.000108222 0.000168991 +CTAATA 6.34298E-05 0.000202138 +CTAATC 7.41249E-05 0.000119337 +CTAATG 
0.000146026 0.000188697 +CTAATT 0.000117435 0.000289242 +CTACAA 0.000169111 0.000207408 +CTACAC 0.000100175 0.000132321 +CTACAG 0.000355164 0.00026018 +CTACAT 0.000103669 0.000190229 +CTACCA 0.000115953 0.00018211 +CTACCC 0.000125483 0.000168507 +CTACCG 2.6685E-05 3.1884E-05 +CTACCT 0.000130672 0.00022171 +CTACGA 5.07226E-05 2.78246E-05 +CTACGC 6.11001E-05 2.96258E-05 +CTACGG 7.3066E-05 3.65618E-05 +CTACGT 3.63212E-05 4.05944E-05 +CTACTA 6.97833E-05 0.000161087 +CTACTC 0.000107587 0.000176303 +CTACTG 0.000205961 0.00021886 +CTACTT 0.000119129 0.000250637 +CTAGAA 0.000363318 0.000270584 +CTAGAC 0.000185206 0.000122159 +CTAGAG 0.000355906 0.000215715 +CTAGAT 0.000204479 0.000150011 +CTAGCA 0.000146238 0.000157915 +CTAGCC 0.00014772 0.000139661 +CTAGCG 3.4521E-05 3.6508E-05 +CTAGCT 0.00014232 0.000160738 +CTAGGA 0.000136496 0.000213214 +CTAGGC 0.000124742 0.000150925 +CTAGGG 0.000106528 0.000155979 +CTAGGT 8.24905E-05 0.00013466 +CTAGTA 6.0253E-05 0.000109443 +CTAGTC 6.67124E-05 0.000109148 +CTAGTG 0.000180653 0.000140978 +CTAGTT 7.71958E-05 0.000172889 +CTATAA 6.67124E-06 0.000211763 +CTATAC 6.29003E-05 9.77491E-05 +CTATAG 3.81214E-06 0.000127509 +CTATAT 8.31258E-05 0.000194745 +CTATCA 7.6984E-05 0.000145226 +CTATCC 7.20071E-05 0.000115788 +CTATCG 1.59898E-05 2.13456E-05 +CTATCT 7.91019E-05 0.000178077 +CTATGA 6.24767E-06 0.000178911 +CTATGC 4.79694E-05 0.000122213 +CTATGG 6.40651E-05 0.000152081 +CTATGT 5.74998E-05 0.000207676 +CTATTA 5.17816E-05 0.000187917 +CTATTC 5.94058E-05 0.000167082 +CTATTG 7.82548E-05 0.000161168 +CTATTT 0.000122412 0.000400889 +CTCAAA 0.000370413 0.000375323 +CTCAAC 0.00060253 0.000183696 +CTCAAG 0.000786783 0.000309566 +CTCAAT 0.00034341 0.000187702 +CTCACA 0.000315349 0.000309781 +CTCACC 0.00064944 0.000327766 +CTCACG 0.000199926 0.000101674 +CTCACT 0.00033621 0.000396669 +CTCAGA 0.000203102 0.000417208 +CTCAGC 0.000681102 0.000532189 +CTCAGG 0.000285487 0.000455974 +CTCAGT 0.000353788 0.000342928 +CTCATA 0.000130778 0.000165845 +CTCATC 
0.000816327 0.000261739 +CTCATG 0.000511992 0.000280934 +CTCATT 0.00040864 0.000344864 +CTCCAA 0.000215704 0.000302791 +CTCCAC 0.00028877 0.000353682 +CTCCAG 0.000741461 0.000610206 +CTCCAT 0.00020575 0.000345912 +CTCCCA 0.000286228 0.000594882 +CTCCCC 0.000311325 0.000559476 +CTCCCG 0.000153121 0.000238888 +CTCCCT 0.000283051 0.000582569 +CTCCGA 0.000167946 8.27211E-05 +CTCCGC 0.000243659 0.000188401 +CTCCGG 0.000299465 0.000161463 +CTCCGT 0.000102504 0.0001131 +CTCCTA 0.000147085 0.000217435 +CTCCTC 0.00051125 0.000553992 +CTCCTG 0.001019853 0.000740565 +CTCCTT 0.000269391 0.000452157 +CTCGAA 6.37474E-05 6.13754E-05 +CTCGAC 0.000122624 4.2745E-05 +CTCGAG 0.000170487 7.75863E-05 +CTCGAT 7.91019E-05 3.94921E-05 +CTCGCA 5.87705E-05 6.22357E-05 +CTCGCC 0.000198549 0.00014251 +CTCGCG 4.46867E-05 7.47904E-05 +CTCGCT 7.41249E-05 0.000123235 +CTCGGA 6.53358E-05 9.34477E-05 +CTCGGC 0.000177688 0.000189396 +CTCGGG 0.000139461 0.000161221 +CTCGGT 5.07226E-05 7.82046E-05 +CTCGTA 2.61555E-05 2.74751E-05 +CTCGTC 0.000128554 6.6322E-05 +CTCGTG 0.000223434 9.1512E-05 +CTCGTT 4.80753E-05 5.96818E-05 +CTCTAA 8.36553E-06 0.000222893 +CTCTAC 0.000550536 0.000219559 +CTCTAG 1.04834E-05 0.000201009 +CTCTAT 0.000321702 0.000182218 +CTCTCA 0.000272992 0.000338197 +CTCTCC 0.000524805 0.000521597 +CTCTCG 0.000139461 9.73996E-05 +CTCTCT 0.000340445 0.000553266 +CTCTGA 3.99216E-05 0.000429332 +CTCTGC 0.000478424 0.000552056 +CTCTGG 0.000372213 0.000500924 +CTCTGT 0.000280192 0.000533372 +CTCTTA 0.000130883 0.000234829 +CTCTTC 0.000847777 0.000456512 +CTCTTG 0.000273945 0.000355725 +CTCTTT 0.000425159 0.000500709 +CTGAAA 0.000695821 0.000511328 +CTGAAC 0.000569915 0.000249561 +CTGAAG 0.001336261 0.000479524 +CTGAAT 0.000439879 0.000354676 +CTGACA 0.000405781 0.000291016 +CTGACC 0.000684173 0.000321475 +CTGACG 0.000232011 7.30967E-05 +CTGACT 0.000386932 0.00032005 +CTGAGA 0.000344151 0.000482723 +CTGAGC 0.000786254 0.000422692 +CTGAGG 0.000496108 0.000573214 +CTGAGT 0.000370625 0.000347929 
+CTGATA 0.00015365 0.000199531 +CTGATC 0.000442208 0.000191116 +CTGATG 0.000592046 0.000315023 +CTGATT 0.000337586 0.000316394 +CTGCAA 0.000370519 0.00037121 +CTGCAC 0.000727483 0.000362661 +CTGCAG 0.001986125 0.000643837 +CTGCAT 0.000331656 0.000343331 +CTGCCA 0.000437443 0.000476083 +CTGCCC 0.001057022 0.000617545 +CTGCCG 0.000310795 0.000183024 +CTGCCT 0.000604436 0.000737231 +CTGCGA 0.000182241 8.2506E-05 +CTGCGC 0.00058834 0.000157323 +CTGCGG 0.000729601 0.000191089 +CTGCGT 0.000189866 0.000108879 +CTGCTA 0.000243659 0.000218295 +CTGCTC 0.000980991 0.000450409 +CTGCTG 0.002398895 0.000681206 +CTGCTT 0.000435007 0.000479954 +CTGGAA 0.001007781 0.00052571 +CTGGAC 0.001392702 0.000318652 +CTGGAG 0.002456712 0.000647816 +CTGGAT 0.000907925 0.0003204 +CTGGCA 0.000688091 0.00038199 +CTGGCC 0.001667599 0.000522888 +CTGGCG 0.000458727 0.000131945 +CTGGCT 0.000845236 0.000503693 +CTGGGA 0.00053137 0.000682254 +CTGGGC 0.001278655 0.000606496 +CTGGGG 0.0007745 0.000584397 +CTGGGT 0.000407899 0.000406508 +CTGGTA 0.000228411 0.000176491 +CTGGTC 0.000593635 0.000289645 +CTGGTG 0.001510243 0.000376882 +CTGGTT 0.000339916 0.000308598 +CTGTAA 1.41896E-05 0.000408578 +CTGTAC 0.00044655 0.000212274 +CTGTAG 1.59898E-05 0.000264777 +CTGTAT 0.00028824 0.000314835 +CTGTCA 0.00031429 0.000349273 +CTGTCC 0.000615343 0.000395835 +CTGTCG 0.000173664 8.49255E-05 +CTGTCT 0.000395615 0.000483987 +CTGTGA 3.4521E-05 0.000469389 +CTGTGC 0.000446232 0.000420783 +CTGTGG 0.000439984 0.000515683 +CTGTGT 0.00031609 0.000527995 +CTGTTA 0.000131942 0.000271203 +CTGTTC 0.000419865 0.000329943 +CTGTTG 0.000344257 0.000364113 +CTGTTT 0.000381532 0.000564987 +CTTAAA 0.000261555 0.000403228 +CTTAAC 0.000126436 0.000173158 +CTTAAG 0.000203314 0.000214532 +CTTAAT 0.000197808 0.000267197 +CTTACA 0.00015365 0.000207166 +CTTACC 0.000140096 0.000159797 +CTTACG 3.63212E-05 3.91695E-05 +CTTACT 0.000192831 0.000212731 +CTTAGA 9.41387E-05 0.000218242 +CTTAGC 0.000103563 0.000161678 +CTTAGG 6.66065E-05 0.000176895 
+CTTAGT 0.000123047 0.000170201 +CTTATA 9.27621E-05 0.000192084 +CTTATC 0.000148462 0.000145172 +CTTATG 0.000161275 0.000181196 +CTTATT 0.000225234 0.000331395 +CTTCAA 0.000297029 0.000315023 +CTTCAC 0.00019569 0.000290182 +CTTCAG 0.000588446 0.000458609 +CTTCAT 0.000266638 0.000364758 +CTTCCA 0.000320432 0.000435381 +CTTCCC 0.000262508 0.000558939 +CTTCCG 6.38533E-05 0.000113449 +CTTCCT 0.000372213 0.00065572 +CTTCGA 0.000181606 4.98155E-05 +CTTCGC 0.000116906 7.52474E-05 +CTTCGG 0.000191031 8.75333E-05 +CTTCGT 0.000101763 6.92792E-05 +CTTCTA 0.000147509 0.000266579 +CTTCTC 0.00029057 0.000479712 +CTTCTG 0.000512945 0.000539179 +CTTCTT 0.000280298 0.000463985 +CTTGAA 0.000425795 0.00041702 +CTTGAC 0.000235082 0.000191681 +CTTGAG 0.000335045 0.000322282 +CTTGAT 0.000330491 0.000243324 +CTTGCA 0.000254143 0.000259186 +CTTGCC 0.000282945 0.000297549 +CTTGCG 4.51103E-05 5.75311E-05 +CTTGCT 0.000323502 0.000364543 +CTTGGA 0.000337586 0.000369409 +CTTGGC 0.000269179 0.000348977 +CTTGGG 0.000239847 0.000375968 +CTTGGT 0.000204585 0.000271283 +CTTGTA 0.000136284 0.000229936 +CTTGTC 0.000204479 0.000245797 +CTTGTG 0.00032615 0.000301285 +CTTGTT 0.000227564 0.000354354 +CTTTAA 9.21267E-06 0.000417262 +CTTTAC 0.000137766 0.00021179 +CTTTAG 4.65928E-06 0.00023956 +CTTTAT 0.000220681 0.000400863 +CTTTCA 0.000219939 0.000402368 +CTTTCC 0.000244295 0.000473664 +CTTTCG 3.54741E-05 6.03001E-05 +CTTTCT 0.00033208 0.000622572 +CTTTGA 7.62428E-06 0.000393953 +CTTTGC 0.000124848 0.000358628 +CTTTGG 0.000138296 0.00045299 +CTTTGT 0.000160004 0.000474524 +CTTTTA 0.000165193 0.000486729 +CTTTTC 0.00025319 0.000543615 +CTTTTG 0.000250966 0.000487025 +CTTTTT 0.000263355 0.000862106 +GAAAAA 0.001054057 0.000755539 +GAAAAC 0.00066363 0.000411804 +GAAAAG 0.001044209 0.000524366 +GAAAAT 0.000944034 0.000614319 +GAAACA 0.000513792 0.000427719 +GAAACC 0.000394133 0.000323222 +GAAACG 0.000108858 8.09736E-05 +GAAACT 0.00042823 0.000370618 +GAAAGA 0.000522581 0.000516973 +GAAAGC 0.000384073 
0.000321744 +GAAAGG 0.000377931 0.000395136 +GAAAGT 0.000433843 0.000327712 +GAAATA 0.000419865 0.000465814 +GAAATC 0.00046434 0.000280397 +GAAATG 0.000687985 0.000492133 +GAAATT 0.00062985 0.000437021 +GAACAA 0.000460634 0.00027838 +GAACAC 0.000265155 0.000210069 +GAACAG 0.000669348 0.000295559 +GAACAT 0.000352411 0.000254804 +GAACCA 0.000350187 0.000244695 +GAACCC 0.000264838 0.000218 +GAACCG 7.45485E-05 6.22895E-05 +GAACCT 0.000349446 0.000223242 +GAACGA 0.00015831 4.78261E-05 +GAACGC 0.00013639 5.36598E-05 +GAACGG 0.000178535 5.53266E-05 +GAACGT 0.000115 5.52191E-05 +GAACTA 0.000243553 0.000165819 +GAACTC 0.000351564 0.000241657 +GAACTG 0.00065357 0.000305748 +GAACTT 0.000449091 0.000277386 +GAAGAA 0.001988454 0.000610098 +GAAGAC 0.000900724 0.000325319 +GAAGAG 0.001626407 0.000493665 +GAAGAT 0.001304175 0.000348278 +GAAGCA 0.000651558 0.000377608 +GAAGCC 0.000744003 0.000352364 +GAAGCG 0.000118176 9.80985E-05 +GAAGCT 0.000693915 0.000349945 +GAAGGA 0.000675066 0.000476029 +GAAGGC 0.000562926 0.000302603 +GAAGGG 0.000494625 0.000375753 +GAAGGT 0.00039318 0.000249212 +GAAGTA 0.000361624 0.000222893 +GAAGTC 0.000398898 0.00023577 +GAAGTG 0.000693703 0.000337121 +GAAGTT 0.000498967 0.00031341 +GAATAA 1.63075E-05 0.000353117 +GAATAC 0.000294064 0.000154742 +GAATAG 8.36553E-06 0.000168157 +GAATAT 0.000449938 0.000300398 +GAATCA 0.000288028 0.000252357 +GAATCC 0.000255413 0.000192702 +GAATCG 4.96637E-05 5.409E-05 +GAATCT 0.00037539 0.000256954 +GAATGA 1.71546E-05 0.000326637 +GAATGC 0.000260708 0.000203106 +GAATGG 0.000289299 0.000268971 +GAATGT 0.000597129 0.000338008 +GAATTA 0.000300418 0.000268675 +GAATTC 0.000346163 0.000258191 +GAATTG 0.000322338 0.000264051 +GAATTT 0.000549372 0.00044479 +GACAAA 0.00062985 0.000288139 +GACAAC 0.00060666 0.000146489 +GACAAG 0.00095547 0.000235393 +GACAAT 0.000436384 0.000172459 +GACACA 0.000483612 0.000285263 +GACACC 0.00061259 0.000207085 +GACACG 0.000240271 7.05159E-05 +GACACT 0.000365224 0.000226172 +GACAGA 0.000375708 
0.000377877 +GACAGC 0.000831576 0.000265261 +GACAGG 0.000386826 0.000291957 +GACAGT 0.000484565 0.000230071 +GACATA 0.000214009 0.000148828 +GACATC 0.000849154 0.000182917 +GACATG 0.000719012 0.000222812 +GACATT 0.000547571 0.000289376 +GACCAA 0.000298618 0.000208241 +GACCAC 0.000443902 0.00019289 +GACCAG 0.000857837 0.000301285 +GACCAT 0.000273627 0.000178535 +GACCCA 0.000439667 0.000256417 +GACCCC 0.000689256 0.000267036 +GACCCG 0.000218669 9.71845E-05 +GACCCT 0.000505532 0.000258191 +GACCGA 0.000178959 5.51922E-05 +GACCGC 0.000330174 6.77469E-05 +GACCGG 0.000330068 7.16719E-05 +GACCGT 0.000127813 4.92509E-05 +GACCTA 0.000196325 0.000116971 +GACCTC 0.000674219 0.000270235 +GACCTG 0.001327154 0.000298866 +GACCTT 0.000364059 0.000230635 +GACGAA 0.000185312 4.38204E-05 +GACGAC 0.000360883 4.0675E-05 +GACGAG 0.00056462 6.40369E-05 +GACGAT 0.000178959 3.29863E-05 +GACGCA 0.000113199 6.18593E-05 +GACGCC 0.000344469 9.55177E-05 +GACGCG 0.000131201 6.0085E-05 +GACGCT 0.000124848 6.91179E-05 +GACGGA 0.000105893 8.24254E-05 +GACGGC 0.00037539 8.50868E-05 +GACGGG 0.000246836 0.000108099 +GACGGT 9.12796E-05 4.98692E-05 +GACGTA 5.05108E-05 2.88731E-05 +GACGTC 0.000218033 5.67514E-05 +GACGTG 0.000484989 8.81516E-05 +GACGTT 8.45024E-05 5.29608E-05 +GACTAA 1.15423E-05 0.000137107 +GACTAC 0.000517074 0.000124391 +GACTAG 1.3872E-05 0.000112266 +GACTAT 0.000379308 0.000125681 +GACTCA 0.000315455 0.000231791 +GACTCC 0.000493884 0.000250798 +GACTCG 0.00019389 7.0032E-05 +GACTCT 0.000393392 0.000258594 +GACTGA 3.82273E-05 0.000270934 +GACTGC 0.000431301 0.000224855 +GACTGG 0.00049399 0.000279644 +GACTGT 0.000355482 0.000255583 +GACTTA 0.000193254 0.000154716 +GACTTC 0.000778735 0.000262116 +GACTTG 0.000447609 0.000252572 +GACTTT 0.000540583 0.000347256 +GAGAAA 0.001312858 0.000605125 +GAGAAC 0.000875627 0.000259347 +GAGAAG 0.002071262 0.000512645 +GAGAAT 0.000656429 0.000355295 +GAGACA 0.000498649 0.00036422 +GAGACC 0.000735002 0.000310506 +GAGACG 0.000281781 0.00012966 +GAGACT 
0.00040864 0.000305506 +GAGAGA 0.000513156 0.00053391 +GAGAGC 0.000746438 0.000305425 +GAGAGG 0.000574998 0.00047568 +GAGAGT 0.000445597 0.00024198 +GAGATA 0.000231376 0.000202515 +GAGATC 0.000816433 0.000199988 +GAGATG 0.000909831 0.000374812 +GAGATT 0.000520569 0.000283273 +GAGCAA 0.000439879 0.000271713 +GAGCAC 0.000545348 0.000217139 +GAGCAG 0.001501453 0.000448715 +GAGCAT 0.000328691 0.000208779 +GAGCCA 0.000544818 0.00042823 +GAGCCC 0.000840153 0.00038605 +GAGCCG 0.000306136 0.000204155 +GAGCCT 0.000575104 0.000341799 +GAGCGA 0.000210938 9.40122E-05 +GAGCGC 0.000469846 0.000133881 +GAGCGG 0.000586646 0.000149393 +GAGCGT 0.000178218 6.6322E-05 +GAGCTA 0.000283051 0.000161383 +GAGCTC 0.000780853 0.000282144 +GAGCTG 0.002084393 0.00049388 +GAGCTT 0.000400063 0.000264401 +GAGGAA 0.001468944 0.00051778 +GAGGAC 0.00130301 0.000288112 +GAGGAG 0.002958961 0.000584236 +GAGGAT 0.000921691 0.000273541 +GAGGCA 0.000620637 0.000427908 +GAGGCC 0.00133139 0.000463044 +GAGGCG 0.000414464 0.00021679 +GAGGCT 0.000755651 0.000468502 +GAGGGA 0.00037899 0.000437935 +GAGGGC 0.000868003 0.000323948 +GAGGGG 0.000513686 0.000434709 +GAGGGT 0.000323926 0.00024612 +GAGGTA 0.000208079 0.000151006 +GAGGTC 0.000491554 0.000229156 +GAGGTG 0.001188434 0.000359327 +GAGGTT 0.000319902 0.00025725 +GAGTAA 1.23895E-05 0.000177836 +GAGTAC 0.000492931 9.79641E-05 +GAGTAG 9.42446E-06 0.00018668 +GAGTAT 0.000352729 0.000148048 +GAGTCA 0.000308254 0.000229667 +GAGTCC 0.000459469 0.000215553 +GAGTCG 0.000150685 6.5408E-05 +GAGTCT 0.000372637 0.0002569 +GAGTGA 3.02853E-05 0.000293059 +GAGTGC 0.000386614 0.000239103 +GAGTGG 0.000429183 0.000287494 +GAGTGT 0.00039265 0.000235582 +GAGTTA 0.000189442 0.00017141 +GAGTTC 0.000615872 0.00023499 +GAGTTG 0.000353364 0.000228915 +GAGTTT 0.000526605 0.000337336 +GATAAA 0.00051771 0.000290613 +GATAAC 0.000243447 0.000121434 +GATAAG 0.000346799 0.000148317 +GATAAT 0.000340128 0.000217946 +GATACA 0.000238153 0.000178427 +GATACC 0.000208609 0.000103744 +GATACG 
4.6381E-05 2.73945E-05 +GATACT 0.00023127 0.000151221 +GATAGA 0.000165616 0.000158694 +GATAGC 0.000164981 9.37703E-05 +GATAGG 8.91617E-05 0.00010783 +GATAGT 0.000174405 0.000109175 +GATATA 0.000199502 0.000167835 +GATATC 0.000294276 0.000106943 +GATATG 0.000293535 0.000161759 +GATATT 0.000439984 0.000269401 +GATCAA 0.000245671 0.000156167 +GATCAC 0.000195902 0.000193831 +GATCAG 0.000407052 0.000180605 +GATCAT 0.000221422 0.000185309 +GATCCA 0.000312172 0.000187514 +GATCCC 0.000307407 0.000162996 +GATCCG 6.34298E-05 5.24769E-05 +GATCCT 0.000401969 0.00021429 +GATCGA 0.000123789 3.51101E-05 +GATCGC 9.78449E-05 5.10252E-05 +GATCGG 0.000114047 3.6508E-05 +GATCGT 8.36553E-05 3.84437E-05 +GATCTA 0.000138084 0.000112454 +GATCTC 0.000279557 0.000208779 +GATCTG 0.000442314 0.000237517 +GATCTT 0.000324244 0.000226603 +GATGAA 0.001348862 0.000319486 +GATGAC 0.000963836 0.000179583 +GATGAG 0.001275267 0.000264051 +GATGAT 0.001037749 0.000226764 +GATGCA 0.00052491 0.000221172 +GATGCC 0.000793772 0.000213376 +GATGCG 0.000125589 4.99499E-05 +GATGCT 0.000694233 0.000264912 +GATGGA 0.000704081 0.000318814 +GATGGC 0.000695504 0.000215607 +GATGGG 0.00058887 0.00028908 +GATGGT 0.000455445 0.000221898 +GATGTA 0.000303806 0.000185175 +GATGTC 0.00051125 0.000178857 +GATGTG 0.000907183 0.000285424 +GATGTT 0.000511356 0.00027795 +GATTAA 1.19659E-05 0.000211037 +GATTAC 0.000247577 0.000197326 +GATTAG 9.10678E-06 0.00011974 +GATTAT 0.000330068 0.000225178 +GATTCA 0.000289299 0.000222516 +GATTCC 0.00029237 0.000207219 +GATTCG 5.14639E-05 3.47337E-05 +GATTCT 0.000391062 0.000308893 +GATTGA 1.11187E-05 0.000176223 +GATTGC 0.000112776 0.000161974 +GATTGG 0.00013406 0.000175335 +GATTGT 0.000164557 0.000218564 +GATTTA 0.000239424 0.00026924 +GATTTC 0.000353258 0.000295801 +GATTTG 0.000358553 0.000297145 +GATTTT 0.000444644 0.000546814 +GCAAAA 0.000408958 0.000352983 +GCAAAC 0.000197066 0.000197595 +GCAAAG 0.00041277 0.000300022 +GCAAAT 0.0002983 0.000278165 +GCAACA 0.000231482 0.000240071 +GCAACC 
0.000175782 0.000178615 +GCAACG 5.54878E-05 4.87401E-05 +GCAACT 0.000208821 0.000171599 +GCAAGA 0.00020755 0.000280719 +GCAAGC 0.000149097 0.000185229 +GCAAGG 0.000147826 0.000243808 +GCAAGT 0.0001815 0.000194934 +GCAATA 0.000115529 0.000180013 +GCAATC 0.000123153 0.000128692 +GCAATG 0.000249589 0.000201574 +GCAATT 0.000223116 0.000200821 +GCACAA 0.000191454 0.000202407 +GCACAC 0.000168158 0.000220876 +GCACAG 0.00044835 0.000351289 +GCACAT 0.000148038 0.000228941 +GCACCA 0.000240165 0.000252196 +GCACCC 0.000290252 0.000248271 +GCACCG 7.26424E-05 9.70232E-05 +GCACCT 0.000277651 0.000270047 +GCACGA 6.95715E-05 4.90627E-05 +GCACGC 8.11139E-05 8.12425E-05 +GCACGG 0.000127707 8.85011E-05 +GCACGT 5.10403E-05 7.21827E-05 +GCACTA 9.56212E-05 0.00011603 +GCACTC 0.000181288 0.000210849 +GCACTG 0.000462751 0.000304054 +GCACTT 0.000191666 0.000296016 +GCAGAA 0.000787207 0.000401051 +GCAGAC 0.000468787 0.000237033 +GCAGAG 0.000964895 0.000517887 +GCAGAT 0.000547995 0.000266471 +GCAGCA 0.000542595 0.000409196 +GCAGCC 0.000800232 0.000495117 +GCAGCG 0.000179382 0.00018539 +GCAGCT 0.000624132 0.000412933 +GCAGGA 0.000404616 0.000450382 +GCAGGC 0.000407687 0.000355429 +GCAGGG 0.000402181 0.000449737 +GCAGGT 0.000243447 0.000268407 +GCAGTA 0.000189654 0.000178723 +GCAGTC 0.000239 0.000205284 +GCAGTG 0.000573198 0.000469013 +GCAGTT 0.000270132 0.000265315 +GCATAA 7.09482E-06 0.000149419 +GCATAC 0.000113623 9.42004E-05 +GCATAG 5.08285E-06 0.000125493 +GCATAT 0.000168581 0.00018504 +GCATCA 0.000197384 0.000201439 +GCATCC 0.000231058 0.000211252 +GCATCG 5.40053E-05 5.08102E-05 +GCATCT 0.000292476 0.000266095 +GCATGA 7.51839E-06 0.000226522 +GCATGC 7.70899E-05 0.00018125 +GCATGG 0.000126224 0.000253406 +GCATGT 0.000113199 0.000239614 +GCATTA 0.000126542 0.000176921 +GCATTC 0.000161063 0.000203187 +GCATTG 0.000184359 0.00020609 +GCATTT 0.00032435 0.000430542 +GCCAAA 0.000602847 0.000274402 +GCCAAC 0.000691056 0.000184126 +GCCAAG 0.001305446 0.000347821 +GCCAAT 0.000404404 0.00015442 
+GCCACA 0.000533488 0.000316905 +GCCACC 0.000869591 0.000410809 +GCCACG 0.000312913 0.000121514 +GCCACT 0.000427066 0.000314485 +GCCAGA 0.000295758 0.000335481 +GCCAGC 0.000845236 0.000402072 +GCCAGG 0.000442632 0.000567998 +GCCAGT 0.000411711 0.000258245 +GCCATA 0.000172393 0.000139446 +GCCATC 0.001003016 0.000228458 +GCCATG 0.000842589 0.000286822 +GCCATT 0.000489648 0.000269159 +GCCCAA 0.000268756 0.000215768 +GCCCAC 0.000467517 0.000300344 +GCCCAG 0.001259912 0.00064744 +GCCCAT 0.000245354 0.000207676 +GCCCCA 0.00046381 0.000421993 +GCCCCC 0.000530735 0.000391722 +GCCCCG 0.000301688 0.000271982 +GCCCCT 0.000443691 0.000415783 +GCCCGA 0.000183618 9.55446E-05 +GCCCGC 0.000403875 0.000233189 +GCCCGG 0.000543442 0.000277198 +GCCCGT 0.000133107 8.37427E-05 +GCCCTA 0.000164557 0.000136005 +GCCCTC 0.000589293 0.000361236 +GCCCTG 0.001542858 0.000538131 +GCCCTT 0.000303489 0.000288435 +GCCGAA 0.00014232 5.77999E-05 +GCCGAC 0.000334303 6.44133E-05 +GCCGAG 0.000664371 0.00020394 +GCCGAT 0.000116694 3.6051E-05 +GCCGCA 0.000138825 0.000112266 +GCCGCC 0.000646687 0.000352714 +GCCGCG 0.000242918 0.000187164 +GCCGCT 0.000170487 0.000150199 +GCCGGA 0.000102081 0.000126649 +GCCGGC 0.000374543 0.000208779 +GCCGGG 0.000293323 0.000296258 +GCCGGT 7.45485E-05 7.71562E-05 +GCCGTA 4.55339E-05 3.15077E-05 +GCCGTC 0.000235506 8.87968E-05 +GCCGTG 0.000545136 0.000129579 +GCCGTT 6.81949E-05 6.0085E-05 +GCCTAA 9.63624E-06 0.000137241 +GCCTAC 0.000522051 0.00011681 +GCCTAG 1.2813E-05 0.000166464 +GCCTAT 0.000324561 0.000125843 +GCCTCA 0.000347964 0.000423714 +GCCTCC 0.000715623 0.000648945 +GCCTCG 0.000251389 0.000162969 +GCCTCT 0.000447715 0.000435381 +GCCTGA 3.51564E-05 0.000334003 +GCCTGC 0.000515168 0.000435758 +GCCTGG 0.000526711 0.000671178 +GCCTGT 0.000315878 0.000413417 +GCCTTA 0.000155027 0.000179099 +GCCTTC 0.000990097 0.000350241 +GCCTTG 0.000494625 0.00032169 +GCCTTT 0.000520145 0.000378952 +GCGAAA 5.7288E-05 4.50302E-05 +GCGAAC 4.00275E-05 3.33358E-05 +GCGAAG 0.000102187 
6.95481E-05 +GCGAAT 3.71684E-05 3.21529E-05 +GCGACA 4.70164E-05 5.78268E-05 +GCGACC 7.78312E-05 6.41176E-05 +GCGACG 3.90744E-05 4.22611E-05 +GCGACT 4.20394E-05 5.16435E-05 +GCGAGA 3.20855E-05 9.39047E-05 +GCGAGC 7.12658E-05 0.0001074 +GCGAGG 6.05707E-05 0.000142887 +GCGAGT 3.13443E-05 5.55148E-05 +GCGATA 1.76841E-05 1.91143E-05 +GCGATC 5.11462E-05 4.81756E-05 +GCGATG 7.93137E-05 5.11328E-05 +GCGATT 4.4475E-05 5.5004E-05 +GCGCAA 3.43093E-05 3.94384E-05 +GCGCAC 0.000122624 8.01671E-05 +GCGCAG 0.000270662 0.000159286 +GCGCAT 3.28268E-05 4.69389E-05 +GCGCCA 7.22189E-05 0.000108556 +GCGCCC 0.000278286 0.000223403 +GCGCCG 0.000139778 0.000165819 +GCGCCT 9.9751E-05 0.000145575 +GCGCGA 2.4673E-05 5.39018E-05 +GCGCGC 0.000150474 0.000174368 +GCGCGG 0.000147509 0.000200875 +GCGCGT 2.41435E-05 5.18855E-05 +GCGCTA 4.08746E-05 3.12388E-05 +GCGCTC 0.000208079 0.000116729 +GCGCTG 0.000573939 0.000165953 +GCGCTT 6.05707E-05 6.90373E-05 +GCGGAA 0.000124424 7.78551E-05 +GCGGAC 0.00020575 7.48173E-05 +GCGGAG 0.000416688 0.000211817 +GCGGAT 0.00010621 5.9386E-05 +GCGGCA 0.000147509 0.00011275 +GCGGCC 0.000502461 0.000236496 +GCGGCG 0.000337374 0.000318679 +GCGGCT 0.000193784 0.000161894 +GCGGGA 9.80567E-05 0.000164447 +GCGGGC 0.000324455 0.000228511 +GCGGGG 0.000217716 0.000274402 +GCGGGT 7.70899E-05 9.87975E-05 +GCGGTA 3.37798E-05 3.14539E-05 +GCGGTC 0.000107693 6.48165E-05 +GCGGTG 0.000317255 0.000146919 +GCGGTT 5.17816E-05 5.89559E-05 +GCGTAA 1.16482E-06 2.53513E-05 +GCGTAC 5.61232E-05 1.64259E-05 +GCGTAG 1.4825E-06 3.45993E-05 +GCGTAT 4.10864E-05 2.45717E-05 +GCGTCA 4.36278E-05 5.00305E-05 +GCGTCC 0.000136284 0.000103233 +GCGTCG 5.67585E-05 4.41161E-05 +GCGTCT 6.17355E-05 8.42534E-05 +GCGTGA 4.4475E-06 9.869E-05 +GCGTGC 6.56535E-05 9.34208E-05 +GCGTGG 7.05246E-05 0.00014985 +GCGTGT 4.32042E-05 8.97646E-05 +GCGTTA 2.20257E-05 2.71794E-05 +GCGTTC 7.87842E-05 5.62407E-05 +GCGTTG 6.14178E-05 5.3391E-05 +GCGTTT 6.98892E-05 7.29623E-05 +GCTAAA 0.000260073 0.000196036 +GCTAAC 0.000120506 
0.000113261 +GCTAAG 0.000232011 0.000161275 +GCTAAT 0.000180441 0.00019695 +GCTACA 0.000208926 0.00016477 +GCTACC 0.00016964 0.000123665 +GCTACG 4.17217E-05 3.36852E-05 +GCTACT 0.000186371 0.000208026 +GCTAGA 9.42446E-05 0.000155845 +GCTAGC 8.68321E-05 9.42004E-05 +GCTAGG 6.59712E-05 0.000143156 +GCTAGT 0.000102716 0.000111944 +GCTATA 0.000107799 0.000130359 +GCTATC 0.000148038 9.03023E-05 +GCTATG 0.000202785 0.000145468 +GCTATT 0.00022894 0.000197649 +GCTCAA 0.000208503 0.000197407 +GCTCAC 0.000202149 0.000300156 +GCTCAG 0.000572668 0.000374947 +GCTCAT 0.000197172 0.000206574 +GCTCCA 0.000335998 0.000313598 +GCTCCC 0.000317678 0.000384598 +GCTCCG 9.60447E-05 0.000157995 +GCTCCT 0.000339069 0.000388469 +GCTCGA 0.000129083 4.33365E-05 +GCTCGC 0.000117647 0.000100088 +GCTCGG 0.00017197 0.00012388 +GCTCGT 7.85724E-05 4.93853E-05 +GCTCTA 0.00013406 0.000151113 +GCTCTC 0.000259649 0.000296177 +GCTCTG 0.00061852 0.000490762 +GCTCTT 0.000266214 0.000310775 +GCTGAA 0.000692962 0.000334406 +GCTGAC 0.000565997 0.00022714 +GCTGAG 0.000984485 0.000527243 +GCTGAT 0.000508815 0.000224291 +GCTGCA 0.000581034 0.000404572 +GCTGCC 0.000926244 0.000506811 +GCTGCG 0.000201514 0.000189019 +GCTGCT 0.000739449 0.000526678 +GCTGGA 0.000512097 0.000491326 +GCTGGC 0.000530735 0.00037492 +GCTGGG 0.000514427 0.000710804 +GCTGGT 0.000331444 0.000318625 +GCTGTA 0.000229681 0.000221306 +GCTGTC 0.000451315 0.000294457 +GCTGTG 0.000976543 0.000491676 +GCTGTT 0.000399004 0.000342767 +GCTTAA 5.92999E-06 0.000181572 +GCTTAC 0.000146661 0.000115519 +GCTTAG 4.4475E-06 0.000147591 +GCTTAT 0.000206067 0.000177755 +GCTTCA 0.000269285 0.00029529 +GCTTCC 0.00031429 0.000374382 +GCTTCG 6.4171E-05 7.68873E-05 +GCTTCT 0.000337268 0.00039105 +GCTTGA 7.83606E-06 0.000232893 +GCTTGC 0.000115 0.000191439 +GCTTGG 0.000138402 0.000277708 +GCTTGT 0.000155239 0.000220554 +GCTTTA 0.000186477 0.000257922 +GCTTTC 0.000311219 0.000318921 +GCTTTG 0.000346269 0.000368737 +GCTTTT 0.000340869 0.000470868 +GGAAAA 0.00063038 
0.000625798 +GGAAAC 0.000357388 0.000337551 +GGAAAG 0.000502249 0.00047197 +GGAAAT 0.000445385 0.000427235 +GGAACA 0.000304865 0.000287252 +GGAACC 0.000244718 0.000219855 +GGAACG 5.84528E-05 7.29354E-05 +GGAACT 0.000241012 0.000257277 +GGAAGA 0.000306136 0.000547325 +GGAAGC 0.000294382 0.000385378 +GGAAGG 0.000178641 0.00050259 +GGAAGT 0.000274686 0.000321851 +GGAATA 0.000170911 0.00023948 +GGAATC 0.00024313 0.000204397 +GGAATG 0.000330174 0.000300344 +GGAATT 0.00032022 0.000310426 +GGACAA 0.000216127 0.000224102 +GGACAC 0.000207656 0.000244964 +GGACAG 0.000424842 0.000352821 +GGACAT 0.000185524 0.000221306 +GGACCA 0.000262296 0.000241496 +GGACCC 0.000289405 0.000281499 +GGACCG 5.59114E-05 8.67267E-05 +GGACCT 0.000252872 0.000238351 +GGACGA 8.41847E-05 5.77461E-05 +GGACGC 9.40328E-05 0.000110331 +GGACGG 9.48799E-05 0.000106406 +GGACGT 7.02069E-05 7.04352E-05 +GGACTA 9.76331E-05 0.000142349 +GGACTC 0.000212315 0.000242545 +GGACTG 0.00031429 0.000298167 +GGACTT 0.000216974 0.000270826 +GGAGAA 0.000798855 0.000520602 +GGAGAC 0.000553184 0.000314781 +GGAGAG 0.000879334 0.000530012 +GGAGAT 0.000532641 0.000281902 +GGAGCA 0.000360459 0.000352956 +GGAGCC 0.000500767 0.000455436 +GGAGCG 9.11737E-05 0.000165523 +GGAGCT 0.000392015 0.000390136 +GGAGGA 0.000474611 0.000576816 +GGAGGC 0.000474188 0.000600339 +GGAGGG 0.000298935 0.000563374 +GGAGGT 0.000276592 0.0003179 +GGAGTA 0.000161381 0.000152189 +GGAGTC 0.000243236 0.00025061 +GGAGTG 0.000340763 0.0003493 +GGAGTT 0.000231587 0.000305049 +GGATAA 3.91803E-06 0.000180524 +GGATAC 0.000137872 0.000106217 +GGATAG 3.07089E-06 0.000118127 +GGATAT 0.000196749 0.000180847 +GGATCA 0.000176735 0.000202031 +GGATCC 0.000167522 0.000186573 +GGATCG 3.53682E-05 4.91971E-05 +GGATCT 0.000198337 0.000199558 +GGATGA 6.98892E-06 0.000248136 +GGATGC 8.45024E-05 0.000209827 +GGATGG 0.000131836 0.000306205 +GGATGT 0.000103351 0.000229345 +GGATTA 0.000135437 0.00024983 +GGATTC 0.000205961 0.000205149 +GGATTG 0.000132366 0.000192111 +GGATTT 
0.000345528 0.000355268 +GGCAAA 0.000494413 0.000268837 +GGCAAC 0.00046434 0.000175712 +GGCAAG 0.000771111 0.000248163 +GGCAAT 0.00026039 0.00014985 +GGCACA 0.00032742 0.000279859 +GGCACC 0.000632709 0.000264885 +GGCACG 0.000173452 9.48187E-05 +GGCACT 0.000296712 0.000225097 +GGCAGA 0.000220575 0.000436 +GGCAGC 0.000843859 0.000436887 +GGCAGG 0.000274686 0.00055168 +GGCAGT 0.00038132 0.000265423 +GGCATA 0.000121035 0.000126945 +GGCATC 0.00068682 0.000203698 +GGCATG 0.000514851 0.000266552 +GGCATT 0.000337692 0.000245233 +GGCCAA 0.000199608 0.000277386 +GGCCAC 0.000477576 0.00029736 +GGCCAG 0.00080775 0.000492294 +GGCCAT 0.000227352 0.00022093 +GGCCCA 0.000349976 0.000360752 +GGCCCC 0.000609201 0.00042124 +GGCCCG 0.000204267 0.000196412 +GGCCCT 0.000408111 0.000359999 +GGCCGA 0.000167311 0.000128504 +GGCCGC 0.000453645 0.000235259 +GGCCGG 0.000371048 0.000257788 +GGCCGT 0.000141155 9.30444E-05 +GGCCTA 0.000126118 0.000127993 +GGCCTC 0.000597976 0.000445543 +GGCCTG 0.001031925 0.000477615 +GGCCTT 0.000277227 0.000304995 +GGCGAA 9.44563E-05 5.62407E-05 +GGCGAC 0.0002426 8.14575E-05 +GGCGAG 0.000453327 0.000128262 +GGCGAT 0.000113199 4.44925E-05 +GGCGCA 8.51378E-05 0.000108771 +GGCGCC 0.00039085 0.00021765 +GGCGCG 0.000190183 0.000176545 +GGCGCT 0.000116694 0.000127939 +GGCGGA 0.000104304 0.000164071 +GGCGGC 0.000566844 0.000361263 +GGCGGG 0.000243977 0.000329271 +GGCGGT 0.00011214 0.000100894 +GGCGTA 3.69566E-05 2.74214E-05 +GGCGTC 0.000218139 8.77214E-05 +GGCGTG 0.000334198 0.000166625 +GGCGTT 6.21591E-05 5.96549E-05 +GGCTAA 6.45946E-06 0.000172163 +GGCTAC 0.000578386 0.000121272 +GGCTAG 5.92999E-06 0.000137376 +GGCTAT 0.000317255 0.000120143 +GGCTCA 0.000281675 0.000372635 +GGCTCC 0.000728119 0.000392233 +GGCTCG 0.000158733 0.000100518 +GGCTCT 0.000408217 0.000343735 +GGCTGA 1.94843E-05 0.000410111 +GGCTGC 0.000506697 0.000489579 +GGCTGG 0.000506379 0.00066486 +GGCTGT 0.000298935 0.000349192 +GGCTTA 0.000115211 0.000159124 +GGCTTC 0.000764334 0.000325965 +GGCTTG 
0.000269603 0.000248002 +GGCTTT 0.000410758 0.000350832 +GGGAAA 0.000591199 0.000482696 +GGGAAC 0.000374013 0.000229667 +GGGAAG 0.000761157 0.000515521 +GGGAAT 0.000254143 0.000240905 +GGGACA 0.000256684 0.000294188 +GGGACC 0.00036173 0.000285908 +GGGACG 0.000107481 0.000112723 +GGGACT 0.000216445 0.000290451 +GGGAGA 0.00018383 0.000461405 +GGGAGC 0.000344363 0.000397314 +GGGAGG 0.000191031 0.000742151 +GGGAGT 0.00017017 0.000252465 +GGGATA 9.21267E-05 0.000140817 +GGGATC 0.000264097 0.000169421 +GGGATG 0.000283475 0.000267761 +GGGATT 0.000188807 0.000307764 +GGGCAA 0.000149097 0.000238055 +GGGCAC 0.000216974 0.000247303 +GGGCAG 0.00061026 0.000506596 +GGGCAT 0.000126118 0.000207327 +GGGCCA 0.000281039 0.000328895 +GGGCCC 0.000529358 0.000380431 +GGGCCG 0.0001761 0.000225984 +GGGCCT 0.000345634 0.000367661 +GGGCGA 8.28081E-05 9.48994E-05 +GGGCGC 0.000183618 0.000206171 +GGGCGG 0.000189442 0.000296312 +GGGCGT 6.7136E-05 9.87169E-05 +GGGCTA 8.83146E-05 0.000131381 +GGGCTC 0.000331868 0.000338949 +GGGCTG 0.000771217 0.000524366 +GGGCTT 0.000184253 0.000277386 +GGGGAA 0.000424524 0.000394276 +GGGGAC 0.000656853 0.000278219 +GGGGAG 0.000908878 0.000499875 +GGGGAT 0.000347222 0.000202434 +GGGGCA 0.00028284 0.000314405 +GGGGCC 0.000660241 0.000416724 +GGGGCG 0.000182771 0.00023456 +GGGGCT 0.000390109 0.000392851 +GGGGGA 0.000190289 0.00032204 +GGGGGC 0.000588128 0.000351639 +GGGGGG 0.000135966 0.000235555 +GGGGGT 0.000202573 0.000239157 +GGGGTA 7.93137E-05 0.000119605 +GGGGTC 0.000282204 0.000221683 +GGGGTG 0.000370625 0.000351854 +GGGGTT 0.000148568 0.000245314 +GGGTAA 3.49446E-06 0.00013052 +GGGTAC 0.000126542 9.12163E-05 +GGGTAG 3.60035E-06 0.000145226 +GGGTAT 9.30797E-05 0.000114471 +GGGTCA 0.000121777 0.00019082 +GGGTCC 0.00021708 0.000238404 +GGGTCG 4.81812E-05 7.26397E-05 +GGGTCT 0.000168687 0.000259159 +GGGTGA 6.88303E-06 0.00025897 +GGGTGC 8.31258E-05 0.000224291 +GGGTGG 0.000100704 0.000444844 +GGGTGT 6.84067E-05 0.000233915 +GGGTTA 6.04648E-05 0.000116863 
+GGGTTC 0.000143167 0.000213483 +GGGTTG 0.000115 0.000199369 +GGGTTT 0.000155556 0.000318383 +GGTAAA 0.000185842 0.00019617 +GGTAAC 0.000103563 0.000109443 +GGTAAG 7.04187E-05 0.000115949 +GGTAAT 0.000106422 0.000141569 +GGTACA 0.000106952 0.00012845 +GGTACC 0.000125483 0.000102964 +GGTACG 1.53545E-05 2.43566E-05 +GGTACT 0.000103881 0.000115197 +GGTAGA 7.3066E-05 0.000163802 +GGTAGC 9.92215E-05 0.000115197 +GGTAGG 3.61094E-05 0.000152054 +GGTAGT 7.8043E-05 0.000106325 +GGTATA 6.04648E-05 0.000114014 +GGTATC 0.000117329 9.31788E-05 +GGTATG 9.15972E-05 0.000115008 +GGTATT 0.000139461 0.000180793 +GGTCAA 0.000123789 0.00013837 +GGTCAC 0.000138614 0.000182917 +GGTCAG 0.000252766 0.0002701 +GGTCAT 0.000140731 0.000159178 +GGTCCA 0.000168793 0.000161141 +GGTCCC 0.00024493 0.000258675 +GGTCCG 4.75459E-05 6.69941E-05 +GGTCCT 0.000248319 0.000229398 +GGTCGA 6.59712E-05 2.92763E-05 +GGTCGC 7.39131E-05 6.23164E-05 +GGTCGG 7.4019E-05 7.77207E-05 +GGTCGT 5.85587E-05 3.84168E-05 +GGTCTA 6.22649E-05 9.80985E-05 +GGTCTC 0.000179488 0.000279832 +GGTCTG 0.000254566 0.000259965 +GGTCTT 0.000165404 0.000241792 +GGTGAA 0.000439137 0.000273326 +GGTGAC 0.000391168 0.000226468 +GGTGAG 0.00039911 0.00027416 +GGTGAT 0.000341186 0.000214129 +GGTGCA 0.000227987 0.00022507 +GGTGCC 0.000399322 0.000268165 +GGTGCG 6.19473E-05 9.31519E-05 +GGTGCT 0.000323079 0.000282198 +GGTGGA 0.000338116 0.000329648 +GGTGGC 0.000475776 0.000376264 +GGTGGG 0.000292793 0.000489659 +GGTGGT 0.0002965 0.00026846 +GGTGTA 0.000121247 0.000117455 +GGTGTC 0.000273945 0.000203509 +GGTGTG 0.000424206 0.000326099 +GGTGTT 0.000238153 0.000227866 +GGTTAA 4.55339E-06 0.000157511 +GGTTAC 0.000142002 9.3609E-05 +GGTTAG 2.8591E-06 0.000109685 +GGTTAT 0.000181288 0.000143451 +GGTTCA 0.000138825 0.000223081 +GGTTCC 0.000186795 0.000208375 +GGTTCG 2.61555E-05 4.07825E-05 +GGTTCT 0.000208926 0.000241926 +GGTTGA 3.07089E-06 0.000164474 +GGTTGC 7.03128E-05 0.000180578 +GGTTGG 8.66203E-05 0.000223592 +GGTTGT 9.21267E-05 0.00018254 
+GGTTTA 0.000115 0.000184368 +GGTTTC 0.000205008 0.000301366 +GGTTTG 0.000167628 0.000262331 +GGTTTT 0.000222057 0.000433392 +GTAAAA 0.000271403 0.000384463 +GTAAAC 0.000118176 0.00017305 +GTAAAG 0.000220045 0.000216279 +GTAAAT 0.000199608 0.000337713 +GTAACA 0.00015058 0.000190659 +GTAACC 0.000106952 0.000111863 +GTAACG 2.36141E-05 2.78515E-05 +GTAACT 0.000141896 0.000191788 +GTAAGA 9.10678E-05 0.000185686 +GTAAGC 7.3066E-05 0.0001099 +GTAAGG 5.58055E-05 0.000128289 +GTAAGT 6.01471E-05 0.000157108 +GTAATA 8.18551E-05 0.000207703 +GTAATC 7.81489E-05 0.000194073 +GTAATG 0.000126754 0.000177674 +GTAATT 0.000152274 0.000269993 +GTACAA 0.00013872 0.00015942 +GTACAC 8.42906E-05 0.000106379 +GTACAG 0.000238471 0.000193751 +GTACAT 0.000111082 0.000191788 +GTACCA 0.000128236 0.000133934 +GTACCC 0.000108964 0.000107158 +GTACCG 2.19198E-05 2.72601E-05 +GTACCT 0.000132366 0.000150092 +GTACGA 4.4475E-05 1.93294E-05 +GTACGC 3.62153E-05 1.79314E-05 +GTACGG 5.14639E-05 2.22866E-05 +GTACGT 3.37798E-05 3.29594E-05 +GTACTA 5.62291E-05 9.92277E-05 +GTACTC 6.93598E-05 0.000102185 +GTACTG 0.000145708 0.000164555 +GTACTT 9.63624E-05 0.000188723 +GTAGAA 0.000381002 0.00022464 +GTAGAC 0.000181077 0.000106244 +GTAGAG 0.000270238 0.000219048 +GTAGAT 0.000258484 0.00014415 +GTAGCA 0.00018203 0.000159635 +GTAGCC 0.00018203 0.000125359 +GTAGCG 2.59437E-05 3.40885E-05 +GTAGCT 0.000187324 0.000231872 +GTAGGA 0.000142743 0.000163829 +GTAGGC 9.1809E-05 9.92545E-05 +GTAGGG 8.86322E-05 0.000144392 +GTAGGT 7.23248E-05 0.000126676 +GTAGTA 8.51378E-05 0.000116514 +GTAGTC 6.42769E-05 0.000112616 +GTAGTG 0.000149732 0.000136381 +GTAGTT 0.00010314 0.000175981 +GTATAA 5.61232E-06 0.000211306 +GTATAC 7.47603E-05 0.000109309 +GTATAG 4.4475E-06 0.000116863 +GTATAT 0.000116694 0.000274805 +GTATCA 0.000105893 0.000149124 +GTATCC 0.000103881 0.000111755 +GTATCG 2.09668E-05 2.08886E-05 +GTATCT 0.000142743 0.000192514 +GTATGA 8.36553E-06 0.000154474 +GTATGC 5.55937E-05 0.000104147 +GTATGG 7.37014E-05 0.000127294 
+GTATGT 8.32317E-05 0.000240958 +GTATTA 8.9903E-05 0.000214505 +GTATTC 9.10678E-05 0.000167647 +GTATTG 9.74213E-05 0.00017141 +GTATTT 0.000208503 0.000533399 +GTCAAA 0.000317467 0.000208994 +GTCAAC 0.000425583 0.000112159 +GTCAAG 0.000523957 0.000182164 +GTCAAT 0.000280616 0.000131569 +GTCACA 0.000304653 0.000232705 +GTCACC 0.000581987 0.000231146 +GTCACG 0.000133001 6.01388E-05 +GTCACT 0.00031249 0.000237705 +GTCAGA 0.000168899 0.000235877 +GTCAGC 0.000442949 0.000212865 +GTCAGG 0.000175253 0.00027631 +GTCAGT 0.000239106 0.00020281 +GTCATA 0.000117647 0.000140037 +GTCATC 0.000703446 0.000183239 +GTCATG 0.000405463 0.000186169 +GTCATT 0.000364801 0.000266579 +GTCCAA 0.00014232 0.000157458 +GTCCAC 0.000281887 0.00016184 +GTCCAG 0.000527875 0.000270154 +GTCCAT 0.000178429 0.000161759 +GTCCCA 0.000222269 0.000276257 +GTCCCC 0.000299041 0.000297683 +GTCCCG 9.9751E-05 0.000104685 +GTCCCT 0.000257849 0.00029236 +GTCCGA 0.000107693 4.01104E-05 +GTCCGC 0.000176947 7.09998E-05 +GTCCGG 0.000174299 8.15113E-05 +GTCCGT 8.00549E-05 5.1993E-05 +GTCCTA 0.000121035 0.000128773 +GTCCTC 0.000410864 0.00026674 +GTCCTG 0.000739131 0.000339218 +GTCCTT 0.000227246 0.000260046 +GTCGAA 4.90283E-05 2.57546E-05 +GTCGAC 8.06903E-05 2.21253E-05 +GTCGAG 9.31856E-05 4.42237E-05 +GTCGAT 6.04648E-05 2.08349E-05 +GTCGCA 4.33101E-05 3.45724E-05 +GTCGCC 0.000121035 9.27218E-05 +GTCGCG 3.54741E-05 4.14815E-05 +GTCGCT 5.42171E-05 5.8714E-05 +GTCGGA 4.46867E-05 4.62668E-05 +GTCGGC 8.87381E-05 7.35806E-05 +GTCGGG 9.43505E-05 8.69956E-05 +GTCGGT 3.75919E-05 4.38204E-05 +GTCGTA 2.68968E-05 1.99208E-05 +GTCGTC 0.000110976 4.15622E-05 +GTCGTG 0.000157568 5.58643E-05 +GTCGTT 3.92862E-05 3.77985E-05 +GTCTAA 9.95392E-06 0.000127025 +GTCTAC 0.000322338 0.000103932 +GTCTAG 6.98892E-06 0.000113906 +GTCTAT 0.000205644 0.000129956 +GTCTCA 0.000184465 0.000306958 +GTCTCC 0.000395615 0.000313786 +GTCTCG 6.40651E-05 9.26412E-05 +GTCTCT 0.000245777 0.00034852 +GTCTGA 2.13903E-05 0.000232947 +GTCTGC 0.000285593 
0.000249938 +GTCTGG 0.00022301 0.000286419 +GTCTGT 0.00019982 0.000311501 +GTCTTA 0.000108117 0.000176169 +GTCTTC 0.000551807 0.000278434 +GTCTTG 0.000216021 0.000274106 +GTCTTT 0.000313443 0.0003568 +GTGAAA 0.000607401 0.000415729 +GTGAAC 0.000459257 0.000195471 +GTGAAG 0.000984273 0.000321152 +GTGAAT 0.000412664 0.00027209 +GTGACA 0.000375813 0.00027424 +GTGACC 0.00055329 0.000224452 +GTGACG 0.000162439 7.26666E-05 +GTGACT 0.000367342 0.000276848 +GTGAGA 0.000202043 0.000316502 +GTGAGC 0.000397733 0.000316824 +GTGAGG 0.000269921 0.000324217 +GTGAGT 0.000192513 0.000211359 +GTGATA 0.000145391 0.000168668 +GTGATC 0.000432042 0.000205714 +GTGATG 0.000522157 0.000273461 +GTGATT 0.000353152 0.000290613 +GTGCAA 0.000212209 0.000202622 +GTGCAC 0.000376131 0.000183777 +GTGCAG 0.000908348 0.000351639 +GTGCAT 0.000189124 0.000213322 +GTGCCA 0.000320114 0.000289188 +GTGCCC 0.000673054 0.0002833 +GTGCCG 0.000158204 8.82591E-05 +GTGCCT 0.000418806 0.000342364 +GTGCGA 0.000105575 4.80411E-05 +GTGCGC 0.000316937 8.44416E-05 +GTGCGG 0.000343093 0.00010119 +GTGCGT 0.000100175 6.90104E-05 +GTGCTA 0.000167522 0.000159501 +GTGCTC 0.000496213 0.000225258 +GTGCTG 0.001335414 0.000434386 +GTGCTT 0.000265897 0.000286822 +GTGGAA 0.000826917 0.000341288 +GTGGAC 0.000999945 0.000202085 +GTGGAG 0.001508019 0.00039105 +GTGGAT 0.000759357 0.000238189 +GTGGCA 0.000503944 0.000319889 +GTGGCC 0.001212472 0.000329298 +GTGGCG 0.000266003 0.000141946 +GTGGCT 0.000687879 0.000398201 +GTGGGA 0.00035114 0.000365833 +GTGGGC 0.000769523 0.000290478 +GTGGGG 0.000442102 0.000430273 +GTGGGT 0.000255731 0.000259266 +GTGGTA 0.000236353 0.000160388 +GTGGTC 0.000574998 0.00019238 +GTGGTG 0.001182293 0.000373334 +GTGGTT 0.000341504 0.000259669 +GTGTAA 1.01657E-05 0.000207031 +GTGTAC 0.000331656 0.000133477 +GTGTAG 9.63624E-06 0.000147242 +GTGTAT 0.000234658 0.000251201 +GTGTCA 0.000238788 0.000222489 +GTGTCC 0.000489225 0.00025311 +GTGTCG 0.00011267 4.97886E-05 +GTGTCT 0.00036893 0.0003254 +GTGTGA 3.10266E-05 
0.000315399 +GTGTGC 0.000330491 0.000266498 +GTGTGG 0.000316513 0.00035266 +GTGTGT 0.000253931 0.000596683 +GTGTTA 0.000127283 0.000207811 +GTGTTC 0.000410546 0.000243136 +GTGTTG 0.000257531 0.000263164 +GTGTTT 0.000411287 0.000463797 +GTTAAA 0.000234552 0.000303006 +GTTAAC 0.000120718 0.000134849 +GTTAAG 0.000158733 0.000167781 +GTTAAT 0.000174088 0.000231737 +GTTACA 0.000147191 0.000180255 +GTTACC 0.000138084 0.000123611 +GTTACG 2.36141E-05 2.61041E-05 +GTTACT 0.000156827 0.000186922 +GTTAGA 7.46544E-05 0.000158184 +GTTAGC 7.68781E-05 0.000107642 +GTTAGG 4.89225E-05 0.000127563 +GTTAGT 8.74674E-05 0.000123934 +GTTATA 9.14913E-05 0.000176572 +GTTATC 0.000133319 0.000122133 +GTTATG 0.000137449 0.000158829 +GTTATT 0.000220786 0.000292656 +GTTCAA 0.000199714 0.000256417 +GTTCAC 0.000176206 0.000176007 +GTTCAG 0.000372531 0.000235931 +GTTCAT 0.000200984 0.000235071 +GTTCCA 0.000250013 0.000228377 +GTTCCC 0.000198125 0.00023663 +GTTCCG 4.00275E-05 5.73967E-05 +GTTCCT 0.000293535 0.000298678 +GTTCGA 0.000118918 4.00029E-05 +GTTCGC 7.46544E-05 3.22873E-05 +GTTCGG 8.84205E-05 4.58904E-05 +GTTCGT 8.54555E-05 3.98416E-05 +GTTCTA 0.000101233 0.00017348 +GTTCTC 0.00019929 0.000260825 +GTTCTG 0.000303383 0.000329836 +GTTCTT 0.000240271 0.000340079 +GTTGAA 0.000397733 0.000267385 +GTTGAC 0.000233599 0.000128719 +GTTGAG 0.000258272 0.000196654 +GTTGAT 0.000319478 0.000183347 +GTTGCA 0.000223963 0.000223645 +GTTGCC 0.000248319 0.000214801 +GTTGCG 3.33562E-05 4.02718E-05 +GTTGCT 0.000291417 0.000248217 +GTTGGA 0.000302747 0.000231495 +GTTGGC 0.000247154 0.00019496 +GTTGGG 0.000201196 0.000247195 +GTTGGT 0.000204902 0.000193858 +GTTGTA 0.000146979 0.000192917 +GTTGTC 0.000196219 0.000167109 +GTTGTG 0.000296394 0.000226603 +GTTGTT 0.000226187 0.000297602 +GTTTAA 9.42446E-06 0.000321072 +GTTTAC 0.000133319 0.00018953 +GTTTAG 4.4475E-06 0.000173346 +GTTTAT 0.000188171 0.000328841 +GTTTCA 0.000201937 0.000349058 +GTTTCC 0.000212633 0.000340267 +GTTTCG 2.95441E-05 4.91702E-05 +GTTTCT 
0.000300418 0.000468233 +GTTTGA 7.83606E-06 0.000289 +GTTTGC 0.000106528 0.000245179 +GTTTGG 0.000110658 0.000297172 +GTTTGT 0.000128236 0.000380995 +GTTTTA 0.0001761 0.000465733 +GTTTTC 0.000205326 0.000444952 +GTTTTG 0.00023487 0.000479067 +GTTTTT 0.000233917 0.000721343 +TAAAAA 1.05893E-07 0.000972275 +TAAAAC 0 0.000401562 +TAAAAG 0 0.00048189 +TAAAAT 0 0.000828205 +TAAACA 1.05893E-07 0.000376855 +TAAACC 0 0.000195794 +TAAACG 1.05893E-07 5.07564E-05 +TAAACT 0 0.000327497 +TAAAGA 2.11786E-07 0.000444871 +TAAAGC 1.05893E-07 0.000258245 +TAAAGG 3.17678E-07 0.000283273 +TAAAGT 0 0.000361048 +TAAATA 0 0.000616685 +TAAATC 0 0.000259911 +TAAATG 0 0.00049345 +TAAATT 0 0.000528452 +TAACAA 0 0.000263514 +TAACAC 0 0.000166437 +TAACAG 0 0.000216467 +TAACAT 0 0.00026967 +TAACCA 1.05893E-07 0.00019531 +TAACCC 0 0.000130977 +TAACCG 0 2.6588E-05 +TAACCT 0 0.000187648 +TAACGA 0 3.04861E-05 +TAACGC 0 2.55933E-05 +TAACGG 0 3.11851E-05 +TAACGT 0 4.48957E-05 +TAACTA 0 0.000170873 +TAACTC 0 0.000161786 +TAACTG 0 0.000243405 +TAACTT 1.05893E-07 0.00030271 +TAAGAA 0 0.000385808 +TAAGAC 0 0.000162243 +TAAGAG 2.11786E-07 0.000231227 +TAAGAT 0 0.000234748 +TAAGCA 0 0.000226414 +TAAGCC 0 0.000160979 +TAAGCG 1.05893E-07 2.93301E-05 +TAAGCT 0 0.000185202 +TAAGGA 0 0.000250368 +TAAGGC 0 0.000136919 +TAAGGG 0 0.000159313 +TAAGGT 0 0.000153425 +TAAGTA 0 0.000211843 +TAAGTC 0 0.000138989 +TAAGTG 0 0.000224532 +TAAGTT 0 0.000241765 +TAATAA 0 0.000479228 +TAATAC 0 0.000178051 +TAATAG 0 0.000190014 +TAATAT 0 0.000363548 +TAATCA 0 0.00022335 +TAATCC 0 0.000228753 +TAATCG 0 2.74751E-05 +TAATCT 0 0.000232598 +TAATGA 2.11786E-07 0.000304377 +TAATGC 1.05893E-07 0.000175658 +TAATGG 0 0.000202569 +TAATGT 0 0.000318679 +TAATTA 0 0.000327121 +TAATTC 1.05893E-07 0.000247679 +TAATTG 0 0.000255207 +TAATTT 0 0.000679781 +TACAAA 0.000433419 0.000397179 +TACAAC 0.00043289 0.000141381 +TACAAG 0.000581457 0.000193321 +TACAAT 0.000280616 0.000220446 +TACACA 0.000271085 0.000255422 +TACACC 0.000352199 0.000113315 
+TACACG 0.000169005 3.7664E-05 +TACACT 0.000188807 0.000173023 +TACAGA 0.00025446 0.000318625 +TACAGC 0.000461904 0.000193885 +TACAGG 0.000217716 0.00028494 +TACAGT 0.000243024 0.00027166 +TACATA 0.000114894 0.000263863 +TACATC 0.000501614 0.000151973 +TACATG 0.000390533 0.0002312 +TACATT 0.000279451 0.000344918 +TACCAA 0.000191031 0.000189396 +TACCAC 0.000313019 0.000149823 +TACCAG 0.000639169 0.000188455 +TACCAT 0.000180335 0.000183965 +TACCCA 0.000244189 0.000194503 +TACCCC 0.000255943 0.000139311 +TACCCG 0.000123047 3.97878E-05 +TACCCT 0.000203526 0.000172432 +TACCGA 0.000137025 2.7045E-05 +TACCGC 0.000275109 2.86849E-05 +TACCGG 0.000231058 3.04592E-05 +TACCGT 8.61967E-05 3.30669E-05 +TACCTA 0.000119129 0.000152215 +TACCTC 0.000327738 0.000185524 +TACCTG 0.000824163 0.000244991 +TACCTT 0.000190289 0.000237329 +TACGAA 0.000115211 3.48412E-05 +TACGAC 0.00020342 1.90605E-05 +TACGAG 0.000306771 2.78784E-05 +TACGAT 0.000113411 2.73407E-05 +TACGCA 6.45946E-05 2.83085E-05 +TACGCC 0.000187324 2.60772E-05 +TACGCG 7.93137E-05 1.30117E-05 +TACGCT 5.97235E-05 2.80666E-05 +TACGGA 7.6031E-05 3.70995E-05 +TACGGC 0.000190713 2.7959E-05 +TACGGG 0.000144332 3.06743E-05 +TACGGT 5.77116E-05 2.91688E-05 +TACGTA 3.71684E-05 3.93308E-05 +TACGTC 0.000132154 2.5889E-05 +TACGTG 0.000291629 5.26651E-05 +TACGTT 5.337E-05 4.66163E-05 +TACTAA 8.89499E-06 0.000224963 +TACTAC 0.000357176 0.000120681 +TACTAG 6.98892E-06 0.000105841 +TACTAT 0.000227669 0.000175174 +TACTCA 0.000164981 0.000195364 +TACTCC 0.00027098 0.000144688 +TACTCG 0.000101022 4.05944E-05 +TACTCT 0.000152591 0.000205714 +TACTGA 2.14962E-05 0.000245206 +TACTGC 0.000305395 0.000177836 +TACTGG 0.000274262 0.000194208 +TACTGT 0.000207126 0.00029365 +TACTTA 9.56212E-05 0.000214881 +TACTTC 0.000438608 0.000207542 +TACTTG 0.000214751 0.000238082 +TACTTT 0.00027638 0.000409438 +TAGAAA 0 0.000455463 +TAGAAC 0 0.000171249 +TAGAAG 1.05893E-07 0.000279321 +TAGAAT 0 0.000287763 +TAGACA 2.11786E-07 0.000188455 +TAGACC 0 0.000105814 +TAGACG 
0 3.08625E-05 +TAGACT 0 0.000168776 +TAGAGA 1.05893E-07 0.00032005 +TAGAGC 0 0.000157323 +TAGAGG 1.05893E-07 0.000208456 +TAGAGT 0 0.000180309 +TAGATA 0 0.000179153 +TAGATC 0 0.000110384 +TAGATG 1.05893E-07 0.000198348 +TAGATT 0 0.000231925 +TAGCAA 0 0.000205069 +TAGCAC 0 0.000135252 +TAGCAG 1.05893E-07 0.000208241 +TAGCAT 1.05893E-07 0.000188992 +TAGCCA 1.05893E-07 0.000211897 +TAGCCC 2.11786E-07 0.000130977 +TAGCCG 2.11786E-07 5.08639E-05 +TAGCCT 2.11786E-07 0.000181277 +TAGCGA 0 2.72332E-05 +TAGCGC 0 3.06743E-05 +TAGCGG 1.05893E-07 3.92771E-05 +TAGCGT 0 2.9115E-05 +TAGCTA 0 0.000144446 +TAGCTC 0 0.000155307 +TAGCTG 0 0.000295075 +TAGCTT 0 0.000210311 +TAGGAA 0 0.000296715 +TAGGAC 0 0.000127939 +TAGGAG 1.05893E-07 0.000218349 +TAGGAT 1.05893E-07 0.000175577 +TAGGCA 1.05893E-07 0.000167216 +TAGGCC 0 0.000124713 +TAGGCG 0 3.85781E-05 +TAGGCT 0 0.000158452 +TAGGGA 0 0.000191949 +TAGGGC 0 0.000125305 +TAGGGG 0 0.000144822 +TAGGGT 0 0.000129257 +TAGGTA 0 0.000135171 +TAGGTC 0 9.65124E-05 +TAGGTG 0 0.000164689 +TAGGTT 0 0.000161168 +TAGTAA 0 0.000191949 +TAGTAC 0 9.19422E-05 +TAGTAG 0 0.000163695 +TAGTAT 0 0.000179986 +TAGTCA 0 0.000151543 +TAGTCC 0 0.000131246 +TAGTCG 0 2.17489E-05 +TAGTCT 0 0.000161625 +TAGTGA 1.05893E-07 0.000187352 +TAGTGC 0 0.000119847 +TAGTGG 1.05893E-07 0.000155468 +TAGTGT 1.05893E-07 0.000179287 +TAGTTA 1.05893E-07 0.000173265 +TAGTTC 0 0.000158372 +TAGTTG 0 0.000165146 +TAGTTT 0 0.000342606 +TATAAA 0.000320537 0.000524769 +TATAAC 0.000165616 0.000158802 +TATAAG 0.000238259 0.000194423 +TATAAT 0.000221845 0.000359489 +TATACA 0.00013999 0.000255126 +TATACC 0.000131942 0.000123557 +TATACG 3.04971E-05 2.63729E-05 +TATACT 0.00013586 0.000207945 +TATAGA 0.000117435 0.000218699 +TATAGC 0.000114894 0.000128477 +TATAGG 7.94196E-05 0.000135682 +TATAGT 0.00010674 0.000193213 +TATATA 8.86322E-05 0.000546223 +TATATC 0.000163287 0.000172244 +TATATG 0.000153333 0.000286069 +TATATT 0.00020935 0.000518344 +TATCAA 0.000154921 0.000211978 +TATCAC 0.000130672 
0.000145441 +TATCAG 0.000299994 0.000196762 +TATCAT 0.000134484 0.000209935 +TATCCA 0.000196008 0.000192487 +TATCCC 0.000117859 0.000137806 +TATCCG 3.22973E-05 2.42222E-05 +TATCCT 0.000185101 0.000198536 +TATCGA 8.21728E-05 2.48405E-05 +TATCGC 6.85126E-05 2.14263E-05 +TATCGG 8.07962E-05 2.11037E-05 +TATCGT 5.61232E-05 2.78246E-05 +TATCTA 6.70301E-05 0.000181035 +TATCTC 0.000148568 0.000183777 +TATCTG 0.000232435 0.000262385 +TATCTT 0.000164134 0.000302952 +TATGAA 0.00072833 0.000340159 +TATGAC 0.00048573 0.000139311 +TATGAG 0.000628156 0.000183911 +TATGAT 0.000489436 0.000219371 +TATGCA 0.000297982 0.000224936 +TATGCC 0.000353682 0.00014122 +TATGCG 6.70301E-05 2.32813E-05 +TATGCT 0.000298935 0.000197595 +TATGGA 0.000385132 0.000223296 +TATGGC 0.000347328 0.000135628 +TATGGG 0.000285381 0.000150011 +TATGGT 0.00021888 0.000168722 +TATGTA 0.000152909 0.000335804 +TATGTC 0.000252766 0.000168426 +TATGTG 0.000430666 0.00030107 +TATGTT 0.000232117 0.000349111 +TATTAA 8.36553E-06 0.00042487 +TATTAC 0.000169005 0.000186734 +TATTAG 4.12982E-06 0.000191089 +TATTAT 0.000177794 0.000396131 +TATTCA 0.000160428 0.00031263 +TATTCC 0.000167628 0.000218403 +TATTCG 2.61555E-05 3.01635E-05 +TATTCT 0.000191984 0.000361639 +TATTGA 6.77714E-06 0.000263917 +TATTGC 8.36553E-05 0.000188992 +TATTGG 9.10678E-05 0.000194288 +TATTGT 8.61967E-05 0.000322658 +TATTTA 0.000130248 0.000621658 +TATTTC 0.000220892 0.00045342 +TATTTG 0.000175782 0.000476002 +TATTTT 0.00027225 0.001195786 +TCAAAA 0.000309525 0.000475115 +TCAAAC 0.000170699 0.000227355 +TCAAAG 0.000295653 0.000329943 +TCAAAT 0.00026039 0.000362419 +TCAACA 0.000205114 0.000248056 +TCAACC 0.000131307 0.000154635 +TCAACG 3.93921E-05 3.84705E-05 +TCAACT 0.000181394 0.000218484 +TCAAGA 0.000190078 0.000338707 +TCAAGC 0.000164769 0.000239265 +TCAAGG 0.000125377 0.000258648 +TCAAGT 0.000196008 0.000276042 +TCAATA 8.57731E-05 0.000232624 +TCAATC 7.78312E-05 0.000135548 +TCAATG 0.000141367 0.000200015 +TCAATT 0.00012453 0.000226629 +TCACAA 
0.000175041 0.000254131 +TCACAC 0.000136708 0.000256551 +TCACAG 0.000321067 0.000365376 +TCACAT 0.000135225 0.000292091 +TCACCA 0.000257849 0.000328895 +TCACCC 0.000217821 0.000287521 +TCACCG 6.08883E-05 8.53019E-05 +TCACCT 0.000248319 0.000354757 +TCACGA 4.73341E-05 5.3149E-05 +TCACGC 4.84989E-05 9.11357E-05 +TCACGG 7.5078E-05 7.11073E-05 +TCACGT 4.30984E-05 7.72099E-05 +TCACTA 8.41847E-05 0.000172835 +TCACTC 0.000140096 0.00026631 +TCACTG 0.000312807 0.000449173 +TCACTT 0.000148144 0.000367822 +TCAGAA 0.000649546 0.000473368 +TCAGAC 0.000396568 0.000226925 +TCAGAG 0.000634933 0.000411428 +TCAGAT 0.000510615 0.00029771 +TCAGCA 0.000342457 0.000368038 +TCAGCC 0.00044189 0.000447748 +TCAGCG 7.73017E-05 8.75601E-05 +TCAGCT 0.000356435 0.000387609 +TCAGGA 0.00037539 0.00043151 +TCAGGC 0.000282522 0.000288327 +TCAGGG 0.000289934 0.000328519 +TCAGGT 0.000181818 0.000243432 +TCAGTA 0.000155556 0.000236496 +TCAGTC 0.000192831 0.000234936 +TCAGTG 0.000392015 0.000377581 +TCAGTT 0.000216551 0.000341235 +TCATAA 7.73017E-06 0.000250556 +TCATAC 9.53035E-05 0.000142107 +TCATAG 5.08285E-06 0.00017469 +TCATAT 0.00012453 0.000254965 +TCATCA 0.000251495 0.000281875 +TCATCC 0.000235823 0.000245475 +TCATCG 4.87107E-05 4.86057E-05 +TCATCT 0.000285275 0.000374167 +TCATGA 1.14364E-05 0.00027338 +TCATGC 6.88303E-05 0.000216521 +TCATGG 0.000108328 0.000259616 +TCATGT 7.68781E-05 0.000304834 +TCATTA 0.000119553 0.000272036 +TCATTC 0.000130989 0.00031255 +TCATTG 0.000143696 0.000287037 +TCATTT 0.000232858 0.000621873 +TCCAAA 0.00046614 0.000375081 +TCCAAC 0.000419018 0.00017883 +TCCAAG 0.000665748 0.000313356 +TCCAAT 0.000246836 0.000179395 +TCCACA 0.000319902 0.000301608 +TCCACC 0.000457774 0.000332766 +TCCACG 0.000186371 9.1055E-05 +TCCACT 0.000261555 0.000271982 +TCCAGA 0.000254037 0.000427209 +TCCAGC 0.000652194 0.000462856 +TCCAGG 0.000315772 0.000464873 +TCCAGT 0.000369778 0.000312899 +TCCATA 0.000129613 0.000187729 +TCCATC 0.000578704 0.000303355 +TCCATG 0.000514957 0.000296635 
+TCCATT 0.000311113 0.000336369 +TCCCAA 0.000200349 0.000390002 +TCCCAC 0.000309948 0.000343305 +TCCCAG 0.000758192 0.000668436 +TCCCAT 0.000188066 0.000291204 +TCCCCA 0.000417429 0.000499687 +TCCCCC 0.000275851 0.000359354 +TCCCCG 0.000211362 0.00019082 +TCCCCT 0.000298088 0.000441027 +TCCCGA 0.000129189 9.89857E-05 +TCCCGC 0.000211574 0.000153694 +TCCCGG 0.000271297 0.000195686 +TCCCGT 7.42308E-05 8.31243E-05 +TCCCTA 0.000114258 0.000198052 +TCCCTC 0.00037486 0.000463044 +TCCCTG 0.000852013 0.00055453 +TCCCTT 0.000216868 0.000427908 +TCCGAA 9.17031E-05 5.16704E-05 +TCCGAC 0.00016964 3.90082E-05 +TCCGAG 0.000262085 9.22379E-05 +TCCGAT 7.71958E-05 3.16152E-05 +TCCGCA 5.66526E-05 7.07041E-05 +TCCGCC 0.000155133 0.000160469 +TCCGCG 6.93598E-05 7.85003E-05 +TCCGCT 5.85587E-05 7.79089E-05 +TCCGGA 7.14776E-05 8.83129E-05 +TCCGGC 0.00014179 0.000112938 +TCCGGG 0.000135649 0.000131058 +TCCGGT 4.53221E-05 5.92247E-05 +TCCGTA 2.68968E-05 3.62661E-05 +TCCGTC 0.000106528 8.35007E-05 +TCCGTG 0.000259225 0.000100464 +TCCGTT 4.70164E-05 5.94129E-05 +TCCTAA 1.36602E-05 0.000242249 +TCCTAC 0.000399322 0.000167458 +TCCTAG 1.84253E-05 0.000210687 +TCCTAT 0.000252554 0.000187809 +TCCTCA 0.000335151 0.000398416 +TCCTCC 0.000565467 0.000568912 +TCCTCG 0.000166463 0.000106432 +TCCTCT 0.000373272 0.000497025 +TCCTGA 2.94382E-05 0.000464711 +TCCTGC 0.000363848 0.000530173 +TCCTGG 0.000391591 0.000599694 +TCCTGT 0.000235188 0.00043065 +TCCTTA 0.000146556 0.000246039 +TCCTTC 0.000556784 0.000458071 +TCCTTG 0.00033801 0.000370914 +TCCTTT 0.00033928 0.000546303 +TCGAAA 4.61692E-05 4.04331E-05 +TCGAAC 3.36739E-05 3.93846E-05 +TCGAAG 7.17953E-05 4.81218E-05 +TCGAAT 3.38857E-05 3.73414E-05 +TCGACA 3.16619E-05 2.97602E-05 +TCGACC 4.99814E-05 3.33626E-05 +TCGACG 2.03314E-05 1.58883E-05 +TCGACT 3.1556E-05 3.58091E-05 +TCGAGA 3.02853E-05 7.23171E-05 +TCGAGC 6.15237E-05 4.44118E-05 +TCGAGG 4.52162E-05 7.21289E-05 +TCGAGT 3.32503E-05 4.27182E-05 +TCGATA 1.26012E-05 1.91412E-05 +TCGATC 2.55202E-05 
3.15614E-05 +TCGATG 4.75459E-05 3.6293E-05 +TCGATT 2.59437E-05 3.78253E-05 +TCGCAA 2.14962E-05 2.89E-05 +TCGCAC 6.34298E-05 4.46807E-05 +TCGCAG 0.000130566 7.3258E-05 +TCGCAT 2.40377E-05 3.14808E-05 +TCGCCA 7.71958E-05 7.72368E-05 +TCGCCC 0.000150156 0.000106782 +TCGCCG 6.85126E-05 7.09998E-05 +TCGCCT 6.9148E-05 9.26949E-05 +TCGCGA 1.45073E-05 2.16682E-05 +TCGCGC 5.05108E-05 5.88215E-05 +TCGCGG 5.42171E-05 5.97355E-05 +TCGCGT 1.186E-05 2.80128E-05 +TCGCTA 2.01196E-05 2.9814E-05 +TCGCTC 9.32915E-05 9.32864E-05 +TCGCTG 0.000271721 0.00010732 +TCGCTT 3.8545E-05 7.53818E-05 +TCGGAA 0.0001168 5.91979E-05 +TCGGAC 0.000178535 4.49764E-05 +TCGGAG 0.000289405 9.10281E-05 +TCGGAT 0.000111293 4.07825E-05 +TCGGCA 8.31258E-05 6.31766E-05 +TCGGCC 0.000246307 0.000124498 +TCGGCG 9.3821E-05 6.22088E-05 +TCGGCT 0.000105787 0.000107588 +TCGGGA 8.46083E-05 9.87438E-05 +TCGGGC 0.000181818 8.66999E-05 +TCGGGG 0.000142955 0.000119417 +TCGGGT 5.48525E-05 5.62675E-05 +TCGGTA 2.8591E-05 2.71525E-05 +TCGGTC 8.27022E-05 4.77992E-05 +TCGGTG 0.000231799 7.27741E-05 +TCGGTT 4.12982E-05 5.16167E-05 +TCGTAA 1.58839E-06 3.2126E-05 +TCGTAC 3.61094E-05 2.20715E-05 +TCGTAG 1.4825E-06 3.15614E-05 +TCGTAT 2.50966E-05 3.18303E-05 +TCGTCA 3.9498E-05 4.40355E-05 +TCGTCC 0.000102081 5.86602E-05 +TCGTCG 3.89685E-05 2.11306E-05 +TCGTCT 5.97235E-05 6.3795E-05 +TCGTGA 3.17678E-06 6.51123E-05 +TCGTGC 4.01334E-05 5.90097E-05 +TCGTGG 6.3218E-05 7.97101E-05 +TCGTGT 2.79557E-05 6.52467E-05 +TCGTTA 1.94843E-05 3.33626E-05 +TCGTTC 4.3416E-05 4.82831E-05 +TCGTTG 3.68507E-05 4.42237E-05 +TCGTTT 4.8393E-05 8.02478E-05 +TCTAAA 0.000244506 0.000343412 +TCTAAC 0.000122836 0.000144392 +TCTAAG 0.00021708 0.000208994 +TCTAAT 0.000163181 0.000240421 +TCTACA 0.000196855 0.000228484 +TCTACC 0.000164451 0.000178185 +TCTACG 3.9498E-05 3.37121E-05 +TCTACT 0.000180018 0.000256121 +TCTAGA 0.000119659 0.000228915 +TCTAGC 0.000124318 0.000151731 +TCTAGG 8.12197E-05 0.000186385 +TCTAGT 0.000133107 0.00017176 +TCTATA 8.33376E-05 
0.000208026 +TCTATC 0.00012866 0.000148452 +TCTATG 0.000152274 0.000193616 +TCTATT 0.000146344 0.000294027 +TCTCAA 0.000220681 0.000350133 +TCTCAC 0.000163393 0.000294 +TCTCAG 0.000469317 0.000456377 +TCTCAT 0.000200984 0.000328653 +TCTCCA 0.000420394 0.000457157 +TCTCCC 0.000284957 0.000476352 +TCTCCG 8.7891E-05 0.000114686 +TCTCCT 0.00039678 0.000564961 +TCTCGA 0.000107163 6.92524E-05 +TCTCGC 8.73615E-05 9.06518E-05 +TCTCGG 0.000146873 0.000116863 +TCTCGT 6.93598E-05 6.08378E-05 +TCTCTA 0.00013279 0.000309404 +TCTCTC 0.000233494 0.000502725 +TCTCTG 0.000471858 0.000559181 +TCTCTT 0.000236353 0.000501811 +TCTGAA 0.000642134 0.000470626 +TCTGAC 0.000396992 0.000245529 +TCTGAG 0.000669878 0.000429494 +TCTGAT 0.00046201 0.00030271 +TCTGCA 0.000400381 0.000436241 +TCTGCC 0.000460422 0.000500198 +TCTGCG 9.71037E-05 9.90664E-05 +TCTGCT 0.000436172 0.000461915 +TCTGGA 0.000459257 0.000436403 +TCTGGC 0.000344469 0.000326664 +TCTGGG 0.000393921 0.000470061 +TCTGGT 0.000246095 0.000286311 +TCTGTA 0.000189124 0.000367231 +TCTGTC 0.000327526 0.000406212 +TCTGTG 0.00062985 0.00053641 +TCTGTT 0.000292052 0.000495009 +TCTTAA 1.16482E-05 0.000360134 +TCTTAC 0.000152486 0.00019902 +TCTTAG 5.50642E-06 0.000221575 +TCTTAT 0.00013406 0.000271041 +TCTTCA 0.000370731 0.000449818 +TCTTCC 0.000356753 0.000500682 +TCTTCG 6.96774E-05 6.78275E-05 +TCTTCT 0.000388203 0.000513559 +TCTTGA 1.05893E-05 0.000334541 +TCTTGC 9.5833E-05 0.000292683 +TCTTGG 0.000153862 0.000387958 +TCTTGT 0.000116482 0.000338923 +TCTTTA 0.000165828 0.000399814 +TCTTTC 0.000217821 0.000501676 +TCTTTG 0.000275427 0.000470169 +TCTTTT 0.000243236 0.000781374 +TGAAAA 5.29464E-07 0.000690238 +TGAAAC 1.05893E-07 0.000368683 +TGAAAG 5.29464E-07 0.000439521 +TGAAAT 0 0.00057394 +TGAACA 1.05893E-07 0.000328357 +TGAACC 1.05893E-07 0.000232732 +TGAACG 2.11786E-07 5.5246E-05 +TGAACT 2.11786E-07 0.000331771 +TGAAGA 8.47142E-07 0.000541007 +TGAAGC 2.11786E-07 0.000319781 +TGAAGG 4.23571E-07 0.000363817 +TGAAGT 1.05893E-07 
0.000360214 +TGAATA 1.05893E-07 0.00035551 +TGAATC 1.05893E-07 0.000224774 +TGAATG 1.05893E-07 0.000385861 +TGAATT 0 0.000438392 +TGACAA 5.29464E-07 0.00026881 +TGACAC 1.05893E-07 0.000211413 +TGACAG 7.41249E-07 0.000317093 +TGACAT 3.17678E-07 0.000292414 +TGACCA 3.17678E-07 0.00026432 +TGACCC 0 0.000238136 +TGACCG 0 6.0542E-05 +TGACCT 5.29464E-07 0.0003361 +TGACGA 0 4.56216E-05 +TGACGC 4.23571E-07 5.51384E-05 +TGACGG 4.23571E-07 6.20744E-05 +TGACGT 2.11786E-07 6.54618E-05 +TGACTA 4.23571E-07 0.000170012 +TGACTC 2.11786E-07 0.000246443 +TGACTG 4.23571E-07 0.000325131 +TGACTT 0 0.000372957 +TGAGAA 3.17678E-07 0.000503289 +TGAGAC 0 0.000323115 +TGAGAG 2.11786E-07 0.000366075 +TGAGAT 4.23571E-07 0.000344461 +TGAGCA 3.17678E-07 0.000326906 +TGAGCC 0 0.000417315 +TGAGCG 0 8.72913E-05 +TGAGCT 1.05893E-07 0.000352337 +TGAGGA 5.29464E-07 0.000437559 +TGAGGC 1.05893E-06 0.000399169 +TGAGGG 6.35357E-07 0.000343009 +TGAGGT 9.53035E-07 0.000300398 +TGAGTA 1.05893E-07 0.000210795 +TGAGTC 2.11786E-07 0.00022464 +TGAGTG 4.23571E-07 0.000303786 +TGAGTT 1.05893E-07 0.000304538 +TGATAA 1.05893E-07 0.000271902 +TGATAC 3.17678E-07 0.000155791 +TGATAG 0 0.000160522 +TGATAT 0 0.000246281 +TGATCA 2.11786E-07 0.000220876 +TGATCC 1.05893E-07 0.000205042 +TGATCG 0 3.77716E-05 +TGATCT 1.05893E-07 0.000283166 +TGATGA 1.05893E-07 0.000316018 +TGATGC 4.23571E-07 0.000227194 +TGATGG 2.11786E-07 0.00029115 +TGATGT 1.05893E-07 0.000324325 +TGATTA 3.17678E-07 0.000245959 +TGATTC 1.05893E-07 0.000270262 +TGATTG 0 0.000253137 +TGATTT 2.11786E-07 0.000531033 +TGCAAA 0.000309207 0.000393846 +TGCAAC 0.000287499 0.000208806 +TGCAAG 0.000438396 0.000266122 +TGCAAT 0.000205644 0.000267304 +TGCACA 0.00019156 0.000326126 +TGCACC 0.000288558 0.000242168 +TGCACG 0.000110023 7.57044E-05 +TGCACT 0.000146661 0.000313329 +TGCAGA 0.000169534 0.000439709 +TGCAGC 0.000404087 0.000411804 +TGCAGG 0.000198019 0.000389813 +TGCAGT 0.000236564 0.000444844 +TGCATA 7.77253E-05 0.000225688 +TGCATC 0.000350929 0.000227167 
+TGCATG 0.000255943 0.000299215 +TGCATT 0.000210938 0.000388792 +TGCCAA 0.000169217 0.000299 +TGCCAC 0.000279239 0.000342955 +TGCCAG 0.000550325 0.000409788 +TGCCAT 0.000168052 0.000317523 +TGCCCA 0.000227564 0.00045014 +TGCCCC 0.000303171 0.000381775 +TGCCCG 0.000123895 0.000146677 +TGCCCT 0.000213797 0.000418794 +TGCCGA 9.23385E-05 6.52467E-05 +TGCCGC 0.000234129 0.000126219 +TGCCGG 0.000193148 0.000112508 +TGCCGT 6.63948E-05 8.43341E-05 +TGCCTA 7.6984E-05 0.000207354 +TGCCTC 0.000326679 0.00051208 +TGCCTG 0.00059607 0.000584397 +TGCCTT 0.000187748 0.000474013 +TGCGAA 6.11001E-05 4.35516E-05 +TGCGAC 0.000137661 4.22074E-05 +TGCGAG 0.000206279 7.15375E-05 +TGCGAT 5.69703E-05 4.18041E-05 +TGCGCA 3.86509E-05 6.91986E-05 +TGCGCC 0.000166358 0.000111352 +TGCGCG 5.89823E-05 7.4199E-05 +TGCGCT 4.69105E-05 7.78014E-05 +TGCGGA 5.35817E-05 8.19683E-05 +TGCGGC 0.000180018 0.000119552 +TGCGGG 0.000142214 0.000128423 +TGCGGT 4.2463E-05 7.01933E-05 +TGCGTA 1.93784E-05 3.32551E-05 +TGCGTC 8.73615E-05 6.59725E-05 +TGCGTG 0.000191242 0.000107481 +TGCGTT 2.80616E-05 6.0542E-05 +TGCTAA 7.73017E-06 0.000238996 +TGCTAC 0.000261343 0.000165872 +TGCTAG 8.47142E-06 0.000168695 +TGCTAT 0.000151427 0.000225957 +TGCTCA 0.000149732 0.000319996 +TGCTCC 0.000324138 0.00033575 +TGCTCG 7.28542E-05 7.1645E-05 +TGCTCT 0.000182877 0.000416966 +TGCTGA 2.35082E-05 0.000411777 +TGCTGC 0.000331444 0.000496649 +TGCTGG 0.000232011 0.000522027 +TGCTGT 0.000213162 0.00049673 +TGCTTA 7.25365E-05 0.000252142 +TGCTTC 0.000358235 0.000390324 +TGCTTG 0.000167946 0.000321825 +TGCTTT 0.000218457 0.000581682 +TGGAAA 0.000352835 0.0006144 +TGGAAC 0.000282522 0.000262949 +TGGAAG 0.000493884 0.000480949 +TGGAAT 0.000278074 0.000403847 +TGGACA 0.000202785 0.000323518 +TGGACC 0.000223857 0.000220742 +TGGACG 8.17492E-05 7.75325E-05 +TGGACT 0.000167099 0.000298731 +TGGAGA 0.000194631 0.000514365 +TGGAGC 0.000269603 0.000367607 +TGGAGG 0.000218774 0.000501569 +TGGAGT 0.000166781 0.000391991 +TGGATA 0.000100175 0.00022257 
+TGGATC 0.000251389 0.00020402 +TGGATG 0.000298512 0.000323222 +TGGATT 0.000203102 0.000351907 +TGGCAA 0.000142532 0.000278058 +TGGCAC 0.000175782 0.000264213 +TGGCAG 0.000382167 0.000427692 +TGGCAT 0.000137131 0.000305399 +TGGCCA 0.00015365 0.000434817 +TGGCCC 0.000188807 0.00035887 +TGGCCG 6.59712E-05 0.000135117 +TGGCCT 0.000140414 0.000440516 +TGGCGA 6.33239E-05 6.90104E-05 +TGGCGC 0.000109281 0.000109605 +TGGCGG 0.000124848 0.000148613 +TGGCGT 5.64408E-05 8.40653E-05 +TGGCTA 0.000102504 0.000209666 +TGGCTC 0.00025266 0.000400997 +TGGCTG 0.00055149 0.00051415 +TGGCTT 0.000166569 0.000408121 +TGGGAA 0.000346693 0.000505225 +TGGGAC 0.000380473 0.000311878 +TGGGAG 0.000494731 0.000599103 +TGGGAT 0.000303595 0.00037414 +TGGGCA 0.000202467 0.000405702 +TGGGCC 0.000336421 0.000355268 +TGGGCG 8.5879E-05 0.000145763 +TGGGCT 0.000222798 0.000398577 +TGGGGA 0.000181077 0.000489848 +TGGGGC 0.000280192 0.000396964 +TGGGGG 0.00015005 0.000446377 +TGGGGT 0.000132472 0.000347364 +TGGGTA 9.13855E-05 0.000180389 +TGGGTC 0.000178112 0.000234721 +TGGGTG 0.000333456 0.000378764 +TGGGTT 0.000116164 0.000297817 +TGGTAA 4.3416E-06 0.000214155 +TGGTAC 0.000197066 0.000135225 +TGGTAG 4.97696E-06 0.000176491 +TGGTAT 0.000142638 0.000202703 +TGGTCA 0.000119765 0.000242007 +TGGTCC 0.000175782 0.000210956 +TGGTCG 3.91803E-05 4.57023E-05 +TGGTCT 0.000140731 0.00032669 +TGGTGA 1.55662E-05 0.000338573 +TGGTGC 0.00013999 0.000284214 +TGGTGG 0.000203949 0.000446269 +TGGTGT 0.000111293 0.000302522 +TGGTTA 6.77714E-05 0.000195471 +TGGTTC 0.000233705 0.000244023 +TGGTTG 0.000117541 0.000250045 +TGGTTT 0.000222587 0.00046904 +TGTAAA 0.000257108 0.000523049 +TGTAAC 0.000140731 0.000217489 +TGTAAG 0.000220786 0.000245206 +TGTAAT 0.000220045 0.000429386 +TGTACA 0.000113305 0.00030521 +TGTACC 0.000121035 0.000171034 +TGTACG 1.779E-05 3.50294E-05 +TGTACT 0.000112352 0.000246147 +TGTAGA 7.68781E-05 0.000254615 +TGTAGC 9.53035E-05 0.000186868 +TGTAGG 5.35817E-05 0.000176088 +TGTAGT 0.000105469 0.000239776 
+TGTATA 6.65007E-05 0.000361935 +TGTATC 0.00010674 0.000212811 +TGTATG 0.000108434 0.000297575 +TGTATT 0.000127283 0.000528909 +TGTCAA 0.000129507 0.000250233 +TGTCAC 0.000118282 0.000284187 +TGTCAG 0.000244718 0.000311017 +TGTCAT 0.000110976 0.000322577 +TGTCCA 0.000162016 0.000295694 +TGTCCC 0.000181394 0.000329513 +TGTCCG 3.43093E-05 7.00858E-05 +TGTCCT 0.000178641 0.000390378 +TGTCGA 5.96176E-05 3.32282E-05 +TGTCGC 6.26885E-05 6.79082E-05 +TGTCGG 6.77714E-05 5.65633E-05 +TGTCGT 3.9498E-05 5.1778E-05 +TGTCTA 6.43828E-05 0.000203886 +TGTCTC 0.000133107 0.000384222 +TGTCTG 0.000225446 0.000419439 +TGTCTT 0.00013279 0.000462749 +TGTGAA 0.000505744 0.000480035 +TGTGAC 0.000444961 0.000297333 +TGTGAG 0.000522475 0.000387125 +TGTGAT 0.000367448 0.000359515 +TGTGCA 0.000212739 0.000351907 +TGTGCC 0.000387991 0.000353762 +TGTGCG 6.09942E-05 9.11088E-05 +TGTGCT 0.000275215 0.000390485 +TGTGGA 0.000393921 0.00040218 +TGTGGC 0.000446762 0.000361209 +TGTGGG 0.000476729 0.000389195 +TGTGGT 0.00020575 0.000350671 +TGTGTA 0.000120824 0.000367796 +TGTGTC 0.000234341 0.000337175 +TGTGTG 0.0004223 0.000742151 +TGTGTT 0.000192831 0.000517027 +TGTTAA 7.41249E-06 0.000371828 +TGTTAC 0.00010907 0.000219451 +TGTTAG 4.76517E-06 0.00021636 +TGTTAT 9.05383E-05 0.000321744 +TGTTCA 0.000127283 0.000343305 +TGTTCC 0.000141367 0.000305909 +TGTTCG 2.34023E-05 4.72615E-05 +TGTTCT 0.000153227 0.000455087 +TGTTGA 8.36553E-06 0.000323895 +TGTTGC 6.30062E-05 0.000293812 +TGTTGG 6.98892E-05 0.000337014 +TGTTGT 6.97833E-05 0.000379141 +TGTTTA 9.06442E-05 0.000462641 +TGTTTC 0.000135755 0.000473072 +TGTTTG 0.000150368 0.000510521 +TGTTTT 0.00014772 0.000999481 +TTAAAA 0.000354529 0.001063088 +TTAAAC 0.000126754 0.000298678 +TTAAAG 0.000276698 0.000449065 +TTAAAT 0.000272992 0.000625879 +TTAACA 0.00015365 0.000301124 +TTAACC 9.09619E-05 0.000166598 +TTAACG 2.09668E-05 3.91158E-05 +TTAACT 0.000131413 0.000288193 +TTAAGA 0.00014645 0.000347606 +TTAAGC 7.70899E-05 0.00018867 +TTAAGG 9.21267E-05 
0.000223726 +TTAAGT 0.000109599 0.000294591 +TTAATA 0.000102822 0.000426752 +TTAATC 8.54555E-05 0.00020765 +TTAATG 0.000158204 0.00033817 +TTAATT 0.000166358 0.000512134 +TTACAA 0.000177794 0.000296473 +TTACAC 7.87842E-05 0.000166464 +TTACAG 0.000297241 0.000372796 +TTACAT 0.000126436 0.000312335 +TTACCA 0.000158733 0.000214989 +TTACCC 8.50319E-05 0.000154958 +TTACCG 2.20257E-05 3.52176E-05 +TTACCT 0.000172393 0.000255207 +TTACGA 5.80292E-05 3.11582E-05 +TTACGC 3.16619E-05 2.48674E-05 +TTACGG 4.99814E-05 3.62661E-05 +TTACGT 3.6533E-05 4.75303E-05 +TTACTA 8.41847E-05 0.000202219 +TTACTC 9.05383E-05 0.000176142 +TTACTG 0.000180865 0.000290263 +TTACTT 0.00013639 0.000354515 +TTAGAA 0.000423571 0.000380081 +TTAGAC 0.000138084 0.000139338 +TTAGAG 0.000251813 0.000236039 +TTAGAT 0.000257743 0.000218215 +TTAGCA 0.000174299 0.00021757 +TTAGCC 0.000126118 0.000184315 +TTAGCG 2.41435E-05 3.01904E-05 +TTAGCT 0.000158945 0.000226495 +TTAGGA 0.000147932 0.000247545 +TTAGGC 6.70301E-05 0.000126649 +TTAGGG 7.07364E-05 0.000173346 +TTAGGT 7.88901E-05 0.00016555 +TTAGTA 8.09021E-05 0.000226253 +TTAGTC 5.50642E-05 0.000132752 +TTAGTG 0.000120824 0.000197084 +TTAGTT 9.33974E-05 0.000264589 +TTATAA 9.10678E-06 0.000434682 +TTATAC 7.25365E-05 0.000194638 +TTATAG 7.09482E-06 0.000235904 +TTATAT 0.000139778 0.000453581 +TTATCA 0.000131307 0.000243727 +TTATCC 9.55153E-05 0.000174636 +TTATCG 1.94843E-05 2.82547E-05 +TTATCT 0.000159475 0.000313517 +TTATGA 7.94196E-06 0.00028787 +TTATGC 4.88166E-05 0.000176034 +TTATGG 7.48662E-05 0.000208375 +TTATGT 8.36553E-05 0.000355886 +TTATTA 0.000113941 0.000446807 +TTATTC 8.48201E-05 0.000314539 +TTATTG 0.000107163 0.000347283 +TTATTT 0.000181288 0.001005235 +TTCAAA 0.0004223 0.000497805 +TTCAAC 0.000514639 0.000215553 +TTCAAG 0.000666171 0.000395351 +TTCAAT 0.000314607 0.000274724 +TTCACA 0.000313548 0.000354865 +TTCACC 0.000581245 0.000290639 +TTCACG 0.000169005 6.84458E-05 +TTCACT 0.000321914 0.000357472 +TTCAGA 0.000247048 0.000463125 +TTCAGC 
0.000609731 0.00032868 +TTCAGG 0.000250966 0.000350375 +TTCAGT 0.000375813 0.000411078 +TTCATA 0.000127283 0.000319002 +TTCATC 0.000660347 0.00030634 +TTCATG 0.000481283 0.000342014 +TTCATT 0.000374013 0.000560014 +TTCCAA 0.000258272 0.00038191 +TTCCAC 0.000419441 0.000286634 +TTCCAG 0.000917137 0.000495144 +TTCCAT 0.00025626 0.000385136 +TTCCCA 0.000304971 0.00047447 +TTCCCC 0.000405463 0.000426214 +TTCCCG 0.000162651 0.000121487 +TTCCCT 0.000308783 0.000511086 +TTCCGA 0.000185736 5.61869E-05 +TTCCGC 0.000328056 7.61883E-05 +TTCCGG 0.000326785 9.82061E-05 +TTCCGT 0.000121883 7.15106E-05 +TTCCTA 0.000168687 0.000299 +TTCCTC 0.000567373 0.000499741 +TTCCTG 0.001148407 0.000613647 +TTCCTT 0.00027098 0.000628729 +TTCGAA 8.1008E-05 4.2987E-05 +TTCGAC 0.000220469 2.66686E-05 +TTCGAG 0.000278816 6.39563E-05 +TTCGAT 0.000105257 3.29056E-05 +TTCGCA 5.53819E-05 4.04331E-05 +TTCGCC 0.000227458 6.20206E-05 +TTCGCG 6.81949E-05 2.61578E-05 +TTCGCT 6.68183E-05 5.84989E-05 +TTCGGA 7.17953E-05 5.84451E-05 +TTCGGC 0.000198443 5.66439E-05 +TTCGGG 0.000162228 6.97363E-05 +TTCGGT 5.51701E-05 5.02187E-05 +TTCGTA 2.93323E-05 4.21536E-05 +TTCGTC 0.000133107 4.24493E-05 +TTCGTG 0.000319796 6.3795E-05 +TTCGTT 4.71223E-05 6.59994E-05 +TTCTAA 1.16482E-05 0.000395647 +TTCTAC 0.0004834 0.000231657 +TTCTAG 1.30248E-05 0.00027623 +TTCTAT 0.000286546 0.000328035 +TTCTCA 0.00026219 0.000466271 +TTCTCC 0.000595329 0.000520818 +TTCTCG 0.000119023 7.46829E-05 +TTCTCT 0.000361624 0.000614615 +TTCTGA 3.10266E-05 0.000490627 +TTCTGC 0.000405463 0.000424198 +TTCTGG 0.000376872 0.000455813 +TTCTGT 0.000270874 0.000606872 +TTCTTA 0.000144967 0.000407234 +TTCTTC 0.000670407 0.000511489 +TTCTTG 0.000296817 0.000427719 +TTCTTT 0.000398369 0.000864283 +TTGAAA 0.000450468 0.000592866 +TTGAAC 0.00019569 0.000274294 +TTGAAG 0.00045015 0.000392502 +TTGAAT 0.00028051 0.000396292 +TTGACA 0.000192937 0.000253486 +TTGACC 0.000163922 0.000177352 +TTGACG 5.09344E-05 3.82286E-05 +TTGACT 0.000172499 0.000260557 +TTGAGA 
0.000171229 0.000404411 +TTGAGC 0.000148991 0.000234022 +TTGAGG 0.000147509 0.000297871 +TTGAGT 0.000153121 0.000259132 +TTGATA 0.000109387 0.000240716 +TTGATC 0.000136602 0.000170577 +TTGATG 0.000224069 0.000276875 +TTGATT 0.000181606 0.000373764 +TTGCAA 0.000201302 0.000295801 +TTGCAC 0.000155556 0.000219747 +TTGCAG 0.000417747 0.000381264 +TTGCAT 0.000136496 0.000308302 +TTGCCA 0.000238047 0.000317765 +TTGCCC 0.000205961 0.00027994 +TTGCCG 5.97235E-05 6.48703E-05 +TTGCCT 0.000241965 0.000378253 +TTGCGA 5.69703E-05 3.50832E-05 +TTGCGC 7.59251E-05 4.5998E-05 +TTGCGG 0.000107058 5.86871E-05 +TTGCGT 4.80753E-05 4.66701E-05 +TTGCTA 0.000118918 0.000237114 +TTGCTC 0.000186901 0.000278569 +TTGCTG 0.000520463 0.000441349 +TTGCTT 0.000201514 0.000469362 +TTGGAA 0.000619155 0.000472051 +TTGGAC 0.00036173 0.000205875 +TTGGAG 0.000649334 0.000379141 +TTGGAT 0.000491237 0.000284725 +TTGGCA 0.000299994 0.000286876 +TTGGCC 0.000370413 0.000306689 +TTGGCG 8.42906E-05 6.38487E-05 +TTGGCT 0.000350081 0.000352553 +TTGGGA 0.000241859 0.000432558 +TTGGGC 0.000223328 0.000223296 +TTGGGG 0.000201514 0.00037613 +TTGGGT 0.000150156 0.000247868 +TTGGTA 0.000131519 0.000202246 +TTGGTC 0.000176417 0.000187863 +TTGGTG 0.000377402 0.000304538 +TTGGTT 0.00019569 0.000345428 +TTGTAA 9.42446E-06 0.000429305 +TTGTAC 0.000129401 0.000216844 +TTGTAG 7.20071E-06 0.000238861 +TTGTAT 0.000135755 0.00044764 +TTGTCA 0.000169534 0.000311313 +TTGTCC 0.00017144 0.000242061 +TTGTCG 3.53682E-05 4.0675E-05 +TTGTCT 0.000196325 0.000367204 +TTGTGA 1.58839E-05 0.000363656 +TTGTGC 9.14913E-05 0.000253056 +TTGTGG 0.000113305 0.000311824 +TTGTGT 0.000115 0.00043444 +TTGTTA 0.000102504 0.000339783 +TTGTTC 0.000124636 0.000315722 +TTGTTG 0.000167416 0.000365726 +TTGTTT 0.000209879 0.000836996 +TTTAAA 0.000369989 0.001115189 +TTTAAC 0.000225869 0.00030021 +TTTAAG 0.000338751 0.000431564 +TTTAAT 0.000296182 0.000638568 +TTTACA 0.000188913 0.000414493 +TTTACC 0.000195266 0.000232006 +TTTACG 3.25091E-05 4.44925E-05 
+TTTACT 0.000203949 0.00038285 +TTTAGA 0.000135331 0.000367984 +TTTAGC 0.000146026 0.000207972 +TTTAGG 9.78449E-05 0.000247518 +TTTAGT 0.000171017 0.000347122 +TTTATA 0.000131836 0.000602705 +TTTATC 0.000199608 0.000302656 +TTTATG 0.000215068 0.000427343 +TTTATT 0.000288452 0.000950714 +TTTCAA 0.000257319 0.000509795 +TTTCAC 0.000164028 0.000377743 +TTTCAG 0.000407793 0.000521275 +TTTCAT 0.000232646 0.000571708 +TTTCCA 0.000291311 0.000551599 +TTTCCC 0.000205114 0.000495869 +TTTCCG 4.78635E-05 8.38771E-05 +TTTCCT 0.00033621 0.000723332 +TTTCGA 0.000114047 4.15622E-05 +TTTCGC 6.96774E-05 5.05413E-05 +TTTCGG 9.30797E-05 6.15099E-05 +TTTCGT 6.81949E-05 6.38756E-05 +TTTCTA 0.000155027 0.00051622 +TTTCTC 0.000255096 0.000597328 +TTTCTG 0.000430772 0.000695938 +TTTCTT 0.000291205 0.000961468 +TTTGAA 0.000826493 0.000637089 +TTTGAC 0.000625826 0.000250986 +TTTGAG 0.000800549 0.000448312 +TTTGAT 0.000682691 0.000392448 +TTTGCA 0.000413511 0.000456807 +TTTGCC 0.000562502 0.000336476 +TTTGCG 7.92078E-05 5.54073E-05 +TTTGCT 0.000487107 0.000503074 +TTTGGA 0.000534017 0.000491622 +TTTGGC 0.000450997 0.000288919 +TTTGGG 0.000441573 0.000471889 +TTTGGT 0.000336421 0.000375914 +TTTGTA 0.000220257 0.000599399 +TTTGTC 0.000340869 0.000339514 +TTTGTG 0.00067729 0.000522215 +TTTGTT 0.000353894 0.000820409 +TTTTAA 1.34484E-05 0.001158418 +TTTTAC 0.000166569 0.000397448 +TTTTAG 5.50642E-06 0.000469739 +TTTTAT 0.000256472 0.000910416 +TTTTCA 0.000252872 0.00071196 +TTTTCC 0.000237623 0.000641633 +TTTTCG 3.31444E-05 6.43595E-05 +TTTTCT 0.000329962 0.001050318 +TTTTGA 1.14364E-05 0.000607033 +TTTTGC 0.000114258 0.000442021 +TTTTGG 0.000147403 0.000498235 +TTTTGT 0.00015185 0.000919341 +TTTTTA 0.000174723 0.001164332 +TTTTTC 0.000241965 0.000870413 +TTTTTG 0.000238365 0.000845572 +TTTTTT 0.000243447 0.002578682 diff --git a/bin/cpat_model/Human_cutoff.txt b/bin/cpat_model/Human_cutoff.txt new file mode 100755 index 0000000..59139d1 --- /dev/null +++ b/bin/cpat_model/Human_cutoff.txt @@ 
-0,0 +1,2 @@ +Coding Probability Cutoff: 0.364 +Achieved Sensitivity and Specificity: 0.966 diff --git a/bin/cpat_model/Human_logitModel.RData b/bin/cpat_model/Human_logitModel.RData new file mode 100755 index 0000000..85fe444 Binary files /dev/null and b/bin/cpat_model/Human_logitModel.RData differ diff --git a/bin/cpat_model/Mouse_Hexamer.tsv b/bin/cpat_model/Mouse_Hexamer.tsv new file mode 100755 index 0000000..b74e930 --- /dev/null +++ b/bin/cpat_model/Mouse_Hexamer.tsv @@ -0,0 +1,4097 @@ +hexamer coding noncoding +GAACGT 0.000101774181912 6.10808750604e-05 +CTTCTT 0.00025408691306 0.000509033837531 +CACCCT 0.000289638168385 0.0003340733649 +GAACGG 0.000204245447261 6.72924894733e-05 +GAACGC 0.000172179609124 5.40728998253e-05 +GAACGA 0.000159980648964 5.88510647583e-05 +CACCCA 0.000291032335261 0.000367759427678 +CTTCTA 0.000149175855679 0.000289636431023 +CACCCC 0.000298351711357 0.000360353272032 +CTTCTC 0.000324143798554 0.000510706195257 +CACCCG 0.000135931270361 9.52447543314e-05 +CTTCTG 0.000566031751454 0.000608180759891 +CGTGTG 0.000221672533205 0.000147804568595 +TAAGGT 0.0 0.000167315408738 +CGTGTC 0.000111184808321 8.93516842474e-05 +CGTGTA 6.02977173654e-05 6.12401472248e-05 +GGAAAT 0.000346450468562 0.000381695742066 +TAAGGG 0.0 0.000169385946875 +CGTGTT 7.31937609638e-05 0.000100739644004 +TAAGGC 0.0 0.000159670344845 +TAAGGA 0.0 0.000278248471266 +TCACTG 0.00032240108996 0.000431866473862 +GTCAAA 0.000340176717622 0.00020761126634 +CCCGCT 0.000115367308948 0.000115631591379 +GTCAAG 0.000543376539727 0.00019901056946 +CTGTCC 0.000642710929606 0.000467941619107 +TCAGAG 0.000727406567293 0.000498203330349 +CTGTCA 0.000348890260594 0.000400410221387 +CTGTCG 0.000169391275373 8.50513358077e-05 +GTATCT 0.000133840020048 0.00020402764264 +TCAGAA 0.000623889676787 0.000504494580844 +GTCAAT 0.000291729418699 0.000141991134593 +GTATCA 8.85295965943e-05 0.00015194564487 +GTATCC 0.000110139183165 0.0001343460707 +GTATCG 2.43979203213e-05 2.65188153782e-05 
+CTGTCT 0.00050503695065 0.000619489083565 +GGTGTC 0.000316475880739 0.000234607898211 +GGTGTA 0.000130354602859 0.00014836202117 +GGTGTG 0.000474713821108 0.000373095045186 +TATCCT 0.000181938777253 0.000224095935358 +CCGGGG 0.00015196418943 0.000162776152051 +TTCTGT 0.000344707759968 0.000661536934976 +ATTCCT 0.00034401067653 0.000343948239095 +CCGGGC 0.000204942530699 0.000178145915919 +CCGGGA 0.000118155642699 0.000148839837663 +TATCCA 0.000151615647711 0.000198453116885 +TATCCC 0.000137325437237 0.000160068525256 +GTTCTG 0.000393155058891 0.000402719667771 +TACACC 0.000344359218249 0.000133071893384 +TATCCG 4.04308393895e-05 2.77133566115e-05 +GGTGTT 0.000234220035084 0.000272355401182 +ATTCCG 5.43725081445e-05 4.82594658235e-05 +TTCTGG 0.000360392137317 0.000506087302489 +CCGGGT 9.27120972208e-05 9.74745646335e-05 +ATTCCC 0.000232825868209 0.000235165350787 +TTCTGC 0.000399080268112 0.000430671932629 +ATTCCA 0.00028824400151 0.000313367983524 +GTTCTA 0.000102819807068 0.000212707975601 +TGCACT 0.0001551010649 0.000286212079488 +TATCTT 0.000133840020048 0.000277133566115 +CGCGGT 3.9036672514e-05 4.59500194392e-05 +CCCGCG 0.000126172102233 0.000124789740834 +CGCGGG 7.73762615903e-05 0.000116985404777 +CGCGGC 9.5151889253e-05 0.000134903523275 +CGCGGA 2.96260461044e-05 6.34699575269e-05 +ACCTGT 0.000286849834634 0.00033192319068 +TCATGT 8.29529290923e-05 0.000339010801997 +CCCGCC 0.000195880446008 0.000210796709628 +TTACAG 0.000280227541976 0.000316633062895 +CTCCGC 0.000212610448514 0.000138487146975 +TTACAA 0.000143250646458 0.000278487379513 +TTACAC 9.61975144096e-05 0.000183879713839 +CTCCGG 0.000264891706345 0.000148282385088 +TCATGC 6.41316762731e-05 0.000200045838529 +ACCTGG 0.000317870047614 0.000383925552368 +GGCGAA 0.000101774181912 4.89761905634e-05 +GTAAGC 7.77248033092e-05 0.000138566783057 +TCATGG 0.000116064392385 0.000295131320696 +ACCTGC 0.000388275474827 0.00033120646594 +ACCTGA 1.74270859438e-05 0.000322844677307 +GTATTT 0.000158237940369 
0.000437918816111 +CTCGAT 9.34091806586e-05 4.38794813015e-05 +CTCCGT 0.000126869185671 0.000109340340884 +GCCTGA 3.45056301687e-05 0.000333436276242 +GCCTGC 0.000431843189687 0.000443572977948 +GCCTGG 0.000444739233285 0.00054996678379 +TCTTGC 0.000110487724883 0.000292503329983 +TAAAGG 0.0 0.000315279249497 +CAAATT 0.000169042733655 0.000281832094966 +TAAAGA 0.0 0.000447475145977 +GCATAA 5.576667502e-06 0.000137770422235 +AAGGTC 0.000463909027823 0.00023349299306 +TATTAG 5.576667502e-06 0.000163174332462 +GCCTGT 0.000345404843405 0.000375404491571 +TATCTA 6.23889676787e-05 0.000205700000366 +TAAAGT 0.0 0.000358043825647 +CAAATA 0.000133840020048 0.000340046071066 +CAAATC 0.000183332944128 0.000216928687959 +CGTGCT 0.000113276058634 0.000107508710993 +CAAATG 0.000242236494618 0.000364812892636 +AATACT 0.000151267105992 0.000255154007423 +TATCTC 0.000143599188177 0.000206098180777 +CCGGCG 8.78325131566e-05 0.000100660007922 +GGCATG 0.000531874663004 0.000238271157993 +GGCATA 0.00013941668755 0.000132594076891 +GGCGAC 0.000180544610377 7.27873791463e-05 +GGCATC 0.00067547385118 0.000216371235383 +AATACG 5.36754247068e-05 3.55176926688e-05 +AATACA 0.000182635860691 0.000302856020671 +AATACC 0.000171133983968 0.000155608904652 +GGCATT 0.000332857341526 0.000232537360073 +TATCTG 0.000268028581815 0.000265506698111 +TGCACA 0.000225157950393 0.000382332830723 +GAGCAC 0.000527692162377 0.000269568138304 +ACTGAT 0.000345404843405 0.000247907123941 +GAGCAG 0.00156251252572 0.000481559389166 +AGTAGT 0.00014255356302 0.000174403020055 +TCTCGC 9.55004309718e-05 7.0477932762e-05 +TCTCGA 0.000121989601606 6.21161441292e-05 +TCTCGG 0.000174619401157 9.95451027712e-05 +ACTGAG 0.000580670503646 0.000427008672847 +AGTAGA 9.86373064417e-05 0.000233652265224 +TTTGGT 0.000336691300434 0.000403516028593 +AGTAGG 7.04054272128e-05 0.000175119744795 +ACTGAA 0.000490398198458 0.000438715176933 +AAGGTT 0.000302882753703 0.000268134688824 +GGGTCA 0.000135234186924 0.000213345064259 +GTGTCG 
0.000101077098474 6.25939606225e-05 +TCGAAC 4.25220897028e-05 4.25256679038e-05 +TCGAAA 5.54181333012e-05 4.89761905634e-05 +GTGTCC 0.000488306948144 0.000286849168145 +CGCACG 7.73762615903e-05 4.6985288508e-05 +GTGTCA 0.000245373370088 0.000268851413564 +GTTGTA 0.00013314293661 0.000207292722011 +CTTGCT 0.000313339005269 0.00045440348513 +TCGGTC 6.93598020562e-05 5.22412699343e-05 +GTGTCT 0.000451710067662 0.000406701471882 +TTGAAG 0.000466697361574 0.000397384050262 +TCGAAT 4.80987572048e-05 3.79864112175e-05 +CTTGCG 5.15841743935e-05 5.43914441542e-05 +CTTGCA 0.000224460866956 0.000274346303237 +CTTGCC 0.000303928378859 0.000327543206158 +CGAAAG 0.000183332944128 6.06030585671e-05 +CGAAAA 0.000128611894265 6.04437864026e-05 +CGAAAC 0.000119201267855 4.20478514105e-05 +AATCAT 0.000164511691309 0.000247907123941 +AAATGC 0.000234220035084 0.000342514789615 +CACGGC 0.000181938777253 9.42891213448e-05 +AAATGA 2.96260461044e-05 0.000518032714821 +ACAGTC 0.000291729418699 0.000247827487859 +CACGGG 0.000143599188177 0.000103208362553 +ACAGTA 0.000191000861944 0.00024894239301 +TAGAGC 0.0 0.000188100426196 +CGAAAT 0.000100031473317 4.17293070817e-05 +ACAGTT 0.000244327744932 0.000349124584439 +AATCAG 0.000367014429976 0.00026654196718 +CACGGT 8.60898045622e-05 7.31059234751e-05 +AAATGT 0.000283364417446 0.000588829191912 +AATCAC 0.000169042733655 0.000196462214829 +TTTGTA 0.000202154196948 0.000523288696247 +GCTCAA 0.000206685239293 0.000193037863294 +TCTAGT 0.000131748769735 0.00019757711998 +TTGCCG 6.83141768996e-05 6.3868137938e-05 +ATACGT 2.19581282891e-05 4.17293070817e-05 +GGGTAT 0.000100728556755 0.000130921719165 +ATACGA 4.42647982972e-05 3.33675184489e-05 +TCTAGG 9.27120972208e-05 0.000216530507548 +ATACGC 1.98668779759e-05 2.33333720896e-05 +TCTAGA 0.000104911057381 0.000269488502222 +GGGTAG 5.92520922088e-06 0.00015696271805 +TCTAGC 0.000146038980209 0.00018069427055 +ATACGG 4.7401673767e-05 3.10580720646e-05 +TGACTA 0.0 0.000202992373571 +TTTGTG 
0.000747970528706 0.000547020248748 +TGGGCC 0.000299397336514 0.000293299690805 +AGATGT 9.30606389397e-05 0.000371343051378 +TCGCCC 0.000127217727389 7.84415409837e-05 +TCGCCA 8.99237634698e-05 7.5256097695e-05 +GGGTCG 5.05385492369e-05 5.77361596073e-05 +TCGCCG 5.89035504899e-05 5.16041812766e-05 +GGTAAT 9.16664720642e-05 0.000122161750121 +ACCCAT 0.000185772736161 0.000222105033303 +GTCAAC 0.000428357772498 0.00013362934596 +ACCCAC 0.000314384630426 0.000296166589765 +GGTAAC 0.000127217727389 0.00011387959757 +ACCCAA 0.000203896905542 0.000245040224981 +AGATGG 0.000114321683791 0.000430194116136 +ACCCAG 0.000672685517429 0.000433937012 +AGATGA 1.56843773494e-05 0.000384244096697 +TCGCCT 9.16664720642e-05 9.77134728802e-05 +AGATGC 8.9575221751e-05 0.000304289470151 +TTTGAG 0.000818375955919 0.000419363608954 +CCCCAT 0.000190652320225 0.000276815021786 +TGGTCT 0.000138371062393 0.000329534108214 +TTGGGT 0.000164860233028 0.000276576113539 +CACTAG 4.53104234538e-06 0.000159988889174 +CACTAA 9.75916812851e-06 0.000200284746776 +CACTAC 0.000313339005269 0.000136974061413 +CCCCAA 0.000223415241799 0.000338134805093 +CCCCAC 0.000287546918072 0.000422389780079 +CCCCAG 0.000729497817606 0.000512219280819 +CACTAT 0.000202154196948 0.000161263066489 +CTATCG 2.30037534458e-05 2.58817267205e-05 +TCGGTT 5.33268829879e-05 5.79750678539e-05 +TTGGGA 0.000248858787277 0.000374130314255 +TTGGGC 0.000258617955405 0.000219238134343 +CTCGAG 0.000156495231775 8.41753389033e-05 +TAGTGG 0.0 0.000170182307698 +TTTCTG 0.000443345066409 0.000712185483266 +TGCAGC 0.000395246309205 0.000393083701823 +TGCCTC 0.000287895459791 0.00048171866133 +TGCCTA 9.7940223004e-05 0.000229511188949 +TGCAGG 0.000200760030072 0.000394357879138 +TTTCCG 5.22812578313e-05 7.8919357477e-05 +GTGGCC 0.0010027545252 0.000308908362919 +TCAGAC 0.000462514860948 0.000281354278472 +TTTCTT 0.000247116078683 0.00095372172063 +TGCCTT 0.000181938777253 0.000508874565366 +TGCAGT 0.000221672533205 0.000369591057569 +TAGTGC 
0.0 0.000136894425331 +GTGAGC 0.000432191731405 0.000297122222751 +AAGGAA 0.0011651749662 0.000638442471133 +AAGGAC 0.00100519431724 0.000314482888675 +AAGGAG 0.00171238546483 0.000456235115021 +ATATGC 6.62229265863e-05 0.000178225552001 +GTTCCA 0.000236311285397 0.000277133566115 +ATATGA 9.75916812851e-06 0.000243606775502 +TACGAA 0.000121292518169 3.39249710244e-05 +ATATGG 7.63306364337e-05 0.000189215331347 +AAGGAT 0.000728103650731 0.000304767286644 +CTATCA 6.86627186184e-05 0.000159590708763 +TGGTTT 0.000212261906795 0.000512537825148 +ATATGT 8.08616787791e-05 0.000327782114405 +GGTCCC 0.000264194622907 0.000239147154897 +GCCCAG 0.00112439558509 0.000484187379879 +CCACTG 0.000528389245815 0.000422628688325 +GCCCAA 0.000224112325237 0.000195028765349 +GGTCCG 4.80987572048e-05 6.00456059916e-05 +AGGTCT 0.000146038980209 0.000281354278472 +TGTGTC 0.000287895459791 0.00042063778627 +AGACCC 0.00019518336257 0.000279602284664 +AGACCA 0.000196577529446 0.00032292431339 +AGACCG 5.15841743935e-05 7.00797523509e-05 +CTTCGC 0.000121989601606 7.14335657486e-05 +ATGCAA 0.000216444407422 0.000250614750737 +AGGTCC 0.00014255356302 0.000222105033303 +AGGTCA 0.000135931270361 0.000293618235134 +AGGTCG 3.72939639197e-05 4.91354627278e-05 +GGTCCT 0.000309505046361 0.000278328107348 +TCCGGA 8.88781383132e-05 8.75996904386e-05 +CTTAGC 0.000128263352546 0.000206735269435 +ATTGTG 0.000509916534715 0.000305484011384 +CTTAGA 0.00010212272363 0.000261206349672 +ATGCAC 0.000284410042602 0.000209522532313 +CTTAGG 8.33014708112e-05 0.000206018544695 +ATTGTC 0.000357952345285 0.000217087960123 +ATTGTA 0.000173922317719 0.000270762679538 +CGCTAC 0.000276045041349 4.18885792461e-05 +ATTTTG 0.000250252954152 0.000522173791096 +CGCTAA 1.3941668755e-06 3.24118854623e-05 +ATTTTA 0.000194834820851 0.000658271855605 +CGCTAG 4.87958406425e-06 3.51991483399e-05 +ATTTTC 0.000233871493365 0.000504653853009 +GAGGTC 0.000484821530956 0.000225529384838 +TAGCAG 0.0 0.0002413769652 +GATACT 
0.000188561069912 0.000162059427311 +TAGCAC 0.0 0.000158396167529 +TAGCAA 0.0 0.000215335966315 +GAGGTT 0.000327977757462 0.00025037584249 +ATTTTT 0.000242585036337 0.000916690942399 +GCCTTG 0.000535360080192 0.000373732133844 +GTGGGG 0.000398034642956 0.000376041580228 +GCCTTA 0.000169739817092 0.000190170964334 +TTGACA 0.000188212528193 0.000268851413564 +CGCTAT 0.000163117524434 2.66780875427e-05 +GATACG 6.02977173654e-05 2.69169957893e-05 +TAGCAT 0.0 0.00022019376733 +GATACC 0.000231083159614 0.000120967208888 +GATACA 0.000215747323984 0.000197099303487 +TGAGCT 0.0 0.000395233876043 +GTCACC 0.000590778213494 0.000245836585804 +GATCAT 0.000173573776 0.000172651026246 +TGGCAC 0.000174619401157 0.00024822566827 +AAGACA 0.000531526121285 0.0004720030593 +AAGACC 0.000634694470072 0.000267099419756 +ACAATG 0.000209822114763 0.00025682636515 +AAGACG 0.000233174409928 8.29011615878e-05 +CCACTT 0.000195880446008 0.000295370228943 +TGAGCG 0.0 9.03869533162e-05 +GATCAC 0.000203199822104 0.000150034378897 +GATCAA 0.00019832023804 0.000168032133478 +AAGACT 0.000431494647968 0.000364494348307 +GAAGGG 0.000511659243309 0.00037142268746 +CTTCCA 0.000314384630426 0.000481639025248 +GCGGTA 4.39162565783e-05 3.63936895731e-05 +GCGGTG 0.000281273167132 0.000113959233652 +CGGTGT 6.97083437751e-05 8.52106079721e-05 +TCGTTA 2.16095865703e-05 3.61547813265e-05 +TCGTTC 5.68123001767e-05 5.31969029209e-05 +CTTCCC 0.00027255962416 0.000533163570442 +ACTTGT 0.000126172102233 0.00029744076708 +TTGGAG 0.000653515722891 0.00038472191319 +TGTGTG 0.000495626324241 0.00122384731151 +GCGGTT 6.13433425221e-05 6.00456059916e-05 +ACTTGC 0.000122338143325 0.000227440650812 +ATGTGA 1.74270859438e-05 0.000379306659599 +ACTTGA 7.66791781526e-06 0.000315836702072 +CGGTGG 9.72431395662e-05 0.000123117383107 +ACTTGG 0.000115018767229 0.000331126829858 +CGGTGA 9.06208469076e-06 7.50171894483e-05 +CGGTGC 7.35423026827e-05 9.79523811268e-05 +TGCCGC 0.000191697945381 0.000109499613048 +TTCCTA 
0.000215747323984 0.000328976655638 +TAGACT 0.0 0.000199886566364 +TTCCTC 0.000583110295678 0.000498044058185 +ATGTGC 0.000209822114763 0.000274744483648 +TGGGTT 0.000112578975197 0.00034052388756 +TTCCTG 0.00109999766477 0.00066384638136 +TCCTTT 0.000355164011534 0.000558010028094 +GACCCA 0.000480987572048 0.000279522648581 +TTCCGT 0.000133491478329 8.26622533412e-05 +TAGACG 0.0 3.79067751353e-05 +TTCCTT 0.000334251508401 0.000663687109196 +TAGACC 0.0 0.000154334727336 +TAGACA 0.0 0.000225370112674 +TCCTTG 0.000381653182168 0.000413629811035 +TCCTTA 0.000178453360064 0.000267338328002 +TCCTTC 0.00056707737661 0.000503459311775 +ATCTCT 0.000376076514666 0.000389659350287 +TCCGCA 6.86627186184e-05 6.14790554715e-05 +ACCGTT 6.72685517429e-05 5.35154472498e-05 +TGGCGC 0.000101077098474 8.64847852876e-05 +TTTACG 4.07793811084e-05 4.18885792461e-05 +TGGCGA 7.14510523694e-05 7.19910183241e-05 +GGCCAT 0.000276045041349 0.000239863879637 +ATCTCG 0.000100728556755 5.20819977699e-05 +ATCTCA 0.000274650874474 0.000311695625797 +ATCTCC 0.000502597158618 0.000272355401182 +GGCCAA 0.000221323991486 0.000213504336424 +GGCCAC 0.000447527567036 0.000286132443405 +ACCGTG 0.000324143798554 9.74745646335e-05 +ACCGTA 5.15841743935e-05 3.40842431888e-05 +GGCCAG 0.000758426780273 0.000436485366631 +ACCGTC 0.000144644813333 4.87372823168e-05 +AGAATT 0.00023247732649 0.000383527371957 +GGCAGT 0.000426266522185 0.000288043709379 +CCATAA 7.31937609638e-06 0.000185233527237 +CAAAGG 0.00020145711351 0.000369591057569 +CCATAC 0.000172179609124 0.000150910375801 +CAAAGA 0.00024467628665 0.000440546806824 +GAAACG 0.000105608140819 8.21844368479e-05 +CAAAGC 0.000221672533205 0.000351035850412 +CAACGT 4.11279228273e-05 5.35154472498e-05 +GGCAGA 0.000250252954152 0.000431627565616 +GGCAGC 0.000767837406682 0.000402878939935 +CGTGAG 0.000219581282891 9.81912893735e-05 +GGCAGG 0.000285455667759 0.000461172552118 +CAACGC 5.576667502e-05 4.50740225348e-05 +CAAAGT 0.000171831067406 0.000325711576267 
+CAACGA 4.53104234538e-05 4.34813008904e-05 +CTAACC 0.00010839647457 0.000153219822185 +CAACGG 7.35423026827e-05 5.39932637431e-05 +CGTGAA 0.00013314293661 8.02731708747e-05 +GGGGGT 0.000206685239293 0.000204664731298 +TACGCT 7.73762615903e-05 3.20933411334e-05 +GAAACC 0.000368408596851 0.000317429423717 +CGATCT 7.87704284658e-05 4.2207123575e-05 +CTCTAG 9.75916812851e-06 0.000244801316735 +TGAGCC 0.0 0.000359318002963 +CTCTAA 9.75916812851e-06 0.000234687534293 +CTCTAC 0.000519327161124 0.000221308672481 +GGGGGG 8.15587622168e-05 0.000296564770176 +CGATCC 6.86627186184e-05 4.83391019057e-05 +CGATCA 5.68123001767e-05 3.304897412e-05 +GGGGGC 0.000448921733911 0.000240978784788 +CGATCG 1.88212528193e-05 1.68032133478e-05 +GGGGGA 0.000199017321478 0.000290830972256 +TACGCG 5.576667502e-05 1.09897793459e-05 +CTCTAT 0.00032240108996 0.000208805807573 +TTCTGA 2.89289626667e-05 0.000524323965316 +CAGACC 0.000608902382875 0.000282071003212 +CAGACA 0.000517932994249 0.000450899497512 +CAGACG 0.000199017321478 9.15814945495e-05 +TGCGTG 0.000137325437237 0.000109181068719 +TAGTTG 0.0 0.000167872861313 +GCTAAT 0.000154055439743 0.000151467828377 +TAGTTC 0.0 0.00016811176956 +TAGTTA 0.0 0.000172252845835 +CAGACT 0.00040605110249 0.000372696864775 +GCTAAG 0.000308110879486 0.000206018544695 +TAGTTT 0.0 0.00031145671755 +GCTAAC 0.000171482525687 0.000123993380012 +GCTAAA 0.000224809408675 0.00018857824269 +TAAGTG 0.0 0.000222423577632 +TGCTGC 0.000326235048867 0.000528703949838 +AACCCT 0.000437419857189 0.000289238250612 +TATCAA 0.000123035226763 0.000197178939569 +AACCCC 0.000464606111261 0.000220512311659 +AACCCA 0.000397686101237 0.000319181417525 +AACCCG 0.000122338143325 6.753139772e-05 +TGCATC 0.000355512553253 0.000251251839394 +AGATTT 0.000187166903036 0.000414266899692 +ACGTAA 2.43979203213e-06 4.28442122327e-05 +AACTGG 0.000360740679036 0.000295529501107 +AACTGA 2.82318792289e-05 0.000354380565865 +AACTGC 0.000364574637944 0.00027299248984 +CCCAGA 0.000275347957911 
0.00049501788706 +CCCAGC 0.00080896532951 0.000580706311526 +CCCAGG 0.000382350265606 0.000561275107465 +AACTGT 0.000293472127293 0.000374608130748 +AGTTTT 0.000178104818345 0.000502264770542 +ACGTAT 5.75093836144e-05 3.81456833819e-05 +CCCAGT 0.000440556732658 0.000362742354498 +CTCCAA 0.000243630661494 0.000325870848432 +TTGACG 4.80987572048e-05 4.37998452193e-05 +TATAAT 0.000177756276626 0.000291786605243 +CGCATA 4.1825006265e-05 2.80319009404e-05 +ATACTG 0.000159632107245 0.000241934417775 +CGCATC 0.000298003169638 5.21616338521e-05 +ATACTA 5.85550087711e-05 0.000150910375801 +ATACTC 9.16664720642e-05 0.000141831862428 +CGCATG 0.000228991909301 6.00456059916e-05 +GCGCAA 3.10202129799e-05 3.69511421487e-05 +ATACTT 9.89858481606e-05 0.000272435037264 +CGCATT 0.000118504184418 3.87031359574e-05 +GCGCAC 9.20150137831e-05 8.27418894234e-05 +GCTCCG 0.000129308977703 0.00014549512221 +TATTGT 7.42393861204e-05 0.000302935656753 +GCTCCC 0.000348193177156 0.000333595548407 +GCTCCA 0.000359695053879 0.000337895896846 +CCTACT 0.000186818361317 0.000202514557078 +TGTACT 9.93343898795e-05 0.000270603407373 +TTTAGT 0.000134537103486 0.000283822997021 +TATTGG 8.22558456546e-05 0.000170819396355 +GCTCCT 0.000408142352803 0.000417213434734 +GCGCAG 0.000218187116016 0.000125108285163 +GCTGGG 0.00051444757706 0.000543277352884 +TTTAGG 8.12102204979e-05 0.000237952613664 +TGTACG 2.30037534458e-05 4.69056524258e-05 +CCTACG 4.56589651727e-05 4.54722029459e-05 +TGTACA 0.000119549809574 0.000343072242191 +CCTACA 0.000196577529446 0.000222503213714 +TGTACC 0.000136628353799 0.000193674951952 +CCTACC 0.000217838574297 0.000215495238479 +TTCGCG 4.98414657992e-05 2.00682927187e-05 +TTCGCA 7.24966775261e-05 4.1490398835e-05 +TTCGCC 0.000177756276626 5.79750678539e-05 +AGTATC 0.000147781688803 0.000149636198486 +GACTAA 1.35931270361e-05 0.000150990011883 +AGTATA 7.52850112771e-05 0.000183083353017 +GACTAC 0.000531526121285 0.000131160627411 +AGTATG 0.000160677732402 0.000195665854007 +TTTGGA 
0.000561152167389 0.000446121332579 +GACTAG 1.3941668755e-05 0.000142070770675 +TTCGCT 0.000103168348787 5.99659699093e-05 +GTGTAT 0.00025408691306 0.000290193883598 +GACTAT 0.000442996524691 0.000155131088159 +AGTATT 0.000153358356305 0.000298953852642 +TTACTC 9.20150137831e-05 0.000190808052992 +GGTCTT 0.000172876692562 0.000254596554848 +TTACTG 0.000212958990233 0.000305563647466 +ACCTTG 0.000348541718875 0.000305085830973 +ACCTTC 0.000639574054136 0.000288760434119 +ACCTTA 0.000118155642699 0.000181888811783 +TTACTT 0.000123732310201 0.000321411227828 +ACCTTT 0.000355861094972 0.000345620596821 +ACAGGG 0.000322749631679 0.00033479008964 +CTGGGA 0.000566728834891 0.00059488153416 +CTGGGC 0.00104248828116 0.000466109989216 +ACAGGC 0.000386532766233 0.000289397522776 +ACAGGA 0.000459029443759 0.000424380682134 +CTGGGG 0.00058276175396 0.000519227256054 +CAGGAC 0.000866823254843 0.00038328846371 +GCAGCT 0.000653167181172 0.000435211189316 +CAGGAA 0.00094210826612 0.000552674410585 +TCGCTT 3.83395890763e-05 7.35837399684e-05 +AGGACC 0.000242933578056 0.000314881069086 +AGGACA 0.000241539411181 0.000432901742931 +GCGCAT 3.27629215743e-05 5.12856369477e-05 +TTATAC 7.24966775261e-05 0.000180057181892 +ACAGGT 0.000230386076177 0.000261126713589 +CTGGGT 0.000452058609381 0.000413151994541 +TTATAG 3.13687546988e-06 0.000214300697246 +CTCCGA 0.000187863986474 8.24233450945e-05 +TCGCTG 0.000213307531952 0.000111809059433 +GAGTTG 0.000410930686554 0.00025037584249 +GCAGCG 0.000189258153349 0.000172571390164 +AGGACT 0.000220278366329 0.000345859505068 +GCAGCA 0.000552787166136 0.000455438754199 +GCAGCC 0.000780036366843 0.000429716299643 +CAGGAT 0.000655258431486 0.00031217344229 +TGCTGA 2.64891706345e-05 0.00046985288508 +CCTCAG 0.000690461145092 0.000520421797288 +CCTCAA 0.00022934045102 0.000282389547541 +CCTCAC 0.000281273167132 0.000324835579363 +TATGTT 0.000200062946634 0.000309067635084 +ATGGGC 0.000453104234538 0.000202116376667 +TCTTTG 0.000257920871968 
0.000493743709745 +ATGGGA 0.000333205883245 0.000309943631988 +TCTTTA 0.000135234186924 0.000404312389415 +ATGGGG 0.000306368170891 0.000252207472381 +TCTTTC 0.000239796702586 0.000542640264226 +CCTCAT 0.000194486279132 0.000285256446501 +TCTTTT 0.000212261906795 0.000721104724474 +GCTAGC 0.000148478772241 0.000160705613914 +TATGTA 0.000125823560514 0.000359955091621 +ATGGGT 0.000242933578056 0.000211752342615 +TATGTC 0.000261406289156 0.00018499461899 +GAAAGA 0.000442647982972 0.000539215912691 +ATCAAG 0.000843819501397 0.000237554433253 +CATGAT 0.000291729418699 0.000236598800266 +ATCAAC 0.000629466344289 0.000155290360323 +GAAAGG 0.000367362971695 0.000423026868736 +ATCAAA 0.000491792365333 0.000302218932013 +GGAACT 0.000217838574297 0.000316633062895 +GTCAGG 0.000184727111004 0.000273151762004 +CATGAG 0.000474365279389 0.000277611382608 +ATCAAT 0.000422781104996 0.000189135695265 +GTCCAA 0.000173922317719 0.000156883081967 +CATGAC 0.000334948591839 0.00020617781686 +CATGAA 0.000431494647968 0.000345620596821 +TGACCT 0.0 0.000358601278223 +GTAGCG 4.07793811084e-05 3.6712233902e-05 +CAAGTT 0.000203548363823 0.000285176810419 +GCGGCT 0.000183332944128 0.00013577952018 +GCTTTA 0.000159283565526 0.000254596554848 +GCTTTC 0.000346101926843 0.000365211073047 +TTTCTC 0.000257920871968 0.000598146613531 +GCGGCG 0.000243979203213 0.000234528262129 +CAAGTG 0.000410930686554 0.000324517035034 +GTAGCT 0.000197623154602 0.000215813782808 +TTCTTT 0.000414067562024 0.000867714751836 +GCGGCC 0.000351330052626 0.00017169539326 +CAAGTC 0.000211216281638 0.000240341696131 +GCGGCA 0.000119201267855 0.000100102555347 +CAAGTA 0.000156146690056 0.000195347309678 +AACGGT 8.85295965943e-05 5.10467287011e-05 +GGGGAA 0.000444390691566 0.00037142268746 +TAATCG 0.0 2.88282617625e-05 +CTTGTG 0.000389321099984 0.000378191754448 +TAATCA 0.0 0.000194630584938 +TAATCC 0.0 0.000157121990214 +TGCGAA 5.36754247068e-05 4.3322028726e-05 +CGGCCC 0.000307065254329 0.00014007986862 +AAAATT 
0.00037503088951 0.000559523113656 +TTGATA 0.000101425640193 0.000230307549771 +CGGCCG 0.000104562515663 0.000120728300641 +CTTGTA 0.000117458559261 0.000255870732163 +GGTCTG 0.000314384630426 0.000292423693901 +TAATCT 0.0 0.000214937785903 +AGTTTA 0.000120595434731 0.00027912446817 +TGCGAC 0.000107350849414 4.32423926438e-05 +GGTCTC 0.000223763783518 0.000261524894 +GGAGGC 0.000526297995502 0.000425575223367 +GGTCTA 7.66791781526e-05 0.000136894425331 +AAAATG 0.000463909027823 0.000616542548523 +AAAATC 0.000433237356562 0.000373095045186 +TCAATA 6.83141768996e-05 0.000205301819955 +AAAATA 0.000251647121028 0.000736872668753 +TCGTCA 5.22812578313e-05 5.49488967297e-05 +ATCCCT 0.000414416103743 0.000274107394991 +TTCAGG 0.000278136291662 0.000367759427678 +ATATCC 9.7940223004e-05 0.000159909253092 +TCGTCG 3.52027136064e-05 1.96701123076e-05 +TTTAGC 0.000119898351293 0.000198692025131 +TGCCAA 0.000169739817092 0.000306439644371 +TCGGGA 0.000108047932851 9.41298491804e-05 +GAACAA 0.000419644229526 0.000284061905268 +ATCCCG 0.000134885645205 6.25143245403e-05 +GAACAC 0.00034087380106 0.000253640921861 +ATCCCC 0.000394897767486 0.000205700000366 +GAACAG 0.00072217844151 0.000342514789615 +ATCCCA 0.000404656935614 0.000306439644371 +AACGGA 0.000148130230522 6.55404956645e-05 +CACCAC 0.000349935885751 0.000318942509279 +CACCAA 0.00017984752694 0.000277929926937 +CACCAG 0.000666760308208 0.000375882308064 +AACATT 0.00042138693812 0.00038328846371 +AACGGG 0.000180196068659 6.30717771158e-05 +TCCGTC 0.000103168348787 7.03186605975e-05 +GAGTTA 0.000205639614136 0.000185313163319 +TCCGTA 2.96260461044e-05 4.05347658484e-05 +ACCAGA 0.000236311285397 0.00034378896693 +CACCAT 0.000223763783518 0.00031647379073 +AACATG 0.000612039258345 0.000300387302122 +AACATA 0.000200760030072 0.000243367867255 +AACATC 0.000775505324498 0.000237474797171 +CAGTCG 0.000112230433478 6.80888502955e-05 +GCACGG 0.000116761475823 8.93516842474e-05 +GTCAGA 0.000194486279132 0.000295927681518 +GCACGA 
6.97083437751e-05 4.38794813015e-05 +CAGTCC 0.000425569438747 0.000286132443405 +GCACGC 8.12102204979e-05 8.05917152035e-05 +CAGTCA 0.000333205883245 0.000327702478323 +GCACCA 0.000239796702586 0.000250535114654 +GCAAGC 0.000168345650217 0.000251411111559 +GCACGT 4.56589651727e-05 6.63368564867e-05 +CAGTCT 0.000424175271871 0.0003613885411 +TAAAGC 0.0 0.000275142664059 +GCAAGG 0.000149175855679 0.00026582524244 +GTCAGT 0.000269771290409 0.000251570383723 +GTATAC 7.98160536224e-05 0.000112844328501 +TTCCCG 0.000138371062393 0.000109738521295 +GTATAA 7.31937609638e-06 0.000187383701456 +GTATAG 4.53104234538e-06 0.000132594076891 +CCGGAA 0.000158237940369 9.58818429892e-05 +CCGGAC 0.000163117524434 7.50968255306e-05 +CCGGAG 0.000349587344032 0.000163731785038 +CGTCTT 7.31937609638e-05 8.17066203546e-05 +TATTTG 0.00016102627412 0.000403834572922 +TTCCGG 0.00029138087698 9.22982192894e-05 +TTCCGA 0.000233174409928 6.41866822668e-05 +TTCCGC 0.000249904412434 6.22754162936e-05 +GTATAT 7.66791781526e-05 0.000263117615645 +CGTCTC 0.000106653765976 9.06258615629e-05 +CGTCTA 4.25220897028e-05 3.86234998752e-05 +CGTCTG 0.00017671065147 9.72356563869e-05 +CCGGAT 0.000135582728642 5.14449091121e-05 +CTCTTC 0.000878673673285 0.000526633411701 +TGTTAT 7.49364695582e-05 0.000283584088774 +CTCTTA 0.000136628353799 0.000274744483648 +CTCTTG 0.00031926421449 0.000384642277108 +CCCGAT 0.000118852726136 4.57111111925e-05 +TACGTA 4.35677148594e-05 3.53584205043e-05 +TACGTC 0.000123383768482 3.24118854623e-05 +TCTGGT 0.00024467628665 0.000349602400932 +CGCGAT 4.00822976707e-05 1.62457607723e-05 +TACGTG 0.000252692746185 5.78157956895e-05 +TGTTAG 4.87958406425e-06 0.000221308672481 +GGTTTC 0.000215747323984 0.000300546574287 +CTCGTG 0.000187863986474 9.71560203047e-05 +CTCTTT 0.000469485695325 0.000507680024133 +TGTTAA 5.576667502e-06 0.00033335664016 +CGCGAA 3.38085467309e-05 1.82366628277e-05 +CGCGAC 8.29529290923e-05 3.24118854623e-05 +GATATT 0.000317521505895 0.00022775919514 +TCTGGC 
0.000415810270618 0.000344664963835 +TACGTT 6.16918842409e-05 4.85780101523e-05 +CGCGAG 0.000147084605365 5.16041812766e-05 +AGTCGT 5.36754247068e-05 4.68260163436e-05 +TTACCT 0.000156843773494 0.000242252962104 +CTCCAT 0.000236311285397 0.000368157608089 +TATGCC 0.000376076514666 0.000143663492319 +TTACCG 2.82318792289e-05 3.71900503953e-05 +CTCCAC 0.000300791503389 0.00034665586589 +AGTCGG 9.02723051887e-05 7.21502904885e-05 +TTACCC 8.78325131566e-05 0.000165563414929 +AGTCGA 6.37831345542e-05 3.94994967796e-05 +TTACCA 0.000140462312707 0.00022234394155 +AGTCGC 9.48033475341e-05 5.63027101274e-05 +GTACCG 3.65968804819e-05 3.29693380378e-05 +CCGACT 5.40239664257e-05 6.84870307066e-05 +GCCTAC 0.000494580699084 0.000148282385088 +GCCTAA 1.04562515663e-05 0.000140557685113 +GCCTAG 1.3941668755e-05 0.000199966202447 +GTGCGG 0.000233522951646 8.72811461098e-05 +CCGACA 5.75093836144e-05 5.08078204544e-05 +GATATC 0.000309156504642 0.000124073016094 +CCGACC 9.2363555502e-05 6.94426636932e-05 +CCGACG 4.07793811084e-05 2.81911731048e-05 +GCCTAT 0.000331811716369 0.000134505342864 +TGACGG 0.0 5.94085173338e-05 +TGACGT 0.0 7.43801007906e-05 +CCCGAG 0.00043079756453 0.000160785249996 +CTCGTT 6.41316762731e-05 6.42663183491e-05 +CCCGAA 0.000144644813333 6.72924894733e-05 +CGCACA 0.000137325437237 8.75996904386e-05 +ATACAC 8.29529290923e-05 0.000209522532313 +CGCACC 0.000191349403663 6.64961286511e-05 +ATACAA 0.000102471265349 0.000245756949721 +ACTGCT 0.000392109433735 0.000379625203928 +ATACAG 0.000210519198201 0.000265028881618 +ATTGCC 0.000461120694072 0.00018284444477 +GGGTCC 0.00022620357555 0.000227440650812 +GAGCGT 0.000169391275373 6.84073946243e-05 +CTCACT 0.00035934651216 0.00036712233902 +GGGTCT 0.000210170656482 0.000269488502222 +ACTGCA 0.000339828175903 0.000328021022652 +TTTGTC 0.000379213390136 0.000356371467921 +ACTGCC 0.000466000278136 0.000311217809304 +CGCACT 8.33014708112e-05 5.59045297163e-05 +ATACAT 0.000102819807068 0.000327861750487 +ACTGCG 
8.60898045622e-05 7.46190090373e-05 +CTCACC 0.000621101343036 0.000316792335059 +CTCACA 0.000377819223261 0.000390057530699 +CTCACG 0.000184378569285 8.08306234502e-05 +GTGGGT 0.00029451775245 0.000310580720646 +GTGAGG 0.000301140045108 0.000325870848432 +GATTTT 0.000360043595598 0.000444448974853 +ACAATT 0.000178453360064 0.000222821758043 +CTTGAA 0.000369454222008 0.000392844793576 +TAGTGA 0.0 0.000213902516835 +GTGAGA 0.000237705452273 0.00032005741443 +TCGACG 1.81241693815e-05 1.40159504702e-05 +TCGACA 3.97337559518e-05 3.71900503953e-05 +TAGAGG 0.0 0.000225847929167 +ACAATA 0.000123732310201 0.000217008324041 +GTGAGT 0.000228643367582 0.000264073248631 +ACAATC 0.000150570022554 0.00015194564487 +GATTTC 0.000346450468562 0.000283743360939 +CCAGTA 0.000187515444755 0.000206098180777 +TCGACT 3.27629215743e-05 3.75085947242e-05 +CTGGTT 0.000370499847164 0.000343231514355 +TTGAGT 0.000152312731149 0.000288999342365 +TCAAAA 0.000264891706345 0.000405586566731 +CGAACG 1.70785442249e-05 1.83959349921e-05 +CGAACC 9.2363555502e-05 4.23663957394e-05 +CGAACA 9.37577223775e-05 4.55518390281e-05 +CTGGTG 0.00132062457282 0.000429955207889 +CTGGTA 0.000272211082442 0.000216371235383 +CTGGTC 0.000592520922088 0.000305882191795 +CGAACT 6.97083437751e-05 4.92150988101e-05 +TTGCAT 0.000140462312707 0.000277452110444 +GACGAG 0.000531177579566 6.92037554465e-05 +TTGCAG 0.000406748185927 0.000330808285529 +GAGGGA 0.000400125893269 0.000425097406874 +TTGCAC 0.000156843773494 0.000202116376667 +TTGCAA 0.000175665026313 0.000265427062029 +TCCATC 0.000621101343036 0.000325154123692 +TCGCAT 3.17172964177e-05 3.68715060664e-05 +GTCGTG 0.00014255356302 5.38339915786e-05 +GTCGTA 3.27629215743e-05 2.00682927187e-05 +GTCGTC 0.000113624600353 5.36747194142e-05 +CCGTTT 7.28452192449e-05 8.50513358077e-05 +GAGCAA 0.000479593405172 0.000286689895981 +TTTTTG 0.000191349403663 0.000673641619473 +ATATCG 3.20658381365e-05 2.36519164184e-05 +GTCGTT 4.80987572048e-05 4.22867596572e-05 +TCAGAT 
0.000452755692819 0.000328419203063 +TCGCAG 0.000131400228016 7.88397213948e-05 +GTGCTC 0.000482381738923 0.000288919706283 +TCGCAC 5.26297995502e-05 3.86234998752e-05 +CCGTTG 6.72685517429e-05 6.19568719648e-05 +CCGTTA 2.64891706345e-05 3.70307782309e-05 +CCGTTC 9.55004309718e-05 6.7929578131e-05 +GATCGG 0.000118852726136 4.19682153283e-05 +AATGCT 0.000431146106249 0.000330171196871 +GGTACG 2.57920871968e-05 2.43686411584e-05 +GGTACA 0.000134188561767 0.000157201626296 +TCCAGG 0.000343662134811 0.000482594658235 +GGTACC 0.000152312731149 0.000144300580977 +GCGGAC 0.000174967942875 6.65757647334e-05 +CAGGGA 0.000412324853429 0.000471047426313 +TTCAGC 0.000608553841156 0.000304687650562 +TATTTC 0.000242236494618 0.000365768525622 +TATTTA 0.000110487724883 0.000563743826014 +CCCCCT 0.000416855895775 0.00031790724021 +GGTACT 0.000113973142072 0.000139124235633 +AATGCG 8.78325131566e-05 3.47213318466e-05 +AATGCC 0.000541633831132 0.000223140302372 +AATGCA 0.000387578391389 0.000301661479438 +CCCCCC 0.000221672533205 0.000376837941051 +CCCCCA 0.000525252370345 0.000418965428543 +TATTTT 0.00023247732649 0.000928715990814 +CCCCCG 0.00020773086445 0.000129966086178 +CTTGAG 0.000374333806072 0.000345859505068 +ATGATG 0.000553135707855 0.000310819628893 +GAAGTA 0.000348890260594 0.000233731901307 +ATGATA 0.000137673978956 0.000217167596206 +ATGATC 0.00040605110249 0.000157360898461 +TGCAAC 0.00029451775245 0.000159749980927 +TGCAAA 0.000313339005269 0.000358919822552 +TGCAAG 0.000432888814843 0.000278168835184 +GAAGTG 0.000697780521188 0.000357725281318 +ATGATT 0.000317521505895 0.000268771777482 +CTTGAT 0.000304276920578 0.000254755827012 +GTGCTT 0.000266285873221 0.000324039218541 +TTGAGG 0.000144296271614 0.000301980023767 +GCCATT 0.000531177579566 0.000263754704302 +TTGAGC 0.000175665026313 0.000223538482783 +TGCAAT 0.000215398782265 0.000209283624066 +GAGAAT 0.000672685517429 0.000308669454673 +TGTCTT 0.000123383768482 0.000536747194142 +TTCAGA 0.00026349753947 
0.000484426288126 +AGTAGC 0.000185772736161 0.000171456485013 +AAGGCC 0.000813496371855 0.000298953852642 +ATAACC 9.7940223004e-05 0.00014334494799 +AAGGCA 0.000508522367839 0.000394676423467 +ATAACA 0.000123383768482 0.000227520286894 +AAGGCG 0.000196926071165 9.2138947125e-05 +ATAACG 2.96260461044e-05 3.49602400932e-05 +TATGCT 0.000267680040096 0.000213982152917 +GAGGGT 0.000342267967936 0.00023349299306 +TGTCTG 0.000259315038843 0.000552674410585 +GAGAAG 0.00224391158612 0.000557532211601 +GAGAAA 0.00119305830371 0.00062793050828 +TGTCTC 0.000169739817092 0.000453846032554 +GAGAAC 0.000995783690827 0.000291069880503 +TGTCTA 6.27375093976e-05 0.000265267789865 +TGTGGC 0.000445087775004 0.000378351026613 +TGTGGA 0.000355164011534 0.00046698598612 +TGTGGG 0.000432888814843 0.000454483121212 +ATAACT 0.000121989601606 0.000220830855988 +ACTGAC 0.000391760892016 0.000249738753832 +GGTATT 0.000117110017542 0.000176951374686 +AGGTAT 0.000107699391132 0.000175597561288 +ACCAAG 0.000869960130313 0.000311934534044 +GAGCAT 0.000369454222008 0.000256030004327 +ACCAAA 0.000493883615646 0.000341638792711 +ATCATC 0.000783870325751 0.000193515679787 +ACCAAC 0.000560106542233 0.000206416725106 +GGTCAC 0.000171133983968 0.000211194890039 +GATCGC 9.58489726907e-05 4.3322028726e-05 +GGTCAT 0.000135234186924 0.000189693147841 +ACCAAT 0.000315081713863 0.000172412118 +AGGTAG 1.04562515663e-05 0.000202036740584 +AGGTAA 1.35931270361e-05 0.000180933178797 +AGGTAC 0.000148478772241 0.000145893302621 +TTTTGC 0.000103865432225 0.000357327100907 +GCCGTC 0.000172179609124 7.27873791463e-05 +AATTGG 9.37577223775e-05 0.000198771661213 +GTCGGA 5.71608418956e-05 5.064854829e-05 +AATTGA 9.06208469076e-06 0.00024607549405 +ATCATG 0.000582413212241 0.000252685288874 +AATTGC 7.80733450281e-05 0.000177588463344 +TAGCCA 0.0 0.00023851006624 +TGGGGT 0.000149872939116 0.000327782114405 +CGCTCG 7.28452192449e-05 5.9488153416e-05 +GATAAT 0.00024781316212 0.00019001169217 +CGCTCA 0.000123035226763 
7.84415409837e-05 +CGCTCC 0.000247116078683 0.00012646209856 +AATTGT 0.000100380015036 0.000299113124807 +AATCTT 0.000203896905542 0.000300944754698 +TCTTAT 0.000116412934104 0.000257304181643 +GTTATG 0.000125126477076 0.000150352923226 +CGCTCT 0.000163466066153 0.000109897793459 +GATAAA 0.000429403397654 0.000259215447616 +TCGAAG 8.43470959678e-05 6.06826946493e-05 +GATAAC 0.00026663441494 0.000124471196505 +TAGCCT 0.0 0.0002342097178 +TGGGGC 0.000252692746185 0.000338294077258 +GATAAG 0.000344707759968 0.00015552926857 +GATCCT 0.000376425056385 0.000244084591995 +AAGAAT 0.000582413212241 0.000423186140901 +GGGGCT 0.000377122139823 0.000352469299892 +GTTATC 0.000162768982715 0.000132116260398 +AAGAAC 0.000837197208738 0.000376360124557 +GATCCA 0.000297306086201 0.000213663608588 +AAGAAA 0.00135094770236 0.000889375766199 +GATCCC 0.000312641921831 0.000184596438579 +AAGAAG 0.00198773342275 0.000580308131115 +GATCCG 7.38908444016e-05 5.24005420987e-05 +GCCCCA 0.000424175271871 0.000332002826762 +TATTAA 5.92520922088e-06 0.000350717306083 +CAAGGG 0.000240145244305 0.000270762679538 +CAAGGC 0.000355164011534 0.000311615989715 +GGTTCT 0.000222021074924 0.000293777507298 +CAAGGA 0.000372591097478 0.000373493225597 +GGTTCA 0.000126869185671 0.000205700000366 +GGTTCC 0.000224112325237 0.000241536237364 +AAGGCT 0.000653515722891 0.000364573984389 +TGCATT 0.000212261906795 0.000338692257669 +GGTTCG 2.75347957911e-05 4.2207123575e-05 +TAGGCA 0.0 0.000186826248881 +ATTATA 0.000101425640193 0.000294494232038 +ATTATC 0.000183681485847 0.000169306310793 +TAGAAT 0.0 0.00027442593932 +GATAGT 0.000141159396145 0.000121524661463 +ATTATG 0.000164511691309 0.00022091049207 +AGTCTT 0.000171482525687 0.000343072242191 +TCCCAA 0.000231780243052 0.000327065389665 +TAGAAA 0.0 0.00042135451101 +TAGAAC 0.0 0.000199806930282 +ATTATT 0.000215398782265 0.000402002943031 +TAGAAG 0.0 0.00031073999281 +AGTCTA 8.29529290923e-05 0.000183800077757 +AGTCTC 0.00020459398898 0.000303174565 +AGTCTG 
0.000305671087454 0.000348407859699 +TAACTC 0.0 0.000182286992195 +TAACTA 0.0 0.000172571390164 +TAACTG 0.0 0.000253322377532 +ATCTAT 0.000320309839646 0.000238430430157 +GCCCCG 0.000227946284144 0.000190330236498 +TAACTT 0.0 0.000278646651677 +ATCTAG 1.08047932851e-05 0.000166280139669 +ATCTAC 0.000472274029076 0.000166041231422 +ATCTAA 9.41062640963e-06 0.000190808052992 +GGTCAG 0.00030985358808 0.000276337205293 +CCATGT 0.000115367308948 0.000343550058684 +GCCCCC 0.000358300887004 0.000273470306333 +TGGAGG 0.000224460866956 0.000477577585055 +TGGAGA 0.000188561069912 0.000572663067222 +GGTCAA 0.000127217727389 0.000144459853142 +CCATGC 0.000116064392385 0.000282230275377 +TCCAGA 0.000308807962924 0.000469056524258 +CCATGA 1.70785442249e-05 0.000292264421736 +CCATGG 0.000147433147084 0.000355415834934 +TGGAGT 0.000169739817092 0.000358680914305 +TCAGTA 0.000150918564273 0.000236519164184 +TCAGTC 0.0001920464871 0.000269249593975 +CTCTCT 0.000406399644209 0.000795405189183 +GGGGAT 0.000341222342779 0.000202036740584 +TCAGTG 0.000416855895775 0.000456792567596 +CGATAT 6.20404259598e-05 2.58817267205e-05 +CTCTCG 0.000110139183165 9.96247388534e-05 +GGGGAC 0.00057335112755 0.000269807046551 +CTCTCC 0.000504688408931 0.000516678901423 +TCAGTT 0.000163117524434 0.000359477275127 +CTCTCA 0.000289638168385 0.000396348781194 +CGATAG 2.09125031325e-06 2.59613628027e-05 +CGATAA 1.74270859438e-06 2.73151762004e-05 +ATGCTG 0.000847653460305 0.000419761789365 +CGATAC 8.12102204979e-05 2.15017421986e-05 +TTCACT 0.000356558178409 0.00033550681438 +GTGTTT 0.000444390691566 0.0005064854829 +GCTACT 0.000194486279132 0.000181251723126 +GTGGTT 0.000375727972948 0.000295051684614 +GTGTTC 0.000447527567036 0.000319659234019 +GTGTTA 0.000135234186924 0.000218680681768 +GTGTTG 0.000295911919325 0.000296405498011 +CGTGCC 0.00017671065147 9.15018584673e-05 +GTGGTA 0.000293820669012 0.000195984398336 +GCTACA 0.000214353157108 0.000246473674461 +GTGGTC 0.000573002585831 0.000203948006558 
+GCTACC 0.000240145244305 0.000154892179912 +GTGGTG 0.00106235515913 0.000362503446251 +GCTACG 6.83141768996e-05 4.21274874928e-05 +CGTGCA 9.30606389397e-05 8.79978708497e-05 +TTTCGT 6.16918842409e-05 6.37885018558e-05 +TGCCCT 0.000228294825863 0.000429955207889 +AGCGTT 6.44802179919e-05 6.37885018558e-05 +CGTGCG 4.00822976707e-05 5.00910957144e-05 +CACGGA 0.000122686685044 0.000104004723375 +TGCCCA 0.00022306670008 0.000379306659599 +TGCCCG 8.99237634698e-05 0.000112685056337 +ACAGTG 0.000582413212241 0.00043178683778 +AGCGTA 3.79910473574e-05 3.56769648332e-05 +AGCGTC 0.000147433147084 6.95222997754e-05 +GTACGT 2.40493786024e-05 3.87031359574e-05 +AGCGTG 0.000234568576803 9.59614790714e-05 +GGCCCC 0.000430449022811 0.000279522648581 +GGCCCA 0.000329720466056 0.000271877584689 +CACGTG 0.000293820669012 0.000146769299526 +GGCCCG 0.000147781688803 0.000128851181027 +CACGTA 5.05385492369e-05 4.86576462345e-05 +CACGTC 0.000150221480835 6.39477740202e-05 +ACGTGC 8.08616787791e-05 7.53357337772e-05 +ACGTGA 7.66791781526e-06 9.02276811518e-05 +ACGTGG 0.000102471265349 0.000106632714088 +AAATGG 0.000231083159614 0.000392526249247 +CACGTT 6.51773014297e-05 7.71673636682e-05 +GGGGCC 0.000469137153606 0.000280956098061 +TCTTAA 1.28960435984e-05 0.000346496593726 +GGCCCT 0.000432888814843 0.00031217344229 +GGGACA 0.000290335251823 0.000330091560789 +GGTATA 5.50695915823e-05 0.000122878474861 +GGGACC 0.000348541718875 0.000272753581593 +TCGTGG 5.68123001767e-05 8.38567945744e-05 +ACGTGT 7.35423026827e-05 0.000100500735758 +TCTTAG 8.71354297188e-06 0.000262799071316 +GCTTCC 0.000343662134811 0.000402401123442 +AGAGAG 0.000532223204723 0.000716167287377 +AGAGAA 0.000541982372851 0.000691161557561 +AGAGAC 0.000408490894522 0.000410683275993 +TCGTGA 3.48541718875e-06 6.57794039112e-05 +GTCTTT 0.000321355464803 0.000387110995656 +TCTATC 0.000142205021301 0.000192878591129 +TCTATG 0.000158237940369 0.000245995857968 +AGAGAT 0.000372242555759 0.000394596787385 +TTCACA 
0.000390018183421 0.000361547813265 +GTCTTG 0.000260012122281 0.00028055791765 +TCTATT 0.000128263352546 0.000274027758908 +GTCTTC 0.000566031751454 0.000341320248382 +GTCTTA 0.000141507937863 0.000207213085928 +GCAGAG 0.00108535891258 0.000545984979679 +GAAGGC 0.000679307810088 0.000342275881368 +GCTCAC 0.000285804209478 0.0002651085177 +GCAGAC 0.000555924041606 0.000271001587784 +GCAGAA 0.00071590469057 0.000408214557444 +GCTCAG 0.00066327489102 0.00046268563768 +ATGCGT 8.53927211245e-05 4.95336431389e-05 +TTGCTG 0.000563243417703 0.000499636779829 +CTTGAC 0.000274650874474 0.000219875223001 +TTGCTA 0.000124429393638 0.000266303058933 +TTGCTC 0.000224460866956 0.000297679675327 +GCAGAT 0.000520372786281 0.000267338328002 +GCTCAT 0.00018890961163 0.000213902516835 +GTTTGC 9.30606389397e-05 0.000266701239344 +TTGCTT 0.00021992982461 0.000493186257169 +ATGCGG 0.000170785442249 4.41980256304e-05 +GAGAGG 0.000641665304449 0.00043895408518 +TAGCCG 0.0 4.29238483149e-05 +ATGCGC 0.000158586482088 4.45962060415e-05 +ATGCGA 7.63306364337e-05 3.27304297912e-05 +TGTAAG 0.00022620357555 0.000286530623817 +GGCTAG 7.66791781526e-06 0.000182605536523 +TGTAAC 0.000178801901783 0.000237634069335 +GGCTAC 0.00057648800302 0.000168748858218 +GACTGT 0.000411279228273 0.000315279249497 +GGCTAA 4.87958406425e-06 0.000152264189199 +ACTGTA 0.000189606695068 0.000312969803113 +TTGGTA 0.000127217727389 0.000211194890039 +ACTGTC 0.000354815469815 0.000307076733028 +TTGGTC 0.000173573776 0.000198134572556 +TTCGAC 0.000187166903036 3.03413473246e-05 +ACTGTG 0.000665017599614 0.000463880178914 +AATCAA 0.000222718158361 0.000272355401182 +GACTGC 0.000435677148594 0.000261445257918 +GGCTAT 0.000356906720128 0.000161820519065 +GACTGA 3.10202129799e-05 0.000333197367996 +GACTGG 0.000515841743935 0.000311217809304 +TGTAAT 0.000144644813333 0.000334710453558 +TGTGGT 0.000276045041349 0.000415381804843 +TTGGTT 0.000178453360064 0.000408533101773 +ACTGTT 0.000251298579309 0.000366087069951 +GTTTCT 
0.000258966497124 0.000511024739586 +ATAATT 0.000128611894265 0.000328897019556 +CCTTTT 0.000228294825863 0.000500672048898 +GTTTCG 2.43979203213e-05 4.59500194392e-05 +GTTTCC 0.000211564823357 0.000354062021536 +GTTTCA 0.000176013568032 0.000292662602147 +ATAATG 0.000153358356305 0.000267656872331 +CCTTTA 0.000141856479582 0.000314881069086 +CCTTTC 0.000307065254329 0.000457748200583 +ATAATC 9.93343898795e-05 0.000160546341749 +ATAATA 8.78325131566e-05 0.000305245103137 +CCTTTG 0.000280576083695 0.00045368676039 +ACAGAA 0.000673731142586 0.000537225010635 +ACAGAC 0.000585201545992 0.000338532985504 +GCCCCT 0.000422084021558 0.000324437398952 +ACAGAG 0.000899934718136 0.000524323965316 +TTATCT 0.000124429393638 0.00028485826609 +CTGGAC 0.00130633436234 0.000343151878273 +CTGGAA 0.00114251975447 0.000580547039361 +CTGGAG 0.0022731890905 0.000644016996888 +ACAGAT 0.000520372786281 0.000333436276242 +GTGAAG 0.00104109411428 0.000348009679288 +TGGGGA 0.000196577529446 0.000477497948973 +CCTTGC 0.000147084605365 0.000317588695881 +TTATCA 0.000106653765976 0.00023492644254 +CTGGAT 0.000948730558779 0.000346178049397 +TTATCC 7.77248033092e-05 0.000164050329367 +CCAATG 0.000216095865703 0.00019829384472 +AAACCG 8.08616787791e-05 6.3868137938e-05 +AAACCA 0.000370848388883 0.00042493813471 +CCAATC 0.000125126477076 0.000138487146975 +AAACCC 0.000410930686554 0.000304289470151 +CCAATA 8.57412628433e-05 0.000165483778847 +TGTCGG 7.31937609638e-05 6.42663183491e-05 +GAGATC 0.000861943670779 0.00021302651993 +AACGAG 0.000338434009028 6.15586915537e-05 +GAGATA 0.000261754830875 0.000207929810668 +AACGAA 0.000169391275373 6.60183121578e-05 +GAGATG 0.000943502432995 0.000393163337905 +AACGAC 0.000214701698827 4.28442122327e-05 +AAACCT 0.000374682347791 0.000336382811284 +CCAATT 0.000141159396145 0.000151069647966 +ATTTGC 0.000100728556755 0.000274824119731 +GTCGAT 6.90112603373e-05 2.62002710494e-05 +ATTTGG 0.000116412934104 0.000331843554598 +AACGAT 0.000128960435984 
4.2207123575e-05 +TGTCGT 4.94929240803e-05 6.42663183491e-05 +CCCTAC 0.000422781104996 0.000169067402547 +GAGATT 0.000528040704096 0.000262082346576 +CCCTAA 1.21989601606e-05 0.000181570267455 +TACAAC 0.00046809152845 0.000144937669635 +CATTAT 0.000144296271614 0.000235802439444 +CTACAC 0.000137673978956 0.000192878591129 +TGGATT 0.000193440653976 0.000333754820571 +ATCACA 0.000448573192193 0.000271081223866 +ATCACC 0.000618661551004 0.000186905884963 +CATGCT 0.00027569649963 0.000329693380378 +TGACAC 0.0 0.000249022029092 +ATCACG 0.000166602941622 5.2878358592e-05 +TGGATA 8.9575221751e-05 0.000228237011634 +CATTAG 7.31937609638e-06 0.000164368873696 +CATTAA 6.27375093976e-06 0.000269169957893 +CATTAC 0.000146038980209 0.000152901277857 +CATGCA 0.000231431701333 0.00030245784026 +TGACAT 3.48541718875e-07 0.000313527255688 +CATGCC 0.000335645675277 0.000258498722876 +ATCACT 0.000398034642956 0.000229829733278 +CATGCG 6.37831345542e-05 5.70990709495e-05 +GTAGAA 0.000348890260594 0.000248305304352 +GTAGAC 0.000236311285397 0.000163572512874 +GTAGAG 0.000365271721381 0.000211194890039 +ACAACA 0.000208427947887 0.000307315641275 +ACAACC 0.000201108571791 0.000199249477707 +GATTGC 0.000109093558008 0.000150352923226 +ACAACG 6.20404259598e-05 5.21616338521e-05 +ACTAAG 0.000189606695068 0.00019757711998 +GTAGAT 0.000237008368835 0.000171456485013 +TGCTAC 0.000240493786024 0.000200364382858 +GGGTAC 0.000144644813333 0.000113799961488 +TGATGC 0.0 0.000255870732163 +GATTGT 0.000124429393638 0.000208168718915 +TTCAGT 0.000401171518425 0.000418567248132 +ACAACT 0.000188561069912 0.000235324622951 +GGGTAA 4.53104234538e-06 0.000136416608838 +TTGCCT 0.00027569649963 0.000390614983274 +TAATAG 0.0 0.00015696271805 +AGTCCG 5.99491756466e-05 5.90103369227e-05 +TAATAC 0.0 0.000145176577881 +GTCCGA 0.000132445853173 4.50740225348e-05 +TAATAA 0.0 0.000411001820322 +GTCCGC 0.000146387521928 6.04437864026e-05 +TGTTTA 7.59820947148e-05 0.000446200968661 +TCAACA 0.000191697945381 
0.000252207472381 +CACATC 0.000488655489863 0.000261843438329 +GTCCGT 9.48033475341e-05 5.80547039361e-05 +TAATAT 0.0 0.000284619357843 +ATTCGC 9.13179303453e-05 3.03413473246e-05 +TTCTCC 0.000593915088964 0.000470091793327 +ATTCGA 0.000130703144578 3.76678668886e-05 +TAACGA 0.0 3.52787844221e-05 +ATTCGG 0.000113624600353 4.02958576018e-05 +TTCTCG 0.000108745016289 8.38567945744e-05 +GAGCTA 0.000341919426217 0.000219078862179 +TCGTAC 4.4613340016e-05 2.41297329117e-05 +TACCTT 0.000209473573044 0.000243447503337 +TCGTAA 2.43979203213e-06 3.32878823667e-05 +TCGTAG 1.74270859438e-06 3.36064266955e-05 +GAACCT 0.000303231295422 0.000262719435234 +ATCCAA 0.000272211082442 0.00023134281884 +ATCCAC 0.000456938193446 0.000202196012749 +TGAACC 0.0 0.000235722803362 +TTCTCT 0.00043707131547 0.000689011383341 +ATCCAG 0.000915967637204 0.000304528378398 +GAACCC 0.000307065254329 0.000227679559058 +TTATAT 0.000108745016289 0.000394596787385 +GAACCA 0.000316824422458 0.000265904878522 +GAACCG 7.84218867469e-05 6.76906698844e-05 +TACCTC 0.00035342130294 0.000206018544695 +TCGTAT 2.54435454779e-05 3.0102439078e-05 +ACCATC 0.000785613034345 0.000240421332213 +ACCATA 0.000169391275373 0.000169784127286 +ACCATG 0.000585201545992 0.000329534108214 +ATGTTG 0.000270816915566 0.000283185908363 +AGCTCT 0.000392457975454 0.00043895408518 +TGTCCT 0.000209125031325 0.000497566241691 +AGTGGT 0.000314733172144 0.000265267789865 +TGATGT 0.0 0.000352071119481 +ATGTTT 0.000372939639197 0.00053093376014 +ACCATT 0.000387926933108 0.000240023151802 +AGCTCG 0.000122338143325 9.06258615629e-05 +AGTGGC 0.00047715361314 0.00029815749182 +AGCTCC 0.00052385820347 0.000370626326638 +AGCTCA 0.000324143798554 0.000367520519431 +TCCATG 0.000531177579566 0.000333834456653 +CAGTAA 1.98668779759e-05 0.000242730778597 +CAGTAC 0.000499111741429 0.000169624855122 +CAGTAG 1.70785442249e-05 0.000223857027112 +TTAATA 7.84218867469e-05 0.000346257685479 +CAGTAT 0.000411279228273 0.000239704607473 +AAAGTA 
0.000253041287903 0.000347292954548 +AAAGTC 0.000388972558265 0.000293618235134 +AAAGTG 0.000581716128803 0.000389659350287 +GATCTA 0.000121641059887 0.000139681688208 +GATCTC 0.000315430255582 0.000209363260148 +TGTCCA 0.000160329190683 0.000328498839145 +GATCTG 0.00049597486596 0.000275461208388 +TCCCTT 0.000210170656482 0.000420717422352 +CCGGCC 0.000222718158361 0.000159988889174 +ATTAGG 6.02977173654e-05 0.000136018428427 +AAAGTT 0.000315430255582 0.000412196361555 +ACTCTC 0.000211216281638 0.000260330352767 +ATTAGC 0.000110487724883 0.000131797716069 +TCCCTC 0.000372242555759 0.000457588928418 +TGTCCG 4.87958406425e-05 7.17521100775e-05 +TCCCTA 0.00013000606114 0.000218123229192 +TTAATG 0.000144296271614 0.000305643283549 +TCCCTG 0.000775853866216 0.00054638316009 +GATCTT 0.000268028581815 0.000236041347691 +CACAAT 0.000243630661494 0.000203868370475 +AAGTGC 0.000358997970442 0.000260171080603 +TGTTCT 0.000164511691309 0.000546223887926 +AAGTGG 0.00036248338763 0.000314243980428 +TGATTG 0.0 0.00024965911775 +TGCGCA 4.11279228273e-05 7.91582657236e-05 +TCTGAT 0.000441602357815 0.000320694503088 +TGTATT 0.000101774181912 0.00044866968721 +CGAGGT 0.000116412934104 6.94426636932e-05 +TGTTCA 0.000116064392385 0.000372696864775 +CACAAG 0.000560106542233 0.000298794580478 +TGTTCC 0.000198668779759 0.000347611498877 +AAGTGT 0.000366665888257 0.000363459079238 +CACAAC 0.000360740679036 0.000204186914804 +TGTTCG 2.64891706345e-05 5.59841657985e-05 +TCTGAA 0.000555575499887 0.000497645877774 +TCTGAC 0.000509567992996 0.000307713821686 +TCTGAG 0.000766094698088 0.000525438870467 +CGCGCC 0.000121989601606 0.00011387959757 +CGCGCA 4.00822976707e-05 5.97270616627e-05 +GGTAAA 0.000188212528193 0.000195108401431 +CGCGCG 7.28452192449e-05 0.000112366512008 +CTCCCT 0.000307762337767 0.000550603872448 +AGCGGG 0.000151267105992 0.000121126481052 +AGCGGA 9.16664720642e-05 8.64051492054e-05 +ACCTCT 0.000312293380112 0.0003376569886 +GGTAAG 0.000104213973944 0.000127815911958 
+CGCGCT 4.14764645462e-05 6.80092142133e-05 +ACCTCC 0.000408142352803 0.000292184785654 +ACCTCA 0.000310550671518 0.000318464692785 +ACCTCG 0.000117458559261 7.17521100775e-05 +CTCCCC 0.000275347957911 0.000468976888175 +GCCTCT 0.000500157366586 0.000476383043822 +TAAGCC 0.0 0.000180933178797 +CGGGAG 0.000602977173654 0.000161263066489 +CCCATG 0.000523161120032 0.00029130878875 +CGGGAA 0.000310202129799 0.000114277777981 +CCCATA 0.000126520643952 0.000168908130382 +CGGGAC 0.000330417549494 0.000105995625431 +CCCATC 0.00056707737661 0.000292662602147 +TTGTTG 0.000179150443502 0.000405984747142 +CCTGCT 0.000585898629429 0.000569397987851 +CCGAAT 5.64637584578e-05 4.37202091371e-05 +CCCATT 0.00028510712604 0.00025180929197 +CGGGAT 0.000203896905542 6.40274101024e-05 +GCCTCG 0.000218187116016 0.000137690786153 +GCCTCA 0.000364923179662 0.000341399884464 +TAAGCT 0.0 0.0002068945416 +GCCTCC 0.000654212806329 0.00044038753466 +CCGAAC 6.13433425221e-05 4.0375493684e-05 +CCTGCG 0.000144296271614 0.00013864641914 +CCGAAA 6.97083437751e-05 5.66212544562e-05 +CCGAAG 0.000144644813333 9.13425863028e-05 +CCTGCC 0.000678262184931 0.000614551646468 +CCTGCA 0.000481684655486 0.00044182098414 +GTACTG 0.000189258153349 0.00020116074368 +CAAGGT 0.000218535657735 0.000257622725972 +CCATTC 0.000203548363823 0.000259374719781 +CCATTA 9.16664720642e-05 0.000183003716935 +CCATTG 0.000161374815839 0.000251172203312 +TATAAC 0.000164511691309 0.000155688540734 +GATTTG 0.000302882753703 0.000283822997021 +GTACTC 8.6786888e-05 0.000129727177931 +GTCGAA 4.7401673767e-05 3.21729772156e-05 +CCATTT 0.000239099619148 0.000385757182259 +TCAGCG 8.15587622168e-05 9.3970577016e-05 +TCTACC 0.000215050240546 0.000218441773521 +TCTACG 5.05385492369e-05 3.9658768944e-05 +ACTTTC 0.000260709205719 0.000312412350537 +CGCAAC 0.000162768982715 3.20137050512e-05 +TGCTTA 9.44548058152e-05 0.000274107394991 +CGCAAA 0.000156843773494 4.34016648082e-05 +ATACCG 2.12610448514e-05 2.7155904036e-05 +CGCAAG 
0.000361089220755 5.52674410585e-05 +ATACCA 0.000130703144578 0.000191684049896 +ATACCC 9.02723051887e-05 0.000134266434618 +TCTACT 0.000191697945381 0.000241775145611 +CCGAGT 7.14510523694e-05 9.34927605227e-05 +ATACCT 0.000113624600353 0.000190808052992 +GATAGG 8.50441794056e-05 0.000128930817109 +CGCAAT 0.000100728556755 2.68373597071e-05 +TAGTAA 0.0 0.000186348432388 +TGGGTG 0.000318567131052 0.000383447735875 +TAGTAC 0.0 9.92265584423e-05 +CTCAAG 0.000847304918586 0.000308271274262 +GTTTTG 0.000191000861944 0.000542003175568 +CTCAAA 0.000401171518425 0.000353186024632 +TAGTAG 0.0 0.000126780642889 +CTCAAC 0.000600188839903 0.000199806930282 +GTGTGC 0.000363180471068 0.000360990360689 +GTGTGA 3.10202129799e-05 0.000367918699842 +GTGTGG 0.000323795256835 0.000420797058434 +TAACGG 0.0 3.89420442041e-05 +TTGCGT 4.4613340016e-05 4.38794813015e-05 +CTCAAT 0.000362831929349 0.000183879713839 +TAGTAT 0.0 0.000158316531447 +GTTCTC 0.000239796702586 0.00031432361651 +GTGTGT 0.000310550671518 0.00102842036575 +ATCTTT 0.000376076514666 0.000400728765716 +GTTGGG 0.000209473573044 0.000256030004327 +GTTGGA 0.000264543164626 0.000244004955913 +GTTGGC 0.000253389829622 0.000188339334443 +CCTTGG 0.000153358356305 0.000410444367746 +CCTTGA 1.32445853173e-05 0.0003412406123 +GGGAAA 0.000540936747694 0.000446280604744 +AGGATT 0.000202851280385 0.000274187031073 +GGACGT 6.48287597108e-05 5.96474255805e-05 +GTTGGT 0.000197274612883 0.00023205954358 +ATCTTG 0.000350632969189 0.000277133566115 +GAGAGT 0.000485518614393 0.000265188153782 +ATCTTA 0.000151615647711 0.000229989005442 +ATCTTC 0.000649681763984 0.000310978901057 +AGGATC 0.00028510712604 0.000233811537389 +AGGATA 0.000115018767229 0.00020116074368 +AGGATG 0.00033146317465 0.000366246342116 +GACATG 0.000773762615903 0.000253640921861 +GACATC 0.00088006784016 0.000234607898211 +GACATA 0.000246767536964 0.000164448509778 +TCGGCG 6.65714683052e-05 5.14449091121e-05 +GAATAT 0.000346799010281 0.000254994735259 +GGTGCA 
0.000232825868209 0.000211911614779 +TCGGCC 0.000181241693815 8.05120791213e-05 +ATCAGA 0.000258617955405 0.000309147271166 +TTGCGA 5.64637584578e-05 3.79864112175e-05 +GACATT 0.000585201545992 0.0002995909413 +TTGCGG 9.09693886265e-05 5.20819977699e-05 +GAATAG 1.18504184418e-05 0.000167076500491 +TCGGCT 0.000120595434731 9.04665893984e-05 +CATAGA 8.01645953413e-05 0.00022019376733 +GAATAC 0.000300442961671 0.000157281262378 +GAATAA 1.25475018795e-05 0.000293299690805 +TGCCTG 0.000533965913317 0.000583254666157 +CACCGC 0.000178104818345 0.000106393805842 +TGCAGA 0.000197274612883 0.000466508169627 +TTAATT 0.00012094397645 0.000429557027478 +GGACGA 8.85295965943e-05 6.29125049514e-05 +TGAGGT 1.04562515663e-06 0.000305006194891 +TTTCTA 0.0001551010649 0.000486257918017 +TTATAA 7.66791781526e-06 0.000362184901923 +GAAATG 0.000573002585831 0.000442537708879 +AATGAT 0.000470879862201 0.000282230275377 +GAAATA 0.000309505046361 0.000383845916286 +GAAATC 0.000487958406425 0.000269966318715 +GAAATT 0.000453801317976 0.000343709330848 +AATGAA 0.000758426780273 0.000495973520047 +AATGAC 0.000639225512417 0.000241058420871 +AATGAG 0.000782127617156 0.000321968680403 +TCAGCA 0.000352724219502 0.000399773132729 +TACTAG 7.31937609638e-06 0.000118657762503 +CTTGGA 0.000304276920578 0.000392844793576 +CTTGGC 0.000290683793542 0.000332799187585 +GTATTG 8.88781383132e-05 0.000177190282933 +GTCCTT 0.000262103372594 0.00032077413917 +CTTGGG 0.000274650874474 0.000398339683249 +AGCCCT 0.000484472989237 0.000421832327503 +GGGAAT 0.000248161703839 0.000244164228077 +TTGAAT 0.000241190869462 0.000348726404028 +GTCCTA 0.000138022520675 0.000166996864409 +GTCCTC 0.00040605110249 0.000297759311409 +TGCACG 9.82887647228e-05 8.02731708747e-05 +CTTGGT 0.000211913365076 0.000319022145361 +GTCCTG 0.000705448439004 0.000423026868736 +TGCACC 0.000251647121028 0.000219636314754 +TTGAAA 0.000386532766233 0.000513174913806 +AGCCCG 0.000159980648964 0.000136416608838 +AGCCCA 0.000414764645462 
0.000403675300758 +AGCCCC 0.000531526121285 0.000356849284414 +TTAACT 0.000110139183165 0.000269169957893 +ATAAAT 0.000171831067406 0.00055068350853 +TGGTCG 4.35677148594e-05 4.66667441791e-05 +TGGTCA 0.000131400228016 0.000262719435234 +TGGTCC 0.000157889398651 0.000245836585804 +ACGATT 4.84472989237e-05 4.07736740951e-05 +GAGACT 0.000451361525944 0.000334153000982 +GATTCA 0.000224112325237 0.000205540728202 +GAGGAG 0.00278066583319 0.000580387767197 +ATAAAG 0.000290683793542 0.000428442122327 +ATAAAA 0.000240842327743 0.000702708789482 +TCAGCC 0.000455892568289 0.000364016531814 +ATAAAC 0.00014569043849 0.000302218932013 +GAGACG 0.000268377123534 9.45280295915e-05 +ACGATG 9.2363555502e-05 5.8373248265e-05 +CTACAG 0.000429403397654 0.00031790724021 +GAGACC 0.00073751427714 0.000290990244421 +ACGATC 7.38908444016e-05 2.89078978447e-05 +GAGACA 0.000606114049124 0.000425097406874 +ACGATA 2.82318792289e-05 3.00228029958e-05 +GGTTAC 0.000158237940369 0.000117542857352 +ATGTGT 0.000214701698827 0.000473117964451 +GGTTAA 7.31937609638e-06 0.000157599806707 +ACCACT 0.000327977757462 0.000253800194025 +GCACTT 0.000182635860691 0.000262560163069 +CTTCCT 0.000412673395148 0.000740854472864 +TAAACT 0.0 0.000305484011384 +CTCAGT 0.000396989017799 0.000418248703803 +TCCGCT 7.28452192449e-05 7.91582657236e-05 +ACCACG 0.000191000861944 8.14677121079e-05 +ATGTGG 0.000231083159614 0.000356928920496 +ACCACC 0.000591126755213 0.000277292838279 +GAGGAA 0.00145864709349 0.000513891638546 +ACCACA 0.000448573192193 0.000341718428793 +GGTTAT 0.000141159396145 0.000150273287143 +TCCGCC 0.000127914810827 0.000105517808937 +GACGTG 0.000389321099984 8.44938832322e-05 +TCCGCG 5.19327161124e-05 5.93288812516e-05 +AACGTC 0.000154055439743 5.33561750853e-05 +AACGTA 5.92520922088e-05 3.93402246152e-05 +AACGTG 0.000307762337767 8.35382502456e-05 +CGTGAC 0.000139765229269 6.99204801865e-05 +AGTGTG 0.000465303194699 0.000432264654273 +TCTTCG 7.70277198714e-05 7.5256097695e-05 +AGTGTA 
0.000143599188177 0.00024320859509 +TCTTCA 0.000347496093719 0.000497486605609 +AGTGTC 0.000314384630426 0.000281195006308 +TCTTCC 0.000412324853429 0.000544551530199 +CGTGAT 0.000103516890506 6.1319783307e-05 +TAAGTC 0.0 0.000156405265474 +AACGTT 8.36500125301e-05 7.12742935842e-05 +CCGTGA 4.87958406425e-06 9.00684089873e-05 +TCTTCT 0.00040605110249 0.000604756408355 +AGTGTT 0.000220626908048 0.000388703717301 +GTGCGC 0.000257572330249 8.64051492054e-05 +TTGGCA 0.000325189423711 0.000280319009404 +AAAGGT 0.000299745878233 0.000309625087659 +TTGGCC 0.000354815469815 0.000244403136324 +GAGTAG 1.1153335004e-05 0.000162935424216 +TTAACG 2.68377123534e-05 4.29238483149e-05 +TTGGCG 8.74839714377e-05 5.79750678539e-05 +GACTTC 0.000825346790297 0.000296564770176 +TCGAGT 4.04308393895e-05 5.37543554964e-05 +GACTTA 0.000208427947887 0.00017639392211 +GACTTG 0.000489352573301 0.000289477158859 +AAAGGG 0.00034401067653 0.000338851529833 +AAAGGA 0.00053292028816 0.000536110105484 +TTGGCT 0.000400474434988 0.000369750329733 +AAAGGC 0.00048970111502 0.000337099536024 +GAGTAC 0.000496671949397 0.000127099187218 +GTATGG 6.69200100241e-05 0.00015409581909 +GACTTT 0.000566380293172 0.000354221293701 +TAAACC 0.0 0.000186666976716 +CAAGAA 0.000659092390393 0.000437122455289 +GACGTT 9.68945978473e-05 5.61434379629e-05 +CAAGAC 0.00039350360061 0.000258817267205 +GATTCG 7.70277198714e-05 3.84642277108e-05 +CAAGAG 0.00063852842898 0.000374847038995 +TTCTTG 0.00033146317465 0.000458863105734 +TTCTTC 0.000744136569799 0.000544551530199 +TTCTTA 0.000168694191936 0.000376599032804 +CAAGAT 0.000408839436241 0.000277850290855 +AGGGCT 0.000218187116016 0.000357804917401 +TACGGG 0.000129308977703 3.36064266955e-05 +TACGGA 0.000100728556755 4.36405730549e-05 +TACGGC 0.000168345650217 3.16951607223e-05 +CTCAGA 0.000231083159614 0.000523209060165 +TAGAGT 0.0 0.000208965079737 +TACGGT 7.14510523694e-05 3.62344174087e-05 +AGGGCG 6.27375093976e-05 0.000100261827511 +AGGGCA 0.000183332944128 
0.000335586450462 +AGGGCC 0.00025408691306 0.000276098297046 +TCGAGA 4.1825006265e-05 6.41866822668e-05 +CTGTGC 0.000399428809831 0.000461172552118 +TAGAGA 0.0 0.000312571622701 +GGCGAG 0.000380607557012 0.000114038869735 +CCGTGT 6.76170934618e-05 0.000123993380012 +GTGCTG 0.00109198120524 0.000478055401548 +GTGCTA 0.000197971696321 0.000187065157128 +ATCTGT 0.000289638168385 0.000375643399817 +CGAATT 0.000106653765976 3.7827139053e-05 +GCTCGT 7.87704284658e-05 5.67805266207e-05 +ATCTGA 2.33522951646e-05 0.000318862873197 +CGAATC 0.000130703144578 3.83845916286e-05 +ATCTGC 0.000378864848417 0.000275301936224 +CGAATA 4.77502154859e-05 2.89078978447e-05 +CGAATG 0.000125475018795 4.77816493302e-05 +ATCTGG 0.000327280674024 0.000296405498011 +ATCATA 0.00016416314959 0.000182685172606 +GATAGC 0.000186121277879 0.00011889667075 +AAGCAG 0.00116099246557 0.00048243538607 +GATAGA 0.000133840020048 0.000177190282933 +AAGCAA 0.000456589651727 0.000425973403778 +CAAACT 0.000158586482088 0.000299909485629 +AAGCAC 0.000513750493622 0.000287884437214 +TTTACC 0.00019832023804 0.000222184669385 +CATTTC 0.000208776489606 0.000427725397587 +CTAGGT 0.000104213973944 0.000162855788134 +CATTTA 9.82887647228e-05 0.000354619474112 +CATTTG 0.000170785442249 0.000417133798652 +GTTATA 7.52850112771e-05 0.000160466705667 +CAAACA 0.000198668779759 0.000408612737855 +AAGCAT 0.000353072761221 0.000317668331963 +CAAACC 0.000182635860691 0.000260648897096 +CTCGGG 0.000138022520675 0.00014621184695 +ATCATT 0.000460772152353 0.000282469183623 +CAAACG 6.34345928353e-05 6.60183121578e-05 +CTAGGG 0.000131051686297 0.000184914982908 +GTTATT 0.000151615647711 0.000258657995041 +TGGAGC 0.000258966497124 0.000386394270917 +CTAGGC 0.000162071899277 0.000180534998386 +CATTTT 0.000215398782265 0.000643618816477 +CTAGGA 0.000154055439743 0.00024066024046 +AAGTTT 0.000503991325494 0.00039873786366 +TACAGT 0.000298700253076 0.000287406620721 +CTCGAA 5.89035504899e-05 6.90444832821e-05 +CTAACG 3.10202129799e-05 
3.68715060664e-05 +CTAAGG 0.000110836266602 0.000223219938454 +CTAAGA 0.000129308977703 0.000250853658983 +CTAAGC 0.000127217727389 0.000190409872581 +TACAGG 0.000252344204466 0.000236598800266 +AAGTTG 0.000347844635438 0.00029887421656 +AAGTTA 0.000203199822104 0.000263993612549 +TACAGC 0.000479244863454 0.000229750097196 +AAGTTC 0.000602628631935 0.000279681920746 +TACAGA 0.000258617955405 0.000366325978198 +CTAAGT 0.000120246893012 0.000208248354997 +GTATGA 9.75916812851e-06 0.000167235772656 +GGGGCA 0.000265240248064 0.00027012559088 +GGGGCG 0.000132097311454 0.000154971815994 +TTCAAC 0.000549650290666 0.000205381456037 +TATATC 0.000177407734908 0.000173049206657 +TATATA 8.46956376867e-05 0.000539295548773 +TGTATA 6.72685517429e-05 0.000363618351402 +TATATG 0.000132445853173 0.000277850290855 +TCCGGT 6.41316762731e-05 6.53015874179e-05 +GGTCCA 0.000177756276626 0.000191126597321 +TATATT 0.000149175855679 0.000431707201698 +GGACTA 0.000108047932851 0.00015266236961 +TCTTAC 0.000178104818345 0.000221069764234 +TGATGG 0.0 0.000328658111309 +CTCGGT 8.01645953413e-05 8.94313203296e-05 +ATGAGT 0.000253738371341 0.00024320859509 +GCCCAC 0.000401171518425 0.00026654196718 +TGATGA 0.0 0.000337577352518 +TGCCAT 0.000173573776 0.00032292431339 +ACTCTT 0.000185424194442 0.00032435776287 +TGTTTC 0.000160329190683 0.000493186257169 +CACATA 0.00014569043849 0.000278567015595 +TCATAA 1.01077098474e-05 0.000239465699226 +CACATG 0.000429751939373 0.00036640561428 +TGTTTG 0.000137673978956 0.000632708673213 +ATGAGA 0.000269074206972 0.000334630817476 +ATGAGC 0.0003656202631 0.000219556678672 +ATGAGG 0.000299745878233 0.000273549942415 +GTGGCT 0.000767488864963 0.000395233876043 +ACTCTG 0.000493883615646 0.000431309021287 +TGCCAG 0.0005576667502 0.000408533101773 +ACTCTA 0.000117458559261 0.000190489508663 +ATTGTT 0.000269422748691 0.000360193999867 +CACATT 0.000246418995245 0.000326030120596 +TGCCAC 0.000263846081189 0.000317588695881 +AAATTC 0.000297306086201 0.000322287224732 
+AAATTA 0.000172179609124 0.000435211189316 +AAATTG 0.000207033781012 0.000328737747391 +TCCACT 0.00030357983714 0.000286928804228 +TTCAAT 0.000354118386377 0.00025467619093 +CTTCGA 0.000217141490859 5.80547039361e-05 +CGTACC 5.85550087711e-05 2.69169957893e-05 +TTAATC 7.24966775261e-05 0.000195665854007 +AAATTT 0.000327629215743 0.000512776733395 +TTCGAA 0.000110836266602 4.82594658235e-05 +CTTAGT 0.000120246893012 0.0001961436705 +GGGAAC 0.000400125893269 0.000255472551752 +GGTGCG 7.87704284658e-05 7.11150214197e-05 +GCGTAT 4.60075068915e-05 2.96246225847e-05 +GGGAAG 0.000778990741686 0.000508237476708 +GGTGCC 0.000401868601863 0.000233652265224 +TCGTCC 0.000120595434731 6.67350368978e-05 +GCCCAT 0.000258617955405 0.000204983275626 +AGAGCG 7.04054272128e-05 0.000114357414064 +GCGACT 4.80987572048e-05 5.67008905385e-05 +AGAGCC 0.00034087380106 0.000402082579113 +AGAGCA 0.000280576083695 0.000466587805709 +GCGTAG 1.74270859438e-06 3.40046071066e-05 +GGTGCT 0.000377122139823 0.000312491986619 +TCCCCA 0.000452755692819 0.00045655365935 +GCGTAC 5.96006339277e-05 2.54835463094e-05 +TAATTT 0.0 0.000471764151053 +GCGTAA 1.74270859438e-06 2.21388308563e-05 +GCGACG 3.3460005012e-05 3.67918699842e-05 +AGAGCT 0.000272211082442 0.000443732250113 +GCGACA 5.61152167389e-05 5.63823462096e-05 +TATCGG 7.21481358072e-05 2.73948122826e-05 +GCGACC 7.24966775261e-05 5.95677894983e-05 +TCCGGC 0.000120246893012 8.8475687343e-05 +GCAGGA 0.000391760892016 0.000420956330599 +GCAGGC 0.000418598604369 0.000354938018441 +GCAGGG 0.000360740679036 0.000385677546177 +CAATTT 0.000103865432225 0.000258339450712 +GCACTA 0.000118155642699 0.000126780642889 +CCGTGC 7.45879278393e-05 0.00010774761924 +GCTCGG 0.000203199822104 0.000126701006807 +TATTCT 0.00017671065147 0.000332878823667 +CCGTGG 8.88781383132e-05 0.000145813666539 +CTTTAG 1.01077098474e-05 0.000255791096081 +GCAGGT 0.000253041287903 0.000273311034168 +CTTCGT 0.000111881891759 8.15473481901e-05 +TATTCC 0.000163466066153 0.000197816028227 
+CAATTG 6.76170934618e-05 0.000142150406757 +TATTCA 0.000106653765976 0.000264232520796 +CAATTA 4.91443823614e-05 0.000168748858218 +TATTCG 3.17172964177e-05 3.06598916535e-05 +CAATTC 8.88781383132e-05 0.000162218699476 +TGTAGA 7.52850112771e-05 0.000315597793826 +GGCTCA 0.000321006923084 0.000317429423717 +AGCATT 0.000307065254329 0.000334391909229 +GGCTCC 0.000670594267116 0.000371661595706 +AATCCT 0.000302534211984 0.00027012559088 +TGTAGG 5.19327161124e-05 0.000220990128152 +GGCTCG 0.000135234186924 0.000101934185238 +TTCGGA 9.37577223775e-05 6.33903214447e-05 +TTCGGC 0.00017671065147 4.97725513856e-05 +CTTTAC 0.000152661272867 0.000229590825031 +TTCAAG 0.000685581561028 0.000361149632854 +TTCGGG 0.000124429393638 7.0477932762e-05 +AGCATG 0.00048029048861 0.000318544328868 +GGCTCT 0.000477850696578 0.000386473906999 +AGCATC 0.000658046765237 0.000317349787634 +TTTACT 0.000173922317719 0.000354938018441 +AGCATA 0.000145341896771 0.000196940031322 +TTACAT 0.000103865432225 0.000310501084564 +AGACCT 0.000222021074924 0.000322287224732 +TTCGGT 7.49364695582e-05 5.51878049763e-05 +GTTTAT 0.000167997108498 0.000299033488725 +GGGGAG 0.000736120110265 0.000421911963585 +GCACTC 0.00021679294914 0.000195267673596 +GCCTTT 0.000544770706602 0.000422469416161 +TGCTTG 0.000203199822104 0.000399295316236 +GTTTAA 5.576667502e-06 0.000288123345461 +GAGGTG 0.00107350849414 0.000325313395856 +AACTTG 0.000357952345285 0.000314881069086 +GTTTAG 5.576667502e-06 0.000186826248881 +GAGGTA 0.000252692746185 0.000164289237614 +CTGGCT 0.000994040982232 0.000574255788866 +AGGCGG 0.000147084605365 0.000149715834568 +AGGCGA 8.88781383132e-05 7.86008131481e-05 +AGGCGC 0.000127914810827 9.84301976201e-05 +AGGAGT 0.000171133983968 0.000307315641275 +ACAGCC 0.000601931548498 0.000376997213215 +ACAGCA 0.000398383184674 0.000442537708879 +TCCAGC 0.000648984680546 0.000414346535775 +ACAGCG 0.000123035226763 0.000101695276991 +AGGAGC 0.000268725665253 0.000431946109945 +AGGCGT 5.96006339277e-05 
7.98749904636e-05 +AGGAGA 0.000254783996498 0.000550205692037 +CTGGCG 0.000299745878233 0.000113003600666 +AGGAGG 0.000300094419952 0.000565894000233 +CTGGCA 0.000698129062907 0.000374130314255 +CTGGCC 0.00129483248562 0.000480524120097 +GCGCCC 0.000184030027566 0.000145017305717 +GCATCG 6.83141768996e-05 6.41070461846e-05 +ACAGCT 0.000457635276883 0.000419522881119 +GGAAAA 0.000447527567036 0.000529341038496 +AACGCT 0.000127566269108 5.56656214696e-05 +AATTTT 0.000253041287903 0.000587236470268 +AAACAG 0.000562546334265 0.000450740225348 +TGTCAC 0.000160677732402 0.000326269028843 +AAACAC 0.00031926421449 0.000351752575152 +GCTCTA 0.000147084605365 0.000184038986003 +AAACAA 0.000343662134811 0.000660342393743 +AACGCG 6.76170934618e-05 2.22981030207e-05 +AATTTA 0.000156146690056 0.000382332830723 +AATTTC 0.000235962743679 0.000312332714455 +AACGCC 0.000239099619148 5.59841657985e-05 +TTTCAC 0.000161374815839 0.000300705846451 +ATTGGA 0.000378516306699 0.000231263182758 +AATTTG 0.000200411488353 0.000329693380378 +ATTTAC 0.000169391275373 0.000239147154897 +ATTTAA 1.04562515663e-05 0.000496292064376 +ATTTAG 6.97083437751e-06 0.000242730778597 +CCCTCG 0.00014569043849 0.000116666860448 +TAAATA 0.0 0.000533721023018 +GGAAAG 0.000508522367839 0.00048601900977 +TGACCA 0.0 0.000297600039245 +CATTCT 0.000195880446008 0.000366007433869 +TAAATC 0.0 0.000230227913689 +TGACCG 0.0 6.34699575269e-05 +GCGCCA 8.78325131566e-05 8.9192412083e-05 +CATTCG 4.53104234538e-05 4.96132792211e-05 +CATTCC 0.00016416314959 0.000282628455788 +CATTCA 0.000137673978956 0.00030245784026 +TCCATA 0.000159980648964 0.000217087960123 +GCCTTC 0.000906208469076 0.000368396516336 +ACAAAT 0.000238053993992 0.000351434030823 +GATTAA 9.06208469076e-06 0.0002033109179 +GTAGGC 0.000140113770988 0.000125426829492 +GATTAC 0.000245024828369 0.000110136701706 +GTAGGA 0.000143599188177 0.000178783004577 +GTAGGG 9.93343898795e-05 0.000149317654157 +GATTAG 6.97083437751e-06 0.000113720325406 +GCGGGT 
7.38908444016e-05 8.10695316968e-05 +ACAAAC 0.000236311285397 0.000327463570076 +TTTCAA 0.000193789195695 0.000456314751103 +ACAAAA 0.000298351711357 0.00056541618374 +ACAAAG 0.000373636722634 0.000406701471882 +GCGGGC 0.000236311285397 0.000160625977832 +GTCGGT 4.53104234538e-05 4.89761905634e-05 +GCGGGA 9.55004309718e-05 0.000127497367629 +CCCTCA 0.000285455667759 0.000346894774137 +GATTAT 0.000265937331502 0.000183083353017 +GTAGGT 7.91189701847e-05 0.000145335850046 +AACCTG 0.000890524091726 0.000346815138055 +TAATGA 0.0 0.000279522648581 +GATGTT 0.000433585898281 0.000281672822801 +TAATGC 0.0 0.000162855788134 +GGGATG 0.000321355464803 0.000291786605243 +GCCCTT 0.000311944838393 0.000294255323792 +GCGCCG 8.92266800321e-05 0.000111092334693 +GGCGTA 3.41570884498e-05 2.62002710494e-05 +GCCCTC 0.000545119248321 0.000335188270051 +GATGTG 0.000949776183935 0.000316234882483 +GCCCTA 0.000171482525687 0.000151149284048 +GATGTA 0.000277787749944 0.000201797832338 +GCCCTG 0.00126590352296 0.000509113473613 +GATGTC 0.000579276336771 0.000230705730182 +AACCTC 0.000526646537221 0.000229033372456 +TGAAAT 0.0 0.000472879056204 +GCTCTT 0.000264891706345 0.000375802671982 +TTCTAA 1.3941668755e-05 0.000363459079238 +ATCCGT 0.000152661272867 4.94540070567e-05 +TTCTAC 0.000527692162377 0.000245677313639 +GGCTTA 0.0001551010649 0.000169147038629 +TTGGAT 0.000493883615646 0.000278248471266 +TGAAAC 0.0 0.000314801433004 +TGAAAG 0.0 0.000435927914055 +CCCTCC 0.000402217143582 0.000566929269302 +ATCCGC 0.000279879000257 4.4038753466e-05 +ATCCGA 0.000236311285397 4.56314751103e-05 +ATCCGG 0.000291032335261 5.064854829e-05 +TGGCTC 0.000231431701333 0.000404232753333 +TGGCTA 0.000111184808321 0.000237952613664 +TGGCTG 0.000496671949397 0.000561514015712 +GGGTGA 8.01645953413e-06 0.000231263182758 +AGTTGT 9.86373064417e-05 0.000296564770176 +AGCTAT 0.000235962743679 0.000215734146726 +ATTAGT 0.000101774181912 0.0001688284943 +GTCGGC 8.50441794056e-05 5.76565235251e-05 +TGGCTT 
0.000168345650217 0.000499955324158 +AGTTGG 8.26043873734e-05 0.000263834340385 +AGCTAA 1.15018767229e-05 0.000228237011634 +AGCTAC 0.000377819223261 0.000213663608588 +AGTTGC 7.91189701847e-05 0.000204505459133 +AGTTGA 5.92520922088e-06 0.000248464576517 +AGCTAG 1.32445853173e-05 0.000221547580727 +TCCTAG 1.32445853173e-05 0.000242332598186 +TCCAGT 0.000416507354056 0.000348328223617 +CTAATT 9.02723051887e-05 0.000207213085928 +CAGTGT 0.00042452381359 0.000475427410835 +TAACAA 0.0 0.000236280255938 +TCAGGT 0.000208079406169 0.000275461208388 +TCAAGG 0.000128611894265 0.000292981146476 +GTCGGG 8.26043873734e-05 7.00001162687e-05 +TCAAGC 0.000152661272867 0.000200682927187 +CTGCAC 0.000631906136321 0.000331047193776 +CTAATA 5.33268829879e-05 0.000174721564384 +CAGTGC 0.000384790057638 0.000361866357594 +CTAATC 8.12102204979e-05 0.000123435927436 +CAGTGA 3.03231295422e-05 0.000434813008904 +CAGTGG 0.000431843189687 0.000442219164551 +CTAATG 0.00013627981208 0.000191445141649 +TCAAGT 0.000194137737414 0.000274744483648 +CTGCAA 0.000361089220755 0.00030532473922 +GAGCCC 0.00076226073918 0.000345063144246 +TCAGGC 0.000314384630426 0.00028270809187 +CGAGCT 0.000153009814586 8.68033296165e-05 +TCCTAA 1.77756276626e-05 0.000259374719781 +GATCAG 0.000410930686554 0.000196621486994 +CAACCA 0.000189258153349 0.000266143786769 +GTTCAT 0.000187515444755 0.000256507820821 +TTCCCT 0.00036875713857 0.000497486605609 +TAACAG 0.0 0.000229750097196 +CGAGCG 6.20404259598e-05 7.19910183241e-05 +GTTAGT 6.93598020562e-05 0.000144539489224 +CGAGCA 0.000143250646458 8.1865892519e-05 +GAGCCG 0.000243630661494 0.00015481254383 +CGAGCC 0.000243282119775 0.000109340340884 +GTTCAG 0.000357603803566 0.000285973171241 +TTCCCC 0.000347147552 0.000389579714205 +GTTCAC 0.000190652320225 0.000195984398336 +GTTCAA 0.000183681485847 0.00023205954358 +TGAGCA 0.0 0.000381297561655 +GGACCT 0.000300094419952 0.000288919706283 +TACATG 0.000441950899534 0.000272435037264 +AAGTAC 0.000537102788787 
0.000186189160223 +AAGTAA 2.61406289156e-05 0.000286530623817 +TACATC 0.000539891122538 0.000182207356112 +AAGTAG 2.02154196948e-05 0.000239704607473 +TACATA 0.000127217727389 0.00030747491344 +GCACAC 0.000218187116016 0.000276496477457 +GGACCG 7.45879278393e-05 8.39364306566e-05 +ATTGGT 0.000225506492112 0.000192639682883 +GGACCA 0.000293472127293 0.000273231398086 +TCATCA 0.000222021074924 0.000284460085679 +GGACCC 0.000293820669012 0.000265267789865 +CACACG 0.000192743570538 0.00011172942335 +TACATT 0.000282318792289 0.000327941386569 +CACACC 0.000341570884498 0.000313367983524 +GCACAA 0.000182635860691 0.000206098180777 +CACACA 0.000380956098731 0.000899887729051 +TCTGCC 0.000555226958168 0.00049716806128 +TCTGCA 0.000426615063903 0.00045583693461 +TCTGCG 0.000125126477076 0.000104721448115 +TAGATT 0.0 0.000212389431273 +CCCGTC 9.93343898795e-05 7.94768100525e-05 +TTAAGG 9.44548058152e-05 0.000225768293085 +CCCGTA 4.60075068915e-05 3.22526132979e-05 +CGTCGA 3.86881307952e-05 1.35381339769e-05 +CCCGTG 0.000290335251823 0.00013219589648 +TTAAGC 6.09948008032e-05 0.000174243747891 +TTACGT 2.71862540723e-05 4.52332946992e-05 +TAGATC 0.0 0.00013291262122 +TGTCAA 0.000116412934104 0.00026797541666 +TAGATA 0.0 0.000180534998386 +TTTCGC 5.15841743935e-05 4.02162215195e-05 +TAGATG 0.0 0.000222105033303 +TGGTTA 5.43725081445e-05 0.000215335966315 +TCTGCT 0.000485518614393 0.000531411576634 +TTACGC 3.52027136064e-05 2.02275648831e-05 +TTAAGT 0.000116761475823 0.000284619357843 +TTACGA 4.4613340016e-05 3.1217344229e-05 +TTACGG 5.12356326747e-05 3.99773132729e-05 +CCCGTT 5.0190007518e-05 6.72128533911e-05 +AGTGAC 0.000668154475084 0.00029887421656 +AGTGAA 0.000542679456289 0.000347770771041 +AGTGAG 0.000720784274634 0.000373811769926 +GCGGTC 0.00010212272363 5.81343400184e-05 +CGGGCG 8.3998554249e-05 0.000117224313023 +CCCTCT 0.000330417549494 0.000455438754199 +TAAGAG 0.0 0.000262321254823 +CCTGAT 0.000482033197205 0.000262560163069 +GAAAGT 0.000368408596851 
0.0003340733649 +CGGGCA 0.000185075652723 9.88283780312e-05 +TAAGAC 0.0 0.000188737514854 +ATTTAT 0.000200062946634 0.00056326600952 +AGTGAT 0.000451012984225 0.000271718312524 +TAAGAT 0.0 0.000228555555963 +CCTGAA 0.000637134262104 0.000410285095582 +CCTGAC 0.000535011538474 0.000334710453558 +CCTGAG 0.00100693702583 0.000528226133345 +GTTAGG 4.63560486104e-05 0.000138248238729 +ATTGGG 0.000283015875727 0.000178942276741 +TATACT 0.000124777935357 0.000191365505567 +GTTAGC 8.50441794056e-05 0.000123037747025 +CTCCAG 0.000755289904803 0.000572503795057 +GTTAGA 6.76170934618e-05 0.000174164111808 +TGTAGC 0.000123035226763 0.000254755827012 +GCCAAA 0.00061970717616 0.000247907123941 +TATACG 3.55512553253e-05 3.07395277357e-05 +GGTAGC 0.000125823560514 0.000144937669635 +TATACC 0.00013941668755 0.000120250484148 +TATACA 0.000119549809574 0.000271957220771 +ATTGGC 0.000372591097478 0.000166598683998 +TCTTGT 0.000134885645205 0.000387270267821 +TCCACG 0.000180544610377 9.74745646335e-05 +ACTAGT 9.5151889253e-05 0.000123834107847 +TTGACT 0.000171831067406 0.000279522648581 +GTAATA 5.89035504899e-05 0.000170580488109 +AACGCA 0.000113624600353 5.46303524008e-05 +GTAATC 7.07539689317e-05 0.000116666860448 +TCTAAC 0.000151267105992 0.000162457607723 +CGTTCT 9.44548058152e-05 8.52106079721e-05 +CCACCG 0.000180893152096 0.000123834107847 +GGTAGG 3.20658381365e-05 0.000162537243805 +CCACCA 0.000630860511164 0.000425973403778 +CCACCC 0.000552787166136 0.000448351142881 +GTTCGT 6.48287597108e-05 4.57111111925e-05 +TTTATG 0.000179498985221 0.000375484127653 +ACTAGG 5.43725081445e-05 0.00014836202117 +TTTATC 0.000221323991486 0.000274266667155 +ACTAGC 0.000106305224257 0.000122878474861 +TTTATA 0.000113624600353 0.000520182889041 +ACTAGA 7.45879278393e-05 0.000191922958143 +CCACCT 0.000577533628176 0.000397543322427 +CGTTCG 2.19581282891e-05 2.12628339519e-05 +TCGTTG 5.08870909558e-05 4.88965544812e-05 +CGTTCA 4.91443823614e-05 5.76565235251e-05 +CGTTCC 9.93343898795e-05 
6.84073946243e-05 +TCTAAT 0.000147084605365 0.000220671583823 +TCGTCT 7.59820947148e-05 7.03982966798e-05 +CTTTGT 0.000132445853173 0.000529261402414 +AAACAT 0.000258617955405 0.000426132675943 +ACTGGA 0.000475410904546 0.00039372079048 +ACTGGC 0.000363180471068 0.000269886682633 +TAGTCC 0.0 0.0001379296944 +GTGACC 0.000538845497381 0.000264152884714 +TAGTCA 0.0 0.000180296090139 +GTGACA 0.000457286735164 0.000324676307198 +TAGTCG 0.0 2.50853658983e-05 +GTGACG 0.00014882731396 6.89648471999e-05 +TGTCAT 0.000105956682538 0.000330728649447 +ACTGGT 0.000234917118522 0.000238350794075 +CTTTGG 0.000123732310201 0.000436644638795 +CCTATG 0.000181938777253 0.000193197135458 +CTTTGC 0.000128960435984 0.000383208827628 +CTACGC 6.62229265863e-05 3.28100658734e-05 +GTGACT 0.000406748185927 0.000337258808189 +TAGTCT 0.0 0.000199488385953 +GTGCAC 0.000395943392642 0.000225449748756 +CGACGG 6.37831345542e-05 3.1217344229e-05 +ACCTAG 1.1153335004e-05 0.000173208478822 +CGACGA 4.35677148594e-05 2.01479288009e-05 +ACCTAA 1.08047932851e-05 0.000175199380877 +CGACGC 6.62229265863e-05 3.21729772156e-05 +ACCTAC 0.000450664442506 0.000167235772656 +CCGCAG 0.000247116078683 0.000142787495415 +CCGCAA 3.58997970442e-05 4.65074720147e-05 +CCGCAC 9.02723051887e-05 7.38226482151e-05 +CTACGG 0.000100380015036 4.68260163436e-05 +ACCTAT 0.000242933578056 0.000147008207772 +CGACGT 3.20658381365e-05 1.95904762254e-05 +CGAAGA 0.000131748769735 7.57339141883e-05 +CCCCGT 7.70277198714e-05 9.19796749606e-05 +CGAAGG 0.00013314293661 8.22640729301e-05 +GCGCTC 0.000177756276626 0.00011316287283 +GTGCAT 0.000190652320225 0.0002377933415 +GCGCTA 4.25220897028e-05 3.49602400932e-05 +CCGCAT 4.00822976707e-05 4.65871080969e-05 +GTTCGC 8.15587622168e-05 3.73493225597e-05 +GAAGAT 0.0011836476773 0.000382412466806 +GGATCT 0.000192743570538 0.000243288231173 +TCATGA 5.576667502e-06 0.000282548819706 +TCGGAA 0.000148478772241 7.03186605975e-05 +TCGGAC 0.000163814607871 4.73038328369e-05 +GAATCT 0.000302185670265 
0.000262560163069 +TCGGAG 0.000260360664 9.34927605227e-05 +GGATCG 4.14764645462e-05 5.36747194142e-05 +GAAGAG 0.00182949548238 0.000569477623933 +GAAGAA 0.00171691650718 0.000625939606225 +CGAGAA 0.000236311285397 8.83960512608e-05 +GAAGAC 0.00109999766477 0.00038185501423 +GGATCA 0.00014882731396 0.000187781881868 +GAATCA 0.000212958990233 0.000221786488974 +GAATCC 0.000249207328996 0.000196302942665 +TCGGAT 0.00011780710098 4.84983740701e-05 +GAATCG 6.34345928353e-05 4.34813008904e-05 +ATGGTC 0.000317870047614 0.000177349555097 +CAGATG 0.000790492618409 0.000417930159474 +ATGGTA 0.000170088358811 0.000199647658118 +TGACCC 0.0 0.000256985637314 +ATGGTG 0.000666760308208 0.000319420325772 +CAGATC 0.000723572608385 0.000255791096081 +ACGCCC 0.000140462312707 7.82822688192e-05 +CAGATA 0.000258617955405 0.000245597677557 +TGTCAG 0.000281273167132 0.000375324855488 +ACGCCT 0.000105956682538 7.79637244904e-05 +CAGATT 0.000482381738923 0.000300148393876 +TGATTA 0.0 0.000204186914804 +CTCGAC 0.000104213973944 4.10125823417e-05 +ATGGTT 0.000250252954152 0.000267895780578 +TCGCAA 3.13687546988e-05 3.20137050512e-05 +TTGCGC 7.56335529959e-05 3.77475029708e-05 +GCTATT 0.000197971696321 0.000194232404527 +TGCGTC 7.04054272128e-05 5.70194348673e-05 +GCGTGG 7.11025106506e-05 0.000116268680037 +TTGTAG 9.75916812851e-06 0.000255870732163 +AATGGT 0.000299397336514 0.000237554433253 +TCCACC 0.000460423610634 0.000292025513489 +CTACGT 3.24143798554e-05 4.84983740701e-05 +AATGGC 0.00056080362567 0.000243845683748 +AATGGA 0.000528737787534 0.000361468177183 +AATGGG 0.000387926933108 0.000242571506433 +CCCCGA 0.000160677732402 0.000107269802746 +AATAGA 0.000107699391132 0.00024679221879 +AATAGC 0.000165905858185 0.000168589586053 +GTGAAT 0.000406399644209 0.000258657995041 +AGCCAG 0.000876582422971 0.000559443477574 +GGAGGG 0.000330069007775 0.000467145258285 +AAAAGT 0.000283364417446 0.000426053039861 +AGCCAC 0.000412673395148 0.000372617228693 +AGCCAA 0.000276742124787 
0.000324915215445 +TGGTAG 9.06208469076e-06 0.000222025397221 +CGCCAT 0.000122338143325 7.88397213948e-05 +GAGTGT 0.000451361525944 0.000282389547541 +TGGTAC 0.000200411488353 0.000174164111808 +TGGTAA 1.18504184418e-05 0.000213265428177 +AAAAGG 0.000320658381365 0.000416178165666 +GGAGGT 0.000304625462297 0.000272036856853 +AGCCAT 0.00026663441494 0.000370785598802 +AAAAGC 0.000321704006522 0.000390376075027 +AAAAGA 0.000395943392642 0.000658032947358 +TCTATA 8.53927211245e-05 0.000224175571441 +GAGTGG 0.000464257569542 0.000316314518566 +CGCCAG 0.000382350265606 0.000115392683132 +TGGTAT 0.000126172102233 0.000214300697246 +CGCCAA 9.09693886265e-05 5.30376307565e-05 +GAGTGC 0.000379561931855 0.000220273403412 +CGCCAC 0.000199017321478 9.93061945245e-05 +GAGTGA 3.03231295422e-05 0.000286689895981 +GCCAGG 0.000445784858442 0.000458544561405 +GCCAGA 0.000313687546988 0.000347133682384 +GCCAGC 0.000821512831389 0.000427008672847 +GCCCGT 0.000136628353799 7.83619049015e-05 +TACCGA 0.000163466066153 3.38453349422e-05 +TACCGC 0.000231083159614 3.50398761754e-05 +TACTGA 2.75347957911e-05 0.000267417964084 +CTTCAT 0.000231083159614 0.000393004065741 +GCCCGC 0.00029138087698 0.000152104917034 +GCCAGT 0.000441253816096 0.000287565892885 +GCCCGA 0.000203199822104 8.34586141633e-05 +GCCCGG 0.000384790057638 0.000178305188084 +TGCGAT 7.59820947148e-05 3.68715060664e-05 +CTTCAC 0.000226552117269 0.000304687650562 +CTTCAA 0.000258617955405 0.000341399884464 +CTTCAG 0.000655955514923 0.000513573094217 +TCCGAG 0.000255132538217 9.89876501956e-05 +GTGGGC 0.000667108849927 0.000277053930033 +TCCGAC 0.000139765229269 4.45165699593e-05 +AATTCA 0.000206336697574 0.000312253078373 +GGACTG 0.000344359218249 0.000334630817476 +AATTCC 0.000227249200707 0.000242651142515 +AATTCG 4.11279228273e-05 3.59158730798e-05 +TCCGAT 8.9575221751e-05 3.98180411085e-05 +TAGCGG 0.0 3.63140534909e-05 +AATTCT 0.000249555870715 0.000346337321561 +TAGCGC 0.0 3.49602400932e-05 +GTCATC 0.000711025106506 
0.00020617781686 +GTCATA 0.000143250646458 0.000140318776866 +GTCATG 0.000456589651727 0.000231183546676 +AGCAGG 0.000325886507148 0.000468499071682 +CCAGGT 0.000309505046361 0.000336621719531 +TTGGAA 0.000558712375357 0.000444687883099 +AGCAGC 0.00109372391383 0.000542242083815 +AGCAGA 0.000308459421205 0.000496929153034 +GGGGTG 0.000329023382618 0.000314562524757 +CCAGGG 0.000462166319229 0.000462048549023 +AGCAGT 0.000519675702843 0.000367281611184 +CCAGGA 0.000537799872225 0.000553311499243 +CCAGGC 0.00050817382612 0.000489045180894 +TCCCCG 0.000207382322731 0.000143185675826 +ATAGAG 0.00028510712604 0.000209920712724 +CATCTG 0.000379213390136 0.000434335192411 +ACGGTC 0.000114321683791 4.53129307814e-05 +CATCTA 9.16664720642e-05 0.000216371235383 +ATAGAC 0.000225506492112 0.000145176577881 +CATCTC 0.000221672533205 0.000373413589515 +CAAGCT 0.000335645675277 0.000314642160839 +TCCAAC 0.000449618817349 0.000206575997271 +GGGGTC 0.000257920871968 0.000189215331347 +CAAGCC 0.000372591097478 0.00028844188979 +CATCTT 0.000207382322731 0.000405029114155 +CAAGCA 0.000333902966683 0.000325950484514 +CAAGCG 7.14510523694e-05 6.63368564867e-05 +GTTCTT 0.000228294825863 0.000390614983274 +ACATGA 7.66791781526e-06 0.000293777507298 +CGTAAT 2.92775043855e-05 2.8987533927e-05 +ACATGC 0.000107699391132 0.000246553310544 +TCTTGA 1.18504184418e-05 0.00033335664016 +ACATGG 0.000135582728642 0.000324437398952 +AGGGAT 0.000236311285397 0.000259772900192 +GTACAT 8.53927211245e-05 0.00021589341889 +TGGCGG 0.000103516890506 0.00011817994601 +CGTAAG 6.79656351807e-05 3.55176926688e-05 +GAGTCA 0.000356209636691 0.000266382695016 +ACATGT 0.000123035226763 0.000326746845336 +CGTAAC 3.58997970442e-05 2.75540844471e-05 +CGTAAA 6.44802179919e-05 3.99773132729e-05 +AGGGAG 0.000431494647968 0.000470967790231 +TTATTT 0.000155449606618 0.000836736315853 +AGGGAC 0.000331811716369 0.000312730894866 +AGGGAA 0.000334948591839 0.000444289702688 +CCCTTT 0.000330417549494 0.000401286218291 +TGTTGT 
8.60898045622e-05 0.000451616222252 +TCTTGG 0.000148478772241 0.00038687208741 +GTTGTT 0.000182287318972 0.000348726404028 +ATCCTT 0.000324840881992 0.0002960073176 +GCGGGG 0.00013000606114 0.000184676074661 +TTGTCC 0.000185772736161 0.000273231398086 +TTGTCA 0.000181590235534 0.000326109756678 +ATCCTC 0.000546513415196 0.00025467619093 +CTATGA 1.25475018795e-05 0.000220114131248 +ATCCTA 0.000186818361317 0.000181490631372 +GTTGTC 0.000240842327743 0.000210955981793 +ATCCTG 0.00100867973443 0.000371104143131 +GTTGTG 0.000327977757462 0.000282230275377 +TTGTCT 0.000232128784771 0.000430433024382 +CTATGC 5.99491756466e-05 0.000162616879887 +CTGTGG 0.00043079756453 0.000580069222868 +AAGGGG 0.000350632969189 0.000289795703187 +TGACTC 0.0 0.000292662602147 +TGGGCA 0.000208079406169 0.000358282733894 +TGGGCG 7.28452192449e-05 0.000104641812033 +TGACTG 0.0 0.000399693496647 +AAGCCG 0.000183681485847 9.50058460848e-05 +AAGCCC 0.000658046765237 0.000307076733028 +AAGCCA 0.000641665304449 0.000455199845952 +CAAAAT 0.000241539411181 0.000398498955414 +CTATGG 7.38908444016e-05 0.000206496361188 +TGGGCT 0.000248161703839 0.000408453465691 +CAAAAC 0.000237705452273 0.00035095621433 +CAAAAA 0.000283015875727 0.00046985288508 +AAGCCT 0.000614130508658 0.000385836818341 +CAAAAG 0.000334948591839 0.000380501200832 +TCAATG 0.000128960435984 0.000221308672481 +CCCTTA 0.000110836266602 0.000172013937589 +TCAATC 8.33014708112e-05 0.000130443902671 +CGTCGT 3.55512553253e-05 1.90330236498e-05 +CAGTTC 0.000589384046618 0.000325313395856 +CAGTTA 0.000187166903036 0.000235006078622 +CAGTTG 0.000344707759968 0.000294255323792 +GACCCC 0.000590429671775 0.000226405381743 +CTCTGT 0.000339131092466 0.000631195587651 +TCAATT 9.68945978473e-05 0.000212548703437 +TGGCGT 5.22812578313e-05 7.75655440793e-05 +GTTTGG 0.000102471265349 0.000332002826762 +CTCTGC 0.000410233603116 0.000594164809421 +GACCCT 0.000544073623164 0.000274107394991 +CTCTGA 3.76425056385e-05 0.000517793806574 +CTCTGG 
0.000353769844658 0.000572105614646 +CAGTTT 0.000528389245815 0.000429875571807 +GCGAAG 0.000132445853173 6.64164925689e-05 +CTACTC 0.000126869185671 0.000161183430407 +TGCTGG 0.000231083159614 0.000567168177549 +GCGAAC 4.77502154859e-05 3.59158730798e-05 +GAGTCT 0.000463211944385 0.000294812776367 +GCGAAA 6.58743848674e-05 4.13311266706e-05 +TAGGGA 0.0 0.000208646535408 +GCTGGT 0.000339828175903 0.00032292431339 +GTGATT 0.000327629215743 0.000256189276492 +CTACTA 7.28452192449e-05 0.000130842083082 +TGCTGT 0.000264543164626 0.000582697213581 +GCGAAT 3.55512553253e-05 2.9385714338e-05 +TAATGG 0.0 0.000201081107598 +GTGATG 0.000528040704096 0.000304130197986 +CTACTG 0.000273256707598 0.000260728533178 +GTGATC 0.00046181777751 0.000168191405642 +GCTGGC 0.000615176133815 0.0004303533883 +GTGATA 0.000149872939116 0.000174164111808 +GCTGGA 0.000499460283148 0.000450899497512 +TGATAA 0.0 0.000259135811534 +TGTAGT 0.00010839647457 0.0002413769652 +TGATAC 3.48541718875e-07 0.000164289237614 +TGATAG 0.0 0.000176553194275 +CTTGTT 0.000200760030072 0.000380740109079 +GACGGT 0.000115367308948 5.59841657985e-05 +GCTAGG 7.31937609638e-05 0.000185153891154 +GCTAGA 9.55004309718e-05 0.000180375726221 +ATGAAT 0.000403611310458 0.000361229268936 +CTATGT 7.52850112771e-05 0.000247349671366 +AAGGGT 0.000280924625414 0.000232378087909 +TGATAT 0.0 0.000231661363169 +GTAGTG 0.000167997108498 0.000160705613914 +GCTAGT 0.000106305224257 0.000131319899576 +ATGAAC 0.00053292028816 0.000238908246651 +GACGGG 0.000203896905542 7.09557492553e-05 +ATGAAA 0.000556969666763 0.000454801665541 +GACGGA 0.000135234186924 7.33448317218e-05 +ATGAAG 0.000939319932369 0.000420398878023 +GACGGC 0.00028197025057 6.71332173089e-05 +CTTGTC 0.000236659827116 0.000294812776367 +CTAACT 9.86373064417e-05 0.000195188037514 +CCATAG 9.41062640963e-06 0.000189772783923 +GGTGAA 0.000384790057638 0.000240899148706 +GGTGAC 0.000445436316723 0.000261684166165 +GCGTCT 9.96829315983e-05 8.09898956146e-05 +TGTAAA 
0.000211564823357 0.000464278359325 +GGTGAG 0.00042766068906 0.000268851413564 +GGAATG 0.000315778797301 0.00029529059286 +CGTCCT 8.78325131566e-05 9.8032017209e-05 +GGAATC 0.000267680040096 0.000209044715819 +GGAATA 0.00013314293661 0.000202116376667 +GCGTCA 6.48287597108e-05 5.16041812766e-05 +GCGTCC 0.000132097311454 8.64051492054e-05 +GGTGAT 0.00033146317465 0.000198054936474 +GCGTCG 4.60075068915e-05 3.0819163818e-05 +CGTCCG 2.54435454779e-05 4.23663957394e-05 +GGAATT 0.000253738371341 0.000284300813514 +CGTCCC 7.87704284658e-05 9.92265584423e-05 +CGTCCA 8.22558456546e-05 6.33106853625e-05 +TTTCGA 9.89858481606e-05 5.23209060165e-05 +CCGTAT 6.58743848674e-05 3.12969803113e-05 +GAAGTC 0.0004524071511 0.000267417964084 +CTACTT 0.0001052595991 0.00024392531983 +CGTGGT 0.000104213973944 0.000100261827511 +GCCGTA 4.42647982972e-05 3.38453349422e-05 +CCGTAC 8.88781383132e-05 2.76337205293e-05 +CCGTAA 1.3941668755e-06 3.40842431888e-05 +CCGTAG 2.43979203213e-06 4.74631050013e-05 +CTAACA 0.000117110017542 0.000192719318965 +GCCGTT 7.98160536224e-05 6.39477740202e-05 +TTGGTG 0.000413719020305 0.000329613744296 +GAAGTT 0.000419295687807 0.000308350910344 +GGCGCG 0.000119201267855 0.000117542857352 +ATGCCC 0.000344359218249 0.000219158498261 +TTAGTT 8.43470959678e-05 0.000248146032188 +ATGCCA 0.00032240108996 0.000280239373321 +GGCGCC 0.000244327744932 0.000134744251111 +ATGCCG 0.000122338143325 5.12060008655e-05 +GGCGCA 9.7940223004e-05 7.8202632737e-05 +TATTAC 0.000176013568032 0.0001652448706 +ACTATG 0.000137325437237 0.000199249477707 +CCATAT 0.000162071899277 0.000186746612799 +TTTAAG 0.000329720466056 0.00039873786366 +ACTATC 0.000147781688803 0.000113481417159 +TTTAAA 0.000314733172144 0.000941218855722 +ACTATA 9.30606389397e-05 0.000176712466439 +TTTAAC 0.000219581282891 0.000274983391895 +TATTAT 0.000143250646458 0.000325791212349 +TTAGTG 0.000136976895518 0.000216291599301 +GGCGCT 0.000134537103486 0.000110455246035 +TTAGTC 7.35423026827e-05 0.000152104917034 
+ATGCCT 0.000338782550747 0.000315358885579 +TTAGTA 7.07539689317e-05 0.000176553194275 +TTTAAT 0.000234220035084 0.000556974759025 +ACTATT 0.000142902104739 0.000213185792095 +GGGCAT 0.00012408085192 0.000216769415794 +TCGATA 1.74270859438e-05 2.39704607473e-05 +TTCTAG 1.18504184418e-05 0.000302935656753 +TTTCCA 0.000263846081189 0.000516997445752 +TTTCCC 0.00020773086445 0.000463322726338 +GGGACT 0.000255829621654 0.000295848045436 +TATCGA 7.73762615903e-05 2.58817267205e-05 +GTTTGT 0.000106653765976 0.000489204453059 +TTTCCT 0.000327280674024 0.000714256021404 +GGGCAG 0.000533617371598 0.000437998452193 +GTCTGT 0.000219232741173 0.000397782230674 +GGGCAA 0.000149524397398 0.00020044401894 +GGGCAC 0.000174967942875 0.000209044715819 +GTACGG 5.75093836144e-05 2.73151762004e-05 +GTTTGA 5.92520922088e-06 0.000315358885579 +GTACGC 5.40239664257e-05 2.36519164184e-05 +GTACGA 4.4613340016e-05 2.18999226097e-05 +CCGCTC 0.000141159396145 0.000116905768694 +GCATGT 0.000103516890506 0.000270762679538 +CCGCTA 3.3460005012e-05 3.76678668886e-05 +CCGCTG 0.000295214835887 0.000159351800516 +TGAAAA 3.48541718875e-07 0.000563903098178 +GCGCGG 7.77248033092e-05 0.000139522416044 +GCGCGC 0.000101077098474 0.000146291483032 +TGTTTT 0.000130354602859 0.00101376732662 +GCGCGA 2.43979203213e-05 4.21274874928e-05 +GCATGG 0.000119201267855 0.000256428184739 +TGGATC 0.000247116078683 0.000233254084813 +GCATGC 9.75916812851e-05 0.000220830855988 +CCGCTT 5.64637584578e-05 7.66099110927e-05 +GCATGA 1.1153335004e-05 0.000198134572556 +TTTCAG 0.00039664047608 0.0004720030593 +GTCGAC 7.77248033092e-05 2.25370112674e-05 +GCGCGT 2.2306670008e-05 4.27645761505e-05 +GTCGAG 9.44548058152e-05 4.56314751103e-05 +CGTGGC 0.000167997108498 0.000108543980062 +CTTCCG 7.66791781526e-05 0.000114914866639 +TGCGCT 5.71608418956e-05 8.16269842724e-05 +GAGCTC 0.000751804487614 0.000318703601032 +ATTGAT 0.000445784858442 0.000223697754947 +GAGCTG 0.00195601612633 0.000517475262246 +AAACGA 0.000141159396145 
7.51764616128e-05 +AAACGC 0.0001551010649 6.29125049514e-05 +GTAACA 0.000109790641446 0.00018141099529 +GTAGTA 7.35423026827e-05 0.000117702129517 +AAACGG 0.000162420440996 7.06372049264e-05 +ATTGAA 0.000620752801317 0.000314721796921 +TGCGCG 4.39162565783e-05 6.18772358826e-05 +ATTGAC 0.000498414657992 0.000157121990214 +GAGCTT 0.000471925487357 0.000287406620721 +TGCGCC 0.000102819807068 8.5529152301e-05 +ATTGAG 0.000651075930859 0.0002068945416 +CGCTGA 9.75916812851e-06 9.54040264959e-05 +CACGCA 8.43470959678e-05 8.85553234252e-05 +AAACGT 0.000101077098474 8.60069687943e-05 +ATTTCG 3.03231295422e-05 4.09329462595e-05 +ATTTCA 0.000210519198201 0.000424778862545 +ATTTCC 0.000230734617895 0.00035525656277 +ATCAGT 0.000303928378859 0.000233094812649 +ATCGGG 0.000150221480835 4.79409214946e-05 +CATAGT 9.16664720642e-05 0.000194550948856 +ATCGGC 0.000173922317719 4.11718545062e-05 +ATCGGA 9.93343898795e-05 4.53129307814e-05 +ATTTCT 0.000297306086201 0.000566929269302 +TACGCC 0.000167997108498 2.43686411584e-05 +TGGGTC 0.000189606695068 0.000269169957893 +TTTCGG 9.75916812851e-05 6.16383276359e-05 +TGGGTA 0.00012094397645 0.00022162721681 +CATAGG 5.89035504899e-05 0.000161103794325 +ATCGGT 8.60898045622e-05 3.51195122577e-05 +ATCAGG 0.000239448160867 0.000234289353882 +CATAGC 0.000118155642699 0.00018786151795 +ATCAGC 0.000544073623164 0.000222742121961 +ACGGGT 7.11025106506e-05 5.31172668387e-05 +GTCTGA 2.26552117269e-05 0.000290273519681 +GATTCT 0.000337388383871 0.000289477158859 +CTTTTG 0.000233522951646 0.000459022377898 +GCGTTC 9.5151889253e-05 5.55859853874e-05 +CTTTTC 0.000269771290409 0.000532207937456 +CTTTTA 0.000125126477076 0.000438794813015 +GATTCC 0.000273953791036 0.000217087960123 +ACGGGG 0.000109093558008 7.95564461347e-05 +ACGGGA 0.000105956682538 7.4141192544e-05 +ACGGGC 0.000160677732402 6.42663183491e-05 +TACTAT 0.000236659827116 0.000171297212849 +CTTTTT 0.000227946284144 0.000705734960606 +CGTAGA 2.92775043855e-05 4.50740225348e-05 +TCACGA 
4.63560486104e-05 5.27987225098e-05 +TCACGC 5.19327161124e-05 5.29579946743e-05 +TCACGG 8.3998554249e-05 7.8919357477e-05 +TCTCAT 0.000177756276626 0.00032435776287 +TCACGT 3.72939639197e-05 8.44142471499e-05 +AGTGGA 0.000394897767486 0.000339727526737 +TTCCAC 0.000412324853429 0.000298316763985 +TTCCAA 0.000279181916819 0.000363220170991 +TTCCAG 0.00095744410175 0.000539773365266 +CGAGGC 0.000222021074924 0.000126063918149 +ACCCTT 0.000189258153349 0.000261047077507 +TGAAGT 0.0 0.000374687766831 +ATGCTT 0.000258617955405 0.000342355517451 +TTCCAT 0.000306019629173 0.000388624081219 +ACCCTA 0.000142205021301 0.000161024158243 +ACCCTC 0.000391760892016 0.000271718312524 +TGAAGG 0.0 0.000424858498627 +ACCCTG 0.000803040120289 0.000408134921362 +CTGACT 0.000443693608128 0.000402719667771 +CCCCTT 0.000181590235534 0.000330171196871 +AGTTAT 0.000103168348787 0.000233174448731 +CTGACG 0.000211216281638 7.95564461347e-05 +CTGACC 0.000619358634441 0.0003269061175 +CTGACA 0.000428706314217 0.000356212195756 +AGTTAA 4.87958406425e-06 0.000254994735259 +CCCCTG 0.000642710929606 0.000400330585304 +AGTTAC 0.000178453360064 0.000186109524141 +CCCCTC 0.00029138087698 0.00042207123575 +AGTTAG 4.1825006265e-06 0.00018284444477 +CCCCTA 9.7940223004e-05 0.000160785249996 +ACACAG 0.000460772152353 0.000488408092236 +ACACAA 0.000172876692562 0.000322765041225 +ACACAC 0.000234568576803 0.000776451801615 +AATGTT 0.000302882753703 0.000434096284164 +CTGCAG 0.00183437506644 0.000620205808305 +CATGGG 0.000234568576803 0.000292105149572 +CATGGA 0.000305322545735 0.00035095621433 +CATGGC 0.000318915672771 0.000285415718665 +AATGTC 0.000420341312964 0.000270045954798 +ACACAT 0.000179150443502 0.00037142268746 +AATGTA 0.000202154196948 0.000362901626663 +AATGTG 0.000603325715373 0.000402082579113 +CATGGT 0.000204942530699 0.00030174111552 +CTGCAT 0.000353072761221 0.000360353272032 +GCACAG 0.000459029443759 0.000387349903903 +ACATTT 0.000280924625414 0.000541127178664 +CGAGAT 0.000203548363823 
5.47896245652e-05 +ATTACT 0.000139068145831 0.000218441773521 +ACATTG 0.000162768982715 0.000288123345461 +GTACCA 0.000137325437237 0.000170023035533 +ACATTA 0.00011153335004 0.000233572629142 +ACATTC 0.00021086773992 0.000273868486744 +ATTACA 0.000142902104739 0.000242890050762 +ATGCTC 0.000344359218249 0.00024535876931 +ATTACC 0.000141159396145 0.000129966086178 +CGAGAC 0.000270119832128 6.92037554465e-05 +ATTACG 2.33522951646e-05 2.75540844471e-05 +AAGTCT 0.000453104234538 0.000320535230923 +ATGCTA 0.000167648566779 0.000213902516835 +GGACAT 0.000182635860691 0.000249260937339 +GCTATA 9.30606389397e-05 0.000135222067604 +GCTATC 0.000203548363823 0.000121285753216 +AAGTCG 0.00013000606114 5.15245451944e-05 +AAGTCA 0.000376773598104 0.000346735501973 +GCTATG 0.00023247732649 0.000198692025131 +AAGTCC 0.000490398198458 0.000239067518815 +GGACAG 0.000497020491116 0.000427167945012 +GGACAC 0.000232825868209 0.000266701239344 +GGACAA 0.000231780243052 0.000250933295066 +GCACCT 0.000277439208225 0.000225609020921 +AGGGTT 9.96829315983e-05 0.000268293960989 +GAGGCG 0.000333205883245 0.000153777274761 +TGTTGG 6.44802179919e-05 0.000345063144246 +GAGGCA 0.000588338421462 0.000393083701823 +AGCGCT 0.000127914810827 0.000100022919264 +GAGGCC 0.00107769099476 0.00036210526584 +TGTTGC 6.93598020562e-05 0.000270205226962 +TTCTCA 0.000316475880739 0.000470489973738 +ACTCGA 8.81810548755e-05 4.78612854124e-05 +TAAAAT 0.0 0.000639318468038 +ACTCGC 7.77248033092e-05 5.89307008405e-05 +ACTCGG 0.000149524397398 8.92720481652e-05 +AGCGCA 9.34091806586e-05 7.8202632737e-05 +GAGGCT 0.000810359496385 0.000392765157494 +AGCGCC 0.000242933578056 0.000110933062528 +AGGGTG 0.000220626908048 0.000301581843356 +AGGGTA 6.34345928353e-05 0.000154732907747 +AGCGCG 8.19073039357e-05 7.66099110927e-05 +AGGGTC 0.000172876692562 0.000224414479687 +TAAAAG 0.0 0.00043178683778 +ACTCGT 5.61152167389e-05 5.16041812766e-05 +TAAAAC 0.0 0.00037428958642 +TAAAAA 0.0 0.000726997794558 +ACGTTC 
0.000114321683791 5.59045297163e-05 +AGCTGC 0.000419992771245 0.000458464925323 +ACGTTA 3.83395890763e-05 4.07736740951e-05 +AGCTGA 2.5095003759e-05 0.000433698103753 +ACGTTG 7.07539689317e-05 6.45848626779e-05 +AGCTGG 0.000305322545735 0.000509033837531 +TAGGGG 0.0 0.00014907874591 +TAAATG 0.0 0.000420080333694 +AGTGCG 7.56335529959e-05 6.25939606225e-05 +AGTGCA 0.000299397336514 0.000264232520796 +AGAGTT 0.000181241693815 0.000332401007173 +AGTGCC 0.000500157366586 0.0002686921414 +CCAAAA 0.000274302332755 0.000338931165915 +TTCGTG 0.000278136291662 7.75655440793e-05 +TAGGGT 0.0 0.000148123112923 +GCATAG 3.83395890763e-06 0.000155688540734 +AGCTGT 0.000295911919325 0.000470489973738 +ACGTTT 0.000100380015036 9.17407667139e-05 +AGTGCT 0.000383744432482 0.000383447735875 +AGAGTC 0.000194486279132 0.000288362253707 +AGAGTA 0.000101077098474 0.000217406504452 +AGAGTG 0.000269074206972 0.000353186024632 +TTCGTA 4.42647982972e-05 3.86234998752e-05 +GTTAAA 0.000201805655229 0.000268055052742 +CCTGGC 0.00059496071412 0.000497486605609 +GTTAAC 0.000107350849414 0.000135381339769 +CCTGGA 0.000605416965686 0.000578317229059 +CCTGGG 0.000582064670522 0.000607623307315 +GTTAAG 0.000147433147084 0.000182286992195 +TTCGTC 0.000122338143325 4.73038328369e-05 +GCACCC 0.000280576083695 0.000203948006558 +GTCTGG 0.000227249200707 0.000313527255688 +GTCTGC 0.000279530458538 0.000290432791845 +GTTAAT 0.000131051686297 0.000198054936474 +CCTGGT 0.000411279228273 0.000385359001848 +TCTAAA 0.000237356910554 0.000310501084564 +GGAAGG 0.000209473573044 0.000523049788001 +CCACAT 0.000171482525687 0.000311854897961 +GGAAGC 0.000357952345285 0.000417213434734 +GCACCG 8.3998554249e-05 9.23778553716e-05 +GGAAGA 0.000290683793542 0.000586280837281 +GCGTTA 2.85804209478e-05 3.08987999002e-05 +CAATGA 7.66791781526e-06 0.000238987882733 +CCCGCA 9.58489726907e-05 8.99091368229e-05 +CAATGC 8.05131370602e-05 0.000173686295315 +GCGTTG 7.31937609638e-05 5.2480178181e-05 +CAATGG 8.29529290923e-05 
0.000215654510643 +CCACAG 0.00061064509147 0.000501707317967 +GGAAGT 0.000281273167132 0.000344187147342 +CCACAC 0.000234220035084 0.000340205343231 +CCACAA 0.000214701698827 0.000262082346576 +ATTTGT 0.000130703144578 0.000459181650063 +TCCTCG 0.0001551010649 0.000109499613048 +TGTATG 0.000134885645205 0.000368157608089 +CAATGT 0.000120246893012 0.000248464576517 +GCGTTT 7.94675119036e-05 7.17521100775e-05 +TCCTCC 0.000611690716626 0.000543994077624 +TGTATC 0.000126869185671 0.000236439528102 +CGTTAG 3.48541718875e-06 3.83049555463e-05 +TAACCG 0.0 3.63936895731e-05 +TCAAAC 0.000151615647711 0.000234528262129 +TCAGGG 0.000304276920578 0.000352787844221 +CGTTAC 7.98160536224e-05 3.12969803113e-05 +TAACCC 0.0 0.000154971815994 +CGTTAA 2.43979203213e-06 4.07736740951e-05 +TAACCA 0.0 0.000205779636449 +ACCGCT 9.2363555502e-05 6.47441348424e-05 +TCGCGA 1.42902104739e-05 1.94312040609e-05 +TAGTGT 0.0 0.000223299574536 +TCAGGA 0.000374333806072 0.000433459195507 +TTCGTT 8.08616787791e-05 7.39819203795e-05 +TACCCT 0.00025095003759 0.00020474436738 +TAACCT 0.0 0.000198851297296 +CGTTAT 5.19327161124e-05 3.42435153533e-05 +GTGAAA 0.000540239664257 0.000327861750487 +GTGAAC 0.000541633831132 0.000218521409603 +TGCATA 8.36500125301e-05 0.00023707661676 +ACCGCG 6.27375093976e-05 4.20478514105e-05 +ACCGCA 8.9575221751e-05 6.1319783307e-05 +CCCTAT 0.000307065254329 0.000140876229442 +ACCGCC 0.000174967942875 8.41753389033e-05 +AGATCC 0.000121292518169 0.00023635989202 +AGGCTT 0.000165557316466 0.000342275881368 +CCTTCT 0.000422432563277 0.00049430116232 +GGGACG 0.000102819807068 9.35723966049e-05 +TGTCGC 6.65714683052e-05 6.41070461846e-05 +GTTGCC 0.000239099619148 0.000188100426196 +GTGCCG 0.000129308977703 7.86804492303e-05 +GTGCCA 0.000342616509654 0.000282230275377 +GTTGCG 3.93852142329e-05 3.45620596821e-05 +GTGCCC 0.000531177579566 0.000258817267205 +CCGCCG 0.000171482525687 0.000160625977832 +AGGCTA 0.000109093558008 0.000213185792095 +CCTTCC 0.000417552979213 
0.000590103369227 +AGGCTC 0.00025095003759 0.000330728649447 +CCGCCC 0.000243979203213 0.000209522532313 +TGTCGA 5.12356326747e-05 4.34016648082e-05 +CCGCCA 0.000178453360064 0.000116109407872 +AGGCTG 0.000492140907052 0.000565018003329 +GTGCCT 0.00044927027563 0.000341638792711 +TCCAAA 0.000486215697831 0.000351035850412 +GTTGCT 0.000269074206972 0.000292423693901 +GGATAT 0.000137325437237 0.000173447387068 +TCGGGT 5.54181333012e-05 6.43459544313e-05 +ATTTGA 1.04562515663e-05 0.00036783906376 +GCAGTT 0.000240842327743 0.000272514673346 +GAAGCT 0.000700220313221 0.000397622958509 +GGATAA 2.43979203213e-06 0.000170739760273 +GGATAC 0.000128960435984 0.000129647541849 +TCGGGG 0.000115715850667 9.30149440294e-05 +GGATAG 2.43979203213e-06 0.000139044599551 +GAAGCG 0.000159980648964 9.23778553716e-05 +CCGCCT 0.000164511691309 0.000145893302621 +GCAGTG 0.000533268829879 0.000400808401798 +GAAGCC 0.000871354297188 0.000405666202813 +GCAGTA 0.000173225234281 0.000185074255072 +GAAGCA 0.000630511969445 0.000425973403778 +GCAGTC 0.000258617955405 0.000235802439444 +CGCCCG 5.36754247068e-05 0.000113481417159 +GAGAGA 0.000548256123791 0.000690843013232 +CGCCCC 0.000125823560514 0.00017711064685 +CGCCCA 0.000127566269108 9.59614790714e-05 +AGTTTG 0.000179150443502 0.0003685557885 +ACGCAG 0.000172876692562 8.1865892519e-05 +ACGCAC 7.98160536224e-05 6.16383276359e-05 +ACGCAA 3.27629215743e-05 4.23663957394e-05 +CTCGCA 7.07539689317e-05 6.43459544313e-05 +ACGTAC 8.36500125301e-05 2.81911731048e-05 +CCCTAG 1.08047932851e-05 0.000199169841625 +ACGCAT 3.38085467309e-05 4.39591173837e-05 +AGTTTC 0.000206336697574 0.000323481765965 +GCCGGG 0.00018298440241 0.00018356116951 +TACCCG 0.000101077098474 3.99773132729e-05 +GCCGGC 0.000241190869462 0.000144459853142 +CATGTT 0.000181241693815 0.000333277004078 +GCCGGA 0.000111881891759 9.93858306067e-05 +TTGGGG 0.000208079406169 0.000371661595706 +AGATCT 0.000134885645205 0.000303413473246 +ACGTAG 1.3941668755e-06 4.4436933877e-05 +GCCACT 
0.000441602357815 0.000282309911459 +CATGTA 0.000120595434731 0.000272673945511 +GTTGCA 0.000189606695068 0.000183162989099 +CATGTC 0.000258269413687 0.000255074371341 +GCCGGT 7.70277198714e-05 6.96815719398e-05 +CATGTG 0.000403611310458 0.000418407975968 +GTGCGA 0.000107350849414 3.93402246152e-05 +TACCCC 0.000235962743679 0.000148441657252 +GATTGG 0.000128960435984 0.000173686295315 +GGCCTG 0.000825695332016 0.000426212312025 +GGCCTC 0.000525600912064 0.000364812892636 +GGCCTA 0.00013627981208 0.000140637321195 +TTTGTT 0.00031926421449 0.000944085754682 +AATCGG 9.20150137831e-05 4.07736740951e-05 +AATCGC 8.26043873734e-05 3.92605885329e-05 +AATCGA 7.31937609638e-05 3.83845916286e-05 +TTCCCA 0.000379213390136 0.00049645133654 +GGCCTT 0.000313687546988 0.000327941386569 +TCACTA 8.99237634698e-05 0.000188737514854 +TCACTC 0.000174967942875 0.000269488502222 +AATCGT 5.61152167389e-05 4.02958576018e-05 +AGCCGA 0.000179498985221 9.04665893984e-05 +CCTTCA 0.000308110879486 0.00039443751522 +AGCCGC 0.000302185670265 0.000134107162453 +TAATTG 0.0 0.000218362137439 +AAAAAT 0.000454498401413 0.000746667906866 +AGCCGG 0.000293472127293 0.000144141308813 +GTTTTT 0.000186469819598 0.000672526714322 +TGGTGA 1.32445853173e-05 0.000355097290605 +GATGGA 0.000664320516176 0.00034665586589 +TGGTGC 0.000122338143325 0.000274346303237 +CCAAGT 0.000246418995245 0.000302139295931 +GGGCTG 0.000620752801317 0.00046985288508 +TGGTGG 0.000192743570538 0.000429557027478 +GATGGG 0.000633300303196 0.00027155904036 +AAAAAA 0.000459726527197 0.00164687418025 +AAAAAC 0.000430100481092 0.000494619706649 +GAGCGA 0.000231083159614 9.1422222385e-05 +AGCCGT 0.000139765229269 8.28215255056e-05 +AAAAAG 0.000777596574811 0.000579033953799 +TGACAG 6.97083437751e-07 0.000386792451328 +CCAAGG 0.000225506492112 0.000370467054473 +CTACAT 9.34091806586e-05 0.000235085714704 +TCTCAG 0.000571259877237 0.000522014518932 +CCAAGC 0.000261057747438 0.000303413473246 +TGGTGT 0.000131400228016 0.000383607008039 
+CCAAGA 0.000224112325237 0.000383925552368 +AGAACT 0.000202851280385 0.000396348781194 +GCATTA 0.00010212272363 0.000154892179912 +CCTTCG 8.64383462811e-05 8.62458770409e-05 +TACCAT 0.000189955236787 0.000192798955047 +AGATAG 7.66791781526e-06 0.000204585095215 +CACTGT 0.000260709205719 0.000445563880004 +ACCCGA 0.000119201267855 6.681467298e-05 +ATGTCT 0.000346799010281 0.00035167293907 +ACCCGC 0.000154055439743 7.36633760507e-05 +AGATAA 1.1153335004e-05 0.00027299248984 +TACCAC 0.000357603803566 0.000186905884963 +AGAACG 5.50695915823e-05 7.86804492303e-05 +TACCAA 0.000196926071165 0.000202514557078 +AGAACA 0.000237356910554 0.000444528610935 +TACCAG 0.000637134262104 0.000214778513739 +AGAACC 0.000200411488353 0.000299033488725 +ATGTCA 0.000279879000257 0.00030532473922 +CACTGG 0.000296957544482 0.000408692373937 +AGATAT 0.000107350849414 0.00025180929197 +TGACAA 3.48541718875e-07 0.000296644406258 +ACCAGG 0.000288592543229 0.000339488618491 +CACTGC 0.000326235048867 0.000361707085429 +ATGTCG 9.2363555502e-05 4.11718545062e-05 +CACTGA 2.92775043855e-05 0.000410125823417 +TAGGTT 0.0 0.000173288114904 +GTTTAC 0.000139765229269 0.000193595315869 +AATTAT 0.00016730002506 0.000342275881368 +CTGTTG 0.00043079756453 0.000399295316236 +TGCTAT 0.000179498985221 0.000245279133228 +CTGTTC 0.000452755692819 0.000380103020421 +CGGGTT 7.45879278393e-05 6.80092142133e-05 +CTGTTA 0.000149872939116 0.000281433914555 +AATTAC 0.000211216281638 0.000191126597321 +AATTAA 1.01077098474e-05 0.000395233876043 +AATTAG 4.1825006265e-06 0.000188498606607 +TAGGTC 0.0 0.000125745373821 +CGGGTC 0.000161374815839 8.49716997255e-05 +CTGTTT 0.000415810270618 0.000570911073413 +CGGGTA 6.83141768996e-05 3.72696864775e-05 +CGGGTG 0.000243282119775 0.000105915989349 +CCAAAC 0.00026663441494 0.000261604530083 +CTTCGG 0.000238402535711 8.87145955897e-05 +CACACT 0.000291032335261 0.000347133682384 +TGGATG 0.000283015875727 0.000366087069951 +AGCAAA 0.000496671949397 0.000401764034784 +ATATAT 
0.000103865432225 0.000573937244537 +AGCAAC 0.000554878416449 0.000254357646601 +TTAGGC 7.59820947148e-05 0.000124869376916 +AAAGCT 0.000514099035341 0.000406382927553 +AGCAAG 0.000770277198714 0.000415222532679 +AAAGCA 0.000485867156112 0.000516360357095 +ATATAG 9.06208469076e-06 0.000183959349921 +TTAGGT 7.94675119036e-05 0.000171456485013 +ATATAA 7.66791781526e-06 0.000344824235999 +AGCAAT 0.000353769844658 0.000248384940435 +ATATAC 8.3998554249e-05 0.000201718196255 +ATAGCA 0.000172179609124 0.000210717073546 +ATAGCC 0.000181241693815 0.000166837592244 +TACTTG 0.000253738371341 0.00024320859509 +ATAGCG 3.3460005012e-05 2.8987533927e-05 +TCTCAC 0.000202154196948 0.000307076733028 +TGCTAA 1.49872939116e-05 0.000257702362054 +ATAGCT 0.000171133983968 0.00021087634571 +TACTTT 0.000310899213237 0.000365051800882 +GTTTTC 0.000215747323984 0.000445723152168 +CGAGTT 9.55004309718e-05 6.55404956645e-05 +CTCGCC 0.000149175855679 0.000116666860448 +CGTACT 3.79910473574e-05 3.00228029958e-05 +TCCCGT 7.84218867469e-05 8.44938832322e-05 +ACATAC 0.000180196068659 0.000234050445636 +CTTAAG 0.000209822114763 0.000240421332213 +ACATAA 8.36500125301e-06 0.000254357646601 +CTTAAA 0.000221323991486 0.000349761673097 +AGGGGT 0.000104213973944 0.000231820635333 +CTTAAC 0.000149524397398 0.000180534998386 +CGTACA 4.14764645462e-05 3.6712233902e-05 +CGAGTG 0.000240493786024 8.09102595324e-05 +TTGTTA 0.0001052595991 0.000305484011384 +CGAGTA 7.59820947148e-05 3.60751452443e-05 +CGAGTC 0.00013627981208 6.70535812267e-05 +AGGGGA 0.00014255356302 0.000355495471016 +CTTAAT 0.000151615647711 0.000229431552867 +AGGGGC 0.000221672533205 0.000288760434119 +TATGCA 0.000296260461044 0.000237872977582 +AGGGGG 8.33014708112e-05 0.000250216570326 +TCATTA 9.2363555502e-05 0.000251968564134 +CGACTT 7.66791781526e-05 5.98863338271e-05 +TCGGGC 0.000142902104739 8.32993419989e-05 +TTGTAA 9.06208469076e-06 0.000380899381244 +TTGTAC 0.000165557316466 0.000225449748756 +GTACCT 0.00016102627412 
0.000177429191179 +TCATTG 0.000128263352546 0.000286928804228 +TGAATC 0.0 0.000215335966315 +CGACTA 4.98414657992e-05 2.54039102272e-05 +TGAATA 0.0 0.000313845800017 +CGACTC 0.000100380015036 5.58248936341e-05 +TGAATG 0.0 0.000392526249247 +AAGTAT 0.000357603803566 0.000272196129017 +CGACTG 0.000162071899277 6.84073946243e-05 +TTGCCA 0.000251298579309 0.000316792335059 +TTGTAT 0.000137673978956 0.000394596787385 +ATCGTG 0.000320658381365 5.92492451694e-05 +GCTTCG 9.27120972208e-05 8.15473481901e-05 +ATCGTC 0.000197274612883 4.07736740951e-05 +ATCGTA 5.61152167389e-05 3.08987999002e-05 +TGGGAG 0.000450315900787 0.00047128633456 +TGGGAA 0.00034087380106 0.00052336833233 +TGGGAC 0.000352375677783 0.000302537476342 +GCTTCT 0.000376425056385 0.000438316996522 +ATCGTT 9.93343898795e-05 5.00114596322e-05 +GATTGA 9.06208469076e-06 0.000173606659233 +TGGGAT 0.000307762337767 0.000347930043206 +GGGCTA 0.00010839647457 0.000172571390164 +CTAGCC 0.00021086773992 0.000198134572556 +CTAGCA 0.000151267105992 0.000204903639544 +CTAGCG 4.56589651727e-05 4.08533101773e-05 +CGGTAG 2.43979203213e-06 4.76223771657e-05 +ACTTAG 3.13687546988e-06 0.000204824003462 +ACTTAA 9.06208469076e-06 0.000258498722876 +CGGTAC 0.000127914810827 3.17747968046e-05 +ACTTAC 0.000184030027566 0.000174164111808 +CGGTAA 6.97083437751e-06 2.87486256803e-05 +GACCAG 0.000912133678297 0.000320296322676 +GACCAC 0.00046495465298 0.000200205110693 +TTGCCC 0.000224112325237 0.000240978784788 +CTAGCT 0.000175665026313 0.000215097058068 +TTCGAG 0.000241539411181 7.09557492553e-05 +ACTTAT 9.82887647228e-05 0.000173367750986 +CGGTAT 7.00568854939e-05 2.63595432138e-05 +TACACA 0.000324840881992 0.000335427178298 +TACACG 0.00014882731396 5.05689122077e-05 +GCGAGA 5.26297995502e-05 7.31855595574e-05 +GAGAGC 0.000790492618409 0.000358760550387 +GCGAGC 9.20150137831e-05 0.000115153774886 +GGCCGT 0.000152661272867 7.91582657236e-05 +GCGAGG 7.14510523694e-05 0.000113242508912 +TTAAGA 0.000103865432225 0.000350319125672 
+CAGCCG 0.000245373370088 0.000158157259283 +CAGCCA 0.000695689270875 0.000516838173588 +CAGCCC 0.000712419273381 0.000475586682999 +TCCGGG 0.000107699391132 0.000120170848065 +GCGAGT 4.32191731405e-05 5.33561750853e-05 +CAGCCT 0.00073437740167 0.00059233317953 +GAACTC 0.000393852142329 0.000301183662944 +GAACTA 0.000220975449767 0.000187065157128 +GGACTT 0.000198668779759 0.000291945877407 +CACCTA 0.000142205021301 0.000175517925206 +AACTAT 0.000290335251823 0.000214937785903 +CACCTC 0.000356558178409 0.000333754820571 +CACCTG 0.000729149275887 0.000401764034784 +TGATCC 0.0 0.000195108401431 +AACACT 0.000322749631679 0.000316712698977 +TGATCA 0.0 0.000218123229192 +TGATCG 0.0 4.22867596572e-05 +TGCATG 0.000265240248064 0.000329215563885 +AACTAG 9.06208469076e-06 0.00016596159534 +GATTTA 0.000174967942875 0.000241934417775 +CACCTT 0.000219581282891 0.00031073999281 +AACTAC 0.000434282981719 0.000161024158243 +AACTAA 1.88212528193e-05 0.000234846806458 +AACACG 0.000162071899277 6.753139772e-05 +AACACA 0.000397686101237 0.000385916454423 +AACACC 0.00045554402657 0.000218123229192 +TGATCT 0.0 0.000271957220771 +CCAGCA 0.00050503695065 0.000518112350903 +CTATAC 6.76170934618e-05 0.000120091211983 +CAACAG 0.00049283799049 0.000303572745411 +GACAGT 0.00059496071412 0.00028629171557 +CAACAA 0.000257920871968 0.000298714944396 +CCAGCC 0.000674428226024 0.000572344522893 +CAACAC 0.000159980648964 0.000214698877657 +GGCCGG 0.000281273167132 0.000159829617009 +CTATAA 5.92520922088e-06 0.000207292722011 +GACAGG 0.000410930686554 0.000341001704053 +CAACAT 0.000146736063647 0.000221467944645 +GACAGC 0.000885644507662 0.000340762795806 +GACAGA 0.000431146106249 0.000428362486245 +GTATTA 6.06462590843e-05 0.000182366628277 +GTATTC 0.000112927516916 0.000168032133478 +GTAAGT 6.65714683052e-05 0.000155688540734 +CCAGCG 0.00013314293661 0.000140318776866 +AGAGGT 0.000171133983968 0.000319261053608 +GGTGGC 0.0005576667502 0.000333993728818 +TCCCCT 0.000358649428723 0.000407975649197 
+GGTGGA 0.000336691300434 0.000305961827877 +CGTCAT 4.87958406425e-05 5.27190864276e-05 +GGTGGG 0.000291032335261 0.000397861866756 +AGAGGC 0.00026349753947 0.00041777088731 +GTAAGA 9.37577223775e-05 0.000204585095215 +AGAGGA 0.000291729418699 0.000554506040476 +CGCCCT 0.000126869185671 0.000125108285163 +AGAGGG 0.000202154196948 0.000395791328618 +GTAAGG 6.30860511164e-05 0.000151388192294 +CGTCAA 4.56589651727e-05 3.67918699842e-05 +CGTCAC 7.00568854939e-05 6.37885018558e-05 +TCCCCC 0.000253041287903 0.000341081340135 +GGTGGT 0.000328674840899 0.000276815021786 +CGTCAG 0.000138371062393 7.74859079971e-05 +GTCCGG 0.000172179609124 7.03186605975e-05 +GTCGCA 5.0190007518e-05 3.98976771907e-05 +GTCGCC 0.000115018767229 7.10353853375e-05 +CCGTCT 0.000103516890506 8.70422378631e-05 +TCGGTA 3.76425056385e-05 3.20137050512e-05 +CTCGCG 3.3460005012e-05 5.09670926188e-05 +TCGGTG 0.000196228987727 8.64051492054e-05 +GTCGCT 6.23889676787e-05 6.71332173089e-05 +TACCTA 0.000126520643952 0.000158236895365 +GTAACT 0.000123383768482 0.000202275648831 +CCGTCG 5.2978341269e-05 3.22526132979e-05 +CCGTCA 7.52850112771e-05 5.89307008405e-05 +CCGTCC 0.000146387521928 9.29353079472e-05 +GGCTGT 0.000347147552 0.000408692373937 +CGCGTG 0.000143250646458 5.00910957144e-05 +CTATAT 6.41316762731e-05 0.000190648780827 +CGCGTC 6.48287597108e-05 4.6268563768e-05 +CGCGTA 1.70785442249e-05 1.20250484148e-05 +GTTTTA 0.000139765229269 0.000424539954298 +ATGCAG 0.000694992187437 0.000337497716435 +CCACTA 0.000102819807068 0.000156166357227 +CCACTC 0.000236311285397 0.00027840774343 +CGCGTT 1.77756276626e-05 2.50853658983e-05 +GGCTGG 0.000459726527197 0.000536349013731 +GGCTGA 1.6730002506e-05 0.000374369222502 +GGCTGC 0.000468440070168 0.000442139528468 +ATGCAT 0.000187863986474 0.000278965196006 +TTTAGA 0.00011153335004 0.0003304897412 +TTTCAT 0.000177407734908 0.000482196477824 +TTGACC 0.000174270859438 0.000187304065374 +ATGACG 0.000138022520675 5.41525359075e-05 +GGAGTT 0.000226900658988 
0.000272355401182 +ATGACA 0.000329023382618 0.000293777507298 +ATGACC 0.000387926933108 0.000199090205542 +CGGGCC 0.000330417549494 0.000141115137688 +GGGCCT 0.000342965051373 0.000313686527852 +TAAGAA 0.0 0.000375961944146 +ATGACT 0.000308459421205 0.000274585211484 +GGAGTA 0.000141507937863 0.000148999109828 +GGAGTC 0.000280924625414 0.000259852536274 +GGAGTG 0.000376773598104 0.000305643283549 +GGGCCG 0.000118155642699 0.000142548587168 +GGGCCC 0.00039036672514 0.000253561285779 +GGGCCA 0.000284410042602 0.000282947000117 +GGCAAG 0.000831271999518 0.000277850290855 +ACGAGT 5.92520922088e-05 4.88965544812e-05 +AGGCCA 0.000253041287903 0.000404710569826 +AGGCCC 0.000246070453526 0.000287247348556 +AGGCCG 8.99237634698e-05 0.000110853426446 +ACGAGG 6.51773014297e-05 7.93175378881e-05 +ACGAGC 7.59820947148e-05 5.55063493052e-05 +GACCCG 0.000185424194442 8.97498646585e-05 +ACGAGA 4.35677148594e-05 6.10012389782e-05 +GGCAAC 0.000483078822361 0.000144300580977 +AGGCCT 0.000260709205719 0.000355176926688 +AGACGG 9.58489726907e-05 8.57680605476e-05 +GGGTTG 0.000135234186924 0.000225210840509 +TTTGGG 0.000453452776257 0.000409249826513 +GGGTTA 7.24966775261e-05 0.000136098064509 +GGGTTC 0.000153706898024 0.000205700000366 +TTTGAT 0.000640619679293 0.000384323732779 +TGAACT 0.0 0.000373811769926 +CTACGA 6.37831345542e-05 3.04209834069e-05 +ATTGCT 0.000459029443759 0.000302776384589 +GGGTTT 0.000174967942875 0.000337816260764 +TACAAG 0.000603674257092 0.000222981030207 +TGCGAG 0.000167648566779 6.64164925689e-05 +AACGGC 0.00019832023804 5.08874565366e-05 +ATCCAT 0.000273953791036 0.000246155130133 +ATTGCA 0.000301488586827 0.000214619241575 +TTTGGC 0.00046809152845 0.000264710337289 +ATTGCG 7.80733450281e-05 2.74744483648e-05 +TTTGAC 0.000649333222265 0.00025682636515 +TTTGAA 0.000781430533718 0.000574415061031 +CGGGCT 0.000198668779759 0.000129966086178 +CTCGTA 4.1825006265e-05 3.16951607223e-05 +CTCGTC 0.000124777935357 7.01593884331e-05 +CCCTGG 0.00033460005012 
0.000546462796173 +CCCTGA 2.71862540723e-05 0.000394915331714 +AAGCTT 0.000373985264353 0.0003340733649 +CCCTGC 0.000330766091213 0.000490956446867 +GTGTAA 1.53358356305e-05 0.000225529384838 +CCCTGT 0.000226900658988 0.000430831204794 +AAGCTC 0.000569517168642 0.000277292838279 +GATATA 0.000166951483341 0.0001652448706 +AAGCTA 0.000277439208225 0.000235722803362 +GATATG 0.000302882753703 0.000166359775751 +AAGCTG 0.00130947123781 0.000479011034535 +TGACGC 3.48541718875e-07 5.37543554964e-05 +TACTCT 0.000196926071165 0.000233094812649 +CATCGT 5.82064670522e-05 6.7929578131e-05 +GTAGTC 0.000100380015036 0.000120569028476 +CAGGGT 0.000347147552 0.000334630817476 +CCAGTC 0.000291729418699 0.000291229152667 +TACGAT 0.00013314293661 2.68373597071e-05 +CCAGTG 0.000535708621911 0.000426053039861 +TACTCG 9.65460561285e-05 3.45620596821e-05 +CATCGA 7.07539689317e-05 5.4709988483e-05 +CATCGC 7.66791781526e-05 6.45848626779e-05 +TACTCC 0.00026663441494 0.000156484901556 +TACTCA 0.000163117524434 0.000199090205542 +CATCGG 0.000105956682538 5.9089973005e-05 +CAGGGG 0.000340525259341 0.000328658111309 +CCAGTT 0.000239796702586 0.000304209834069 +CAGGGC 0.00065072738914 0.000401126946127 +AATCTG 0.000335297133558 0.000291866241325 +CGGACT 0.000125475018795 6.33106853625e-05 +AATCTC 0.000235962743679 0.000237395161089 +AATCTA 0.000122686685044 0.000199966202447 +TGAACG 0.0 6.5699767829e-05 +TCACAC 0.000136976895518 0.000279681920746 +TCACAA 0.000142205021301 0.000281354278472 +TAAATT 0.0 0.000415142896597 +GCCATA 0.000197274612883 0.00015767944279 +CGGACG 5.82064670522e-05 3.87031359574e-05 +ACATCG 5.96006339277e-05 5.16838173588e-05 +CGGACA 0.000152312731149 7.28670152285e-05 +CGGACC 0.000172876692562 7.25484708996e-05 +GCCATG 0.000876233881253 0.000319818506183 +TTTTAA 1.42902104739e-05 0.000959216610303 +TCACAT 0.00011467022551 0.000299033488725 +ATTAAC 0.000185075652723 0.00016811176956 +ATTAAA 0.00032553796543 0.000538738096198 +ATTAAG 0.000295563377606 0.000214539605492 
+TTTTAC 0.000181590235534 0.000344346419506 +TCACTT 0.000148130230522 0.000328897019556 +ATTCGT 7.14510523694e-05 4.64278359325e-05 +GTGCAG 0.000848002002024 0.000333197367996 +TTACTA 7.49364695582e-05 0.0001997272942 +ATTAAT 0.000188561069912 0.000292582966065 +TACCTG 0.000773414074184 0.000272594309429 +GGTATG 0.000103865432225 0.000141592954182 +CTGAAT 0.000460075068915 0.000326746845336 +GGTATC 0.000135234186924 0.000105836353266 +CGAGAG 0.000388275474827 9.89876501956e-05 +CTGAAA 0.000677913643212 0.000488089547908 +CTGAAC 0.000616918842409 0.000309306543331 +TCGACC 4.21735479839e-05 3.57566009154e-05 +GGAACC 0.00025408691306 0.000246632946626 +CTGAAG 0.00155519314962 0.000567884902289 +CATTGC 8.57412628433e-05 0.000232298451827 +TATGAG 0.000664669057895 0.00019757711998 +CATTGA 1.01077098474e-05 0.000245597677557 +CTAGTT 7.77248033092e-05 0.00018642806847 +CATTGG 6.65714683052e-05 0.000226086837414 +TCAACT 0.000147084605365 0.000228316647716 +ATGTTA 0.000114321683791 0.000263117615645 +ACACCG 9.93343898795e-05 7.74062719149e-05 +CTGCCA 0.000482381738923 0.000474073597437 +CTGCCC 0.000847304918586 0.000510626559175 +ACACCC 0.00031612733902 0.000245597677557 +ACACCA 0.000321704006522 0.000299829849547 +CTGCCG 0.000238053993992 0.000155847812899 +CTAGTG 0.000209822114763 0.000180136817975 +TCAACC 0.000138719604112 0.000170580488109 +ATGTTC 0.000369454222008 0.000279761556828 +CTAGTC 9.13179303453e-05 0.00013864641914 +TCAACG 3.58997970442e-05 4.27645761505e-05 +CTAGTA 5.78579253333e-05 0.000113959233652 +CATTGT 9.34091806586e-05 0.000343550058684 +TATGAC 0.000524206745188 0.000159192528352 +GTAATG 0.000110836266602 0.000184118622086 +ACACCT 0.000292775043855 0.000265506698111 +CTGCCT 0.00067547385118 0.000753357337772 +CCTCGT 7.42393861204e-05 8.77589626031e-05 +TTGTGT 0.000129308977703 0.000503140767447 +AGACTG 0.000282318792289 0.000418009795557 +AGACTC 0.00017043690053 0.000291945877407 +AGACTA 9.20150137831e-05 0.000198532752967 +CCTCGG 0.000231431701333 
0.000153060550021 +CCTCGC 0.00011780710098 0.000118339218174 +CCCGAC 0.000200760030072 7.35041038862e-05 +CCTCGA 0.000168694191936 7.69284554216e-05 +CGAGGA 0.000165557316466 0.00012319701919 +AGACTT 0.000165905858185 0.0003649721648 +CGAGGG 0.000160329190683 0.000113242508912 +CACCGT 9.58489726907e-05 8.6644057452e-05 +ACTTTA 0.000134537103486 0.000310899264975 +ACTTTG 0.000238402535711 0.000401445490456 +TAGCTA 0.0 0.000160625977832 +GACCTG 0.00128193644202 0.000348726404028 +GTCGCG 2.47464620401e-05 3.55176926688e-05 +GACCTA 0.000239099619148 0.000156166357227 +GACCTC 0.000680701976963 0.000252127836299 +CACCGG 0.000214353157108 9.5802206907e-05 +CTATTG 7.38908444016e-05 0.000176234649946 +CACCGA 0.000145341896771 8.16269842724e-05 +ACTTTT 0.000200411488353 0.00046770271086 +GACCTT 0.000418947146088 0.000273629578497 +GGACGC 9.55004309718e-05 8.65644213698e-05 +GGACGG 0.000123035226763 9.02276811518e-05 +CTATTA 4.11279228273e-05 0.00016237797164 +CACAGC 0.000533617371598 0.000453925668636 +CTTATC 0.000174270859438 0.000141831862428 +CACAGA 0.000270468373847 0.000530535579729 +CTTATA 7.94675119036e-05 0.000175916105617 +CACAGG 0.000302882753703 0.00044038753466 +CTTATG 0.000141507937863 0.000191126597321 +ACTCAT 0.0001551010649 0.000231024274511 +CGTATA 1.3941668755e-05 2.6439179296e-05 +GACGTA 6.69200100241e-05 3.07395277357e-05 +CGTATC 5.54181333012e-05 2.5324274145e-05 +CTATTC 6.86627186184e-05 0.000162457607723 +GAGGAC 0.00130284894516 0.00033909043808 +CGTATG 3.69454222008e-05 3.61547813265e-05 +AGCGAT 0.000146736063647 5.20023616877e-05 +TTAACA 0.000129657519422 0.000271957220771 +ACTCAC 0.000231431701333 0.000258100542465 +TTAACC 8.3998554249e-05 0.000174164111808 +ACTCAA 0.000174270859438 0.000242969686844 +ACTCAG 0.000403611310458 0.000390694619356 +CTTATT 0.000159632107245 0.000275779752717 +CACAGT 0.000319612756209 0.000394198606974 +AGCGAC 0.000240842327743 6.06030585671e-05 +TAAACA 0.0 0.000364175803978 +AGCGAA 0.000128263352546 6.23550523759e-05 
+GAGGAT 0.00093583451518 0.000274266667155 +AGCGAG 0.000362134845911 0.000121922841874 +CGTATT 4.53104234538e-05 4.12514905884e-05 +TAAACG 0.0 5.35154472498e-05 +AGTTCC 0.000222718158361 0.000294414595956 +GTATGT 0.000103168348787 0.000291388424832 +AGTTCA 0.000198668779759 0.000335108633969 +TACTTA 0.000128960435984 0.000206575997271 +AGTTCG 3.3460005012e-05 4.97725513856e-05 +TAGCCC 0.0 0.000191684049896 +CTGTGA 3.76425056385e-05 0.00055140023327 +CGGGGT 0.000132794394891 8.89535038363e-05 +TAGGAC 0.0 0.000149238018075 +GCAAAG 0.000432540273124 0.00032220758865 +TAGGAA 0.0 0.0002995909413 +GCAAAA 0.000284410042602 0.000289477158859 +TAGGAG 0.0 0.000233015176567 +GCAAAC 0.000227249200707 0.000213185792095 +CGGGGC 0.000264543164626 0.000184835346825 +CGGGGA 0.00013314293661 0.00013936314388 +CTGTGT 0.000375727972948 0.00067603070194 +CGGGGG 9.30606389397e-05 0.000108145799651 +GTATGC 6.34345928353e-05 0.000128691908863 +AGTTCT 0.000296609002763 0.000394198606974 +GCAAAT 0.000254435454779 0.000240819512624 +TAGGAT 0.0 0.00018356116951 +TAATTA 0.0 0.000289317886694 +CTATTT 0.000105956682538 0.000352150755563 +TGCTTT 0.000243979203213 0.000600933876409 +GTCTAT 0.000215050240546 0.000164607781942 +GTAATT 0.000106653765976 0.000216530507548 +GTTACC 0.000140810854426 0.000139681688208 +GTTACA 0.000104213973944 0.000201957104502 +TAATTC 0.0 0.00020259419316 +GTTACG 2.40493786024e-05 2.88282617625e-05 +TATCGC 5.15841743935e-05 2.49260937339e-05 +TATAGT 7.98160536224e-05 0.000183800077757 +GTCTAA 6.97083437751e-06 0.000153219822185 +AGGACG 7.84218867469e-05 9.34927605227e-05 +GTCTAC 0.000340176717622 0.000156803445885 +TGCTTC 0.000360740679036 0.000427008672847 +TTCAAA 0.000423129646715 0.000471604878889 +GGCGTT 7.66791781526e-05 5.7258343114e-05 +GGGCTC 0.000314036088707 0.000312332714455 +TATAGC 0.000121989601606 0.000143106039744 +TATCGT 3.83395890763e-05 3.08987999002e-05 +TATAGA 9.2363555502e-05 0.000228635192045 +AGTGGG 0.000372939639197 0.000341877700957 +TATAGG 
6.37831345542e-05 0.000132992257302 +GTGGCG 0.000211913365076 0.000110853426446 +GTTACT 0.000137325437237 0.000194471312774 +GCTCTC 0.000362134845911 0.000346018777233 +AACCTT 0.00029765462792 0.000293618235134 +GATGGC 0.000770974282152 0.000268771777482 +GCTCTG 0.000773762615903 0.000542003175568 +CCACGT 6.41316762731e-05 0.000103049090389 +CAATAT 0.000108047932851 0.000197895664309 +CAGGAG 0.00162629566027 0.000584210299143 +GAATGG 0.000281273167132 0.000276894657868 +GAATGA 1.98668779759e-05 0.000306360008289 +ACTACT 0.000125126477076 0.000171775029342 +GAATGC 0.000234568576803 0.000212389431273 +CCACGA 7.87704284658e-05 7.83619049015e-05 +CAATAC 0.000104911057381 0.000116427952201 +CCACGC 0.000115715850667 0.000108942160473 +CAATAA 5.576667502e-06 0.000300785482533 +GTTCCG 5.43725081445e-05 5.8373248265e-05 +CAATAG 4.53104234538e-06 0.000133549709878 +CCACGG 0.0001551010649 0.000118737398585 +AACCTA 0.000180544610377 0.000172810298411 +GAATGT 0.000413021936867 0.000328737747391 +ACTACC 0.000157192315213 0.000131399535658 +GGCTTC 0.000750410320739 0.000366166706033 +ACTACA 0.000161374815839 0.000211354162204 +ACTACG 4.07793811084e-05 3.16155246401e-05 +GGCTTG 0.000349238802313 0.000302139295931 +CGTTGA 2.788333751e-06 5.12060008655e-05 +AGTCAG 0.000366317346538 0.000354221293701 +CGTTGC 5.12356326747e-05 5.43118080719e-05 +TAACAC 0.0 0.000168032133478 +TCCTAC 0.00037816776498 0.000201479288009 +AGTCAC 0.000184727111004 0.000271001587784 +CGTTGG 4.7401673767e-05 6.23550523759e-05 +AGTCAA 0.000161374815839 0.000234448626047 +TCATCG 4.91443823614e-05 6.08419668137e-05 +TGAGGG 0.0 0.000327463570076 +TCATCC 0.000226900658988 0.00023492644254 +GAGCGG 0.0004524071511 0.000137531513989 +CTTTCT 0.000318218589333 0.000708203679155 +AGTCAT 0.000147084605365 0.000262878707398 +TCCTAT 0.000272211082442 0.000215256330232 +TAACAT 0.0 0.000240421332213 +CGTTGT 4.56589651727e-05 5.99659699093e-05 +CTTTCC 0.000276393583068 0.000508715293202 +CTTTCA 0.000194834820851 
0.000395074603878 +CTTTCG 4.11279228273e-05 6.60979482401e-05 +GAGCGC 0.000432540273124 0.000126143554232 +GTTGAT 0.000255481079936 0.000186189160223 +CGACCA 8.53927211245e-05 4.84983740701e-05 +CGACCC 9.30606389397e-05 6.76906698844e-05 +GCATAC 0.000136976895518 0.000122241386203 +CGACCG 1.98668779759e-05 2.85097174337e-05 +CCTTAT 0.000212261906795 0.000186268796305 +GTTGAC 0.000250252954152 0.000145574758293 +GTTGAA 0.000332857341526 0.000241456601282 +ACTGGG 0.000348193177156 0.000343151878273 +GTTGAG 0.000262800456032 0.000222184669385 +TTAGGG 8.6786888e-05 0.000181172087044 +CCTTAC 0.000263148997751 0.000177907007673 +TTCGAT 0.00013627981208 3.6712233902e-05 +CCTTAA 8.36500125301e-06 0.000231422454922 +CCTTAG 8.36500125301e-06 0.000215415602397 +CGACCT 9.30606389397e-05 6.01252420738e-05 +CAACTC 0.000174619401157 0.000216769415794 +CAACTA 8.36500125301e-05 0.000142309678922 +CAACTG 0.000315081713863 0.000257941270301 +AATAGG 7.38908444016e-05 0.000156325629392 +GATGGT 0.00041825006265 0.000246712582708 +GGGGTT 0.00013941668755 0.000226485017825 +GGATGT 8.99237634698e-05 0.000256189276492 +AATAGT 0.000140462312707 0.000192958227212 +GGGCTT 0.000185075652723 0.000295370228943 +CAACTT 0.00016416314959 0.000269647774387 +GGATGC 9.09693886265e-05 0.000258180178547 +GGATGA 7.66791781526e-06 0.000260728533178 +GGATGG 0.000121292518169 0.00032077413917 +GGGGTA 8.78325131566e-05 0.000124232288258 +TCTCCG 0.00010839647457 0.000110694154282 +GAGTCC 0.000502248616899 0.000242014053857 +AAACTG 0.000616918842409 0.000443174797537 +TCTCCC 0.000295563377606 0.000438157724358 +AAACTA 0.000191000861944 0.000276416841375 +TCTCCA 0.000507825284401 0.00049860151076 +AAACTC 0.000372591097478 0.000297520403162 +ACGCGA 1.6730002506e-05 1.58475803612e-05 +AGTACG 2.99745878233e-05 2.8270809187e-05 +ACGCGC 5.08870909558e-05 3.99773132729e-05 +AGTACC 0.000176362109751 0.000145972938704 +ACGCGG 4.98414657992e-05 4.39591173837e-05 +AGTACA 0.000150570022554 0.000208805807573 +AAACTT 
0.000344707759968 0.000384562641026 +CCCTTC 0.00052385820347 0.000404630933744 +TCTCCT 0.000427312147341 0.000552913318832 +CCCTTG 0.000309505046361 0.000328498839145 +TTCTAT 0.000298700253076 0.000325472668021 +AGTACT 0.000133491478329 0.000199249477707 +ACGCGT 1.91697945381e-05 2.2775919514e-05 +GCCGAA 0.000147084605365 5.32765390031e-05 +GTGGAG 0.00147084605365 0.000389181533794 +GCCGAC 0.000237705452273 5.27190864276e-05 +GTGGAC 0.000932349097991 0.000235165350787 +GCCGAG 0.000472622570795 0.000140239140784 +GTGGAA 0.000819073039357 0.000339727526737 +TATCAT 0.000104562515663 0.000215335966315 +GTGGAT 0.000757032613397 0.000262082346576 +GCCATC 0.000975219729413 0.000271160859949 +GCCGAT 0.000127914810827 3.66325978198e-05 +CCTATT 0.000151615647711 0.000172412118 +ACCCGG 0.000220626908048 9.54040264959e-05 +ACCGAG 0.000286152751197 9.23778553716e-05 +AAATCG 5.85550087711e-05 5.55063493052e-05 +CACGCG 5.99491756466e-05 4.18885792461e-05 +TTATCG 2.05639614136e-05 3.29693380378e-05 +ACCGAC 0.000175665026313 4.6268563768e-05 +AAATCC 0.000313339005269 0.000276337205293 +ACCGAA 0.00011153335004 5.75768874428e-05 +AAATCA 0.000283364417446 0.000381695742066 +GGCTTT 0.000500157366586 0.000394994967796 +CTTTGA 1.04562515663e-05 0.000431627565616 +AGATAC 0.000184727111004 0.000191763685978 +TCCCGA 0.000146387521928 8.99091368229e-05 +CACGCT 9.13179303453e-05 8.08306234502e-05 +AAATCT 0.000323795256835 0.000374767402913 +ACCGAT 9.7940223004e-05 3.59158730798e-05 +GTAGTT 0.000109442099727 0.000169784127286 +ACCAGT 0.000327977757462 0.000229033372456 +GATGAT 0.000905511385638 0.000230387185854 +GGAGCA 0.000351330052626 0.000363299807074 +GGAGCC 0.000534662996755 0.000422628688325 +CCGGCT 0.000116412934104 0.0001271788233 +CCAAAT 0.00026663441494 0.00028915861453 +GGAGCG 0.000112927516916 0.000145654394375 +AAAACT 0.000349238802313 0.000444687883099 +GATGAC 0.00110905974946 0.000215097058068 +TGTCCC 0.000217490032578 0.000369033604993 +GATGAA 0.00125440164623 
0.000315199613415 +GATGAG 0.00134850791033 0.000273470306333 +AAAACC 0.000376076514666 0.000368396516336 +AAAACA 0.000413370478586 0.000698886257536 +AAAACG 0.000107699391132 9.28556718649e-05 +GGAGCT 0.000413719020305 0.000415700349172 +CCAAAG 0.00048029048861 0.00038615536267 +TCGCGG 3.20658381365e-05 4.18089431639e-05 +CTAGAG 0.000415810270618 0.000265506698111 +TCGCGC 4.32191731405e-05 4.22867596572e-05 +CTACCG 4.39162565783e-05 4.41980256304e-05 +GCCACA 0.00054860466551 0.000330171196871 +AGATCA 0.000115715850667 0.000248066396106 +GCCACC 0.000798857619662 0.000357247464825 +AGAAAT 0.000326583590586 0.000543675533295 +TCCCGG 0.000237705452273 0.000138088966564 +GCGCTT 6.55258431486e-05 7.30262873929e-05 +GCCACG 0.000251995662747 0.000106632714088 +AGATCG 3.20658381365e-05 5.13652730299e-05 +ACCAGC 0.00061064509147 0.000309386179413 +TCGCGT 1.15018767229e-05 2.07850174586e-05 +GTGCAA 0.000211564823357 0.000184038986003 +TCCCAT 0.000228643367582 0.000300944754698 +ACCCGT 7.73762615903e-05 5.6541618374e-05 +AGAAAG 0.000402565685301 0.000662811112291 +CTAGAC 0.000236659827116 0.000159192528352 +AGAAAC 0.00030985358808 0.000476621952068 +ATGTCC 0.000358649428723 0.000240341696131 +AGAAAA 0.000381653182168 0.000860467868354 +CTACCC 0.000150570022554 0.000194152768445 +ATGTAC 0.000345056301687 0.00020187746842 +GACCGA 0.000218535657735 5.87714286761e-05 +ATGTAA 1.49872939116e-05 0.000349124584439 +TATGTG 0.000493535073927 0.000348487495781 +ATGTAG 9.75916812851e-06 0.000250694386819 +CCTATA 6.86627186184e-05 0.000140637321195 +CGAAGT 8.60898045622e-05 5.22412699343e-05 +CCGGCA 9.65460561285e-05 8.33789780811e-05 +GACGAC 0.00035934651216 4.96929153034e-05 +CCTATC 0.000168345650217 0.000131399535658 +ATGTAT 0.000238402535711 0.000394357879138 +ATTAGA 8.85295965943e-05 0.000193117499376 +GGCGTC 0.000148130230522 7.65302750105e-05 +GACCGG 0.000337388383871 7.48579172839e-05 +AGCACT 0.000358300887004 0.000356052923592 +TTAGAA 0.000339828175903 0.000355415834934 +CGGCTG 
0.000498763199711 0.000164368873696 +TTAGAC 0.000165905858185 0.000148282385088 +CGGCTC 0.000230037534458 0.000129249361438 +TTAGAG 0.000258617955405 0.00024822566827 +CGGCTA 9.75916812851e-05 4.16496709995e-05 +AGCACC 0.00057335112755 0.000276655749622 +AGCACA 0.000434282981719 0.000378351026613 +AGCACG 0.000163814607871 8.64051492054e-05 +GGTCGG 9.06208469076e-05 6.00456059916e-05 +CGGCTT 0.000121989601606 9.36520326871e-05 +TTAGAT 0.000202154196948 0.000207133449846 +AAAGAC 0.000714859065413 0.000357008556578 +CGAAGC 0.000155798148337 8.05917152035e-05 +AAAGAA 0.00114356537963 0.000834028689058 +AAAGAG 0.000949776183935 0.000477179404644 +ATATCA 9.20150137831e-05 0.000207372358093 +GCGCTG 0.000425220897028 0.000140159504702 +TCGCTA 2.30037534458e-05 3.28100658734e-05 +TGTGAG 0.000616221758972 0.000465711808805 +TGTGAA 0.000474365279389 0.000486337554099 +CTACCT 0.000157192315213 0.000269169957893 +TGTGAC 0.000522115494875 0.000364653620471 +ATATCT 0.000117110017542 0.000244960588899 +CCAGCT 0.000538148413943 0.000500592412816 +AAAGAT 0.000739605527453 0.000417372706899 +TGTGAT 0.000362831929349 0.000371263415295 +TTATTG 8.01645953413e-05 0.000283504452692 +TTATTA 7.59820947148e-05 0.000375165583324 +GAAAGC 0.000428357772498 0.000348726404028 +TTATTC 8.05131370602e-05 0.000261047077507 +ACATCT 0.000293123585574 0.000329534108214 +TTTTAT 0.000213307531952 0.000766497291338 +CTTACT 0.000128263352546 0.000250455478572 +AGACGC 7.31937609638e-05 7.79637244904e-05 +TACAAA 0.00037816776498 0.000348168951452 +AGACGA 6.93598020562e-05 6.61775843223e-05 +AAGTGA 4.1825006265e-05 0.00035597328751 +CCTCTG 0.000636088636947 0.000650865699959 +AGGTGG 0.000134537103486 0.000369670693651 +CCTCTC 0.000287895459791 0.000476144135575 +AGGTGC 0.000107350849414 0.000235882075527 +CCTCTA 0.000139068145831 0.000227679559058 +AGGTGA 1.25475018795e-05 0.00030819163818 +CTTACG 3.72939639197e-05 3.83049555463e-05 +TCCACA 0.000374682347791 0.000355734379263 +TTTTAG 8.01645953413e-06 
0.000365449981293 +AGACGT 4.80987572048e-05 7.76451801615e-05 +CTTACC 0.000150221480835 0.000176234649946 +ACATCA 0.00021992982461 0.000295529501107 +CTTACA 0.000138719604112 0.000256985637314 +ACATCC 0.000240842327743 0.000237952613664 +AGGTGT 8.46956376867e-05 0.000295927681518 +CCTCTT 0.000258966497124 0.000444608247017 +GCGTGA 3.83395890763e-06 6.37885018558e-05 +CCCGGC 0.000201805655229 0.000167633953067 +CCCGGA 0.000124429393638 0.000130762447 +CCCGGG 0.000196577529446 0.00020402764264 +TATGAA 0.000617615925847 0.000322287224732 +TGGAAT 0.000252692746185 0.00036067181636 +TAGGTG 0.0 0.00018141099529 +TTGTGC 0.000100031473317 0.000262321254823 +TATGAT 0.000408839436241 0.000204824003462 +TTGTGG 0.000119898351293 0.000357964189565 +CCCGGT 8.05131370602e-05 7.46190090373e-05 +GGCGTG 0.000260360664 0.000115950135708 +TAGGTA 0.0 0.000134584978947 +GGTTTT 0.000177059193189 0.000458783469652 +TATCAC 0.000133491478329 0.000140000232537 +TGGAAA 0.000330069007775 0.000577839412566 +TGAGGA 0.0 0.000473914325273 +AAGAGG 0.000564637584578 0.000428840302738 +TGAGGC 0.0 0.000330728649447 +AAGAGA 0.000507476742682 0.000575529966182 +AAGAGC 0.000622495509911 0.000359556911209 +AAGCGT 0.000135582728642 5.62230740452e-05 +GCTTAG 8.71354297188e-06 0.000186905884963 +GCTTAC 0.000173225234281 0.000149397290239 +GCTTAA 9.41062640963e-06 0.000168032133478 +TGATTC 0.0 0.000238589702322 +AAGCGC 0.000327280674024 6.61775843223e-05 +AAGAGT 0.000390018183421 0.000319659234019 +AAGCGA 0.000190652320225 6.46644987601e-05 +AAGCGG 0.000352027136064 7.66895471749e-05 +GCTTAT 0.000167648566779 0.000167952497396 +GTTCGG 0.000105956682538 4.80205575768e-05 +CTGCTA 0.000315081713863 0.00025252601671 +GCGTGT 4.87958406425e-05 9.65985677291e-05 +CTGCTC 0.000883901799068 0.000501866590131 +ACTTCT 0.000252344204466 0.000380341928668 +GACCGT 0.000171133983968 5.43118080719e-05 +CTGCTG 0.00200934300932 0.000750490438812 +CGGTCA 8.08616787791e-05 5.15245451944e-05 +GGTCGT 7.07539689317e-05 
3.93402246152e-05 +CGGTCC 0.000125823560514 7.29466513107e-05 +CTAGAA 0.00037189401404 0.000285813899077 +CGGTCG 2.16095865703e-05 2.96246225847e-05 +ACTTCG 4.87958406425e-05 5.63823462096e-05 +GACCGC 0.000278484833381 5.8373248265e-05 +TTTTGG 0.000127914810827 0.000423902865641 +ACTTCC 0.000243979203213 0.000333914092736 +CTGCTT 0.000451361525944 0.000539614093102 +ACTTCA 0.000227597742426 0.000345381688575 +CTAGAT 0.000230734617895 0.000164607781942 +CACAAA 0.000336342758715 0.000363857259649 +CGGTCT 8.60898045622e-05 6.33903214447e-05 +GGTCGC 0.000103168348787 5.40728998253e-05 +GGTCGA 6.44802179919e-05 2.99431669136e-05 +CGTCGC 7.17995940883e-05 3.81456833819e-05 +CAGCAT 0.000426963605622 0.000373493225597 +TGAAGC 6.97083437751e-07 0.000348487495781 +CGTCGG 7.59820947148e-05 3.05006194891e-05 +TGGTTG 0.000115367308948 0.000304289470151 +GCGTGC 6.41316762731e-05 9.45280295915e-05 +TGCTCA 0.000159632107245 0.000371741231789 +TTCATC 0.000713116356819 0.000291547696996 +TGCTCC 0.000317521505895 0.000365449981293 +TTCATA 0.000155449606618 0.000303015292835 +TTCATG 0.000525600912064 0.000351991483399 +TGCTCG 5.92520922088e-05 8.5529152301e-05 +GTACAA 0.000121641059887 0.000164767054107 +CAGCAG 0.00192116195444 0.000667270732896 +TACACT 0.000201108571791 0.000212946883848 +GCTTTG 0.000355164011534 0.000433061015096 +CAGCAC 0.000591475296931 0.000381616105984 +CAGCAA 0.00057648800302 0.000392765157494 +GGGAGT 0.000163117524434 0.000241615873446 +GTACAC 9.68945978473e-05 0.000145096941799 +TGATTT 0.0 0.000460057646967 +TTCATT 0.000413719020305 0.000499158963336 +GTAGCC 0.000237008368835 0.000179818273646 +GCTGCC 0.000939668474088 0.000472241967546 +GCTGCA 0.000590081130056 0.000392924429658 +AACTCT 0.000315778797301 0.000332162098927 +GCTGCG 0.00022306670008 0.000168350677807 +TGGCCA 0.000172528150843 0.000381058653408 +GTAGCA 0.000176362109751 0.000182446264359 +TCATCT 0.000276393583068 0.000373971042091 +TGCTCT 0.000231431701333 0.000511661828244 +AACTCA 
0.000276045041349 0.000359875455538 +TGGCCT 0.000156843773494 0.000475268138671 +AACTCC 0.000368408596851 0.000243049322926 +GCTGCT 0.000837894292176 0.000564380914671 +AACTCG 0.000103865432225 6.14790554715e-05 +CTACAA 0.00016102627412 0.000224334843605 +GGATCC 0.000210519198201 0.00021445996941 +TGAAGA 0.0 0.000586917925939 +AATATT 0.000274650874474 0.000466428533545 +GTTCGA 0.00011780710098 3.83845916286e-05 +CAACCT 0.000168345650217 0.000244880952817 +TGGCCG 4.77502154859e-05 0.000104801084197 +GGCACG 0.000135582728642 6.64164925689e-05 +GGAGGA 0.000485170072674 0.000552196594092 +GGCACA 0.000349587344032 0.000272435037264 +GACAAT 0.000443345066409 0.00018499461899 +GGCACC 0.000540936747694 0.000209920712724 +CAACCG 4.98414657992e-05 5.07281843722e-05 +ACGACC 8.43470959678e-05 4.02958576018e-05 +AATATG 0.000237356910554 0.000280478281568 +CAACCC 0.000152312731149 0.00021015962097 +TTAGGA 0.00012408085192 0.00023994351572 +AATATC 0.00025722378853 0.000191922958143 +GACAAA 0.000650030305702 0.000297679675327 +GGCACT 0.000317870047614 0.00023635989202 +GACAAC 0.000674776767743 0.00018929496743 +ACGACA 5.47210498634e-05 5.20819977699e-05 +GACAAG 0.00103691161365 0.000280000465075 +TGGCCC 0.000166602941622 0.000311536353633 +GCAATC 0.000150221480835 0.000121524661463 +GCAATA 9.75916812851e-05 0.000161183430407 +GAATTT 0.000423478188434 0.000370068874062 +TTGAAC 0.000228991909301 0.000262799071316 +CGATGG 6.62229265863e-05 5.4709988483e-05 +GTTCCC 0.000206336697574 0.000262321254823 +CGATGC 8.26043873734e-05 5.78157956895e-05 +GTAAAT 0.000142902104739 0.000299113124807 +CGATGA 5.92520922088e-06 5.09670926188e-05 +GAATTA 0.000189606695068 0.000225131204427 +GAATTC 0.000360740679036 0.000251411111559 +GAATTG 0.00023875107743 0.000232935540485 +GTAAAC 0.000124777935357 0.00016667832008 +CGATGT 5.576667502e-05 5.15245451944e-05 +GTAAAA 0.000212261906795 0.000302537476342 +GTAAAG 0.000253041287903 0.000247190399201 +TCCCAG 0.000724618233542 0.000569796168262 +GGATTT 
0.000270816915566 0.000327383933994 +GTGCGT 9.82887647228e-05 7.36633760507e-05 +ACGCCG 6.23889676787e-05 3.92605885329e-05 +GGATTC 0.000221672533205 0.000221866125056 +GGATTA 9.61975144096e-05 0.000170660124191 +GGATTG 0.000150570022554 0.000188976423101 +AGCGGC 0.000285804209478 0.000138248238729 +ACGCCA 0.000108745016289 6.5699767829e-05 +AAAGCC 0.000732634693076 0.000368953968911 +TCTGTG 0.000767140323245 0.000664244561771 +CTATCC 8.53927211245e-05 0.000152264189199 +GTCCCG 0.000101774181912 9.42094852626e-05 +TACAAT 0.000277439208225 0.000196382578747 +AACCGG 0.000214353157108 5.49488967297e-05 +CTCCTT 0.00029765462792 0.000462128185105 +GGCGGA 0.000102819807068 0.000112844328501 +AACCGC 0.000216444407422 5.18430895232e-05 +AACCGA 0.000176362109751 5.81343400184e-05 +GTGGGA 0.000391412350297 0.000361707085429 +TACCCA 0.000298351711357 0.000231024274511 +TCTGTT 0.000273953791036 0.000506724391146 +AAAGCG 0.000120246893012 7.12742935842e-05 +CTCCTG 0.000975219729413 0.000633743942282 +AACCGT 0.000119898351293 5.37543554964e-05 +CTCCTA 0.00016102627412 0.000239704607473 +CTCCTC 0.000491095281895 0.000541366086911 +GGCGGT 0.000129308977703 8.45735193144e-05 +AACAAG 0.000843819501397 0.000312253078373 +CCCACG 0.000216095865703 0.000123435927436 +AACAAC 0.000624586760225 0.000256428184739 +AACAAA 0.000583110295678 0.000641468642257 +TACTTC 0.000473319654233 0.000230466821936 +AGCCTT 0.000321006923084 0.000422549052243 +CCGATT 3.52027136064e-05 3.71104143131e-05 +CCCACT 0.000315430255582 0.000329852652543 +AACAAT 0.000367362971695 0.000263515796056 +GACGCT 0.000172528150843 7.31855595574e-05 +AGCGGT 9.2363555502e-05 6.64961286511e-05 +CCGATA 2.33522951646e-05 2.38111885829e-05 +AGCCTG 0.000903768677044 0.000548453698228 +AGCCTA 0.000159632107245 0.000209522532313 +AGCCTC 0.000506779659245 0.000409727643006 +GTACCC 0.000154752523181 0.000130284630507 +CTCCCG 0.000125823560514 0.000164767054107 +TAGGCT 0.0 0.000196541850911 +ACGAAT 4.25220897028e-05 4.18089431639e-05 
+CGCCTT 0.000128263352546 9.47669378381e-05 +AGGCAT 0.000146736063647 0.000271877584689 +CAGAGT 0.000510265076433 0.0003958709647 +GCATCT 0.000283364417446 0.000307395277357 +CTCCCA 0.000295563377606 0.000456155478939 +ACGAAA 7.80733450281e-05 5.78954317717e-05 +CGCCTC 0.000270468373847 0.000139761324291 +AGGCAA 0.000152312731149 0.000294016415545 +CGCCTA 8.81810548755e-05 4.34813008904e-05 +AGGCAG 0.000465303194699 0.000579750678539 +CGCCTG 0.000558363833638 0.000144937669635 +ACGAAG 0.000128611894265 7.2309562653e-05 +CAGAGG 0.00062911780257 0.000590581185721 +GCATCC 0.000273605249317 0.000237235888924 +GCATCA 0.000189258153349 0.000228794464209 +CAGAGC 0.000768185948401 0.000517475262246 +CGGCCA 0.000194137737414 0.000107269802746 +CAGAGA 0.000590081130056 0.000631195587651 +CGGCAC 0.000164860233028 6.58590399934e-05 +CCTAGA 9.96829315983e-05 0.000233652265224 +CCTAGC 0.000173225234281 0.00019542694576 +TTTGCT 0.000530131954409 0.000507759660215 +TAGCTC 0.0 0.000198214208638 +TGCGGT 4.91443823614e-05 7.0477932762e-05 +CCTAGG 9.06208469076e-05 0.000219238134343 +TAAGCA 0.0 0.000246394038379 +GTCTAG 8.36500125301e-06 0.000150193651061 +TGCGGC 0.00013314293661 9.99432831822e-05 +TGCGGA 5.61152167389e-05 7.89989935592e-05 +CCTAGT 0.000135931270361 0.000166519047916 +TGCGGG 0.000117458559261 0.000101774913073 +TAAGCG 0.0 3.34471545311e-05 +TGAGTT 0.0 0.000395791328618 +TAGCTG 0.0 0.000272833217675 +ATCGCT 0.000166951483341 5.63823462096e-05 +TCTCGT 6.55258431486e-05 6.73721255555e-05 +ATCGCC 0.000258617955405 5.7258343114e-05 +TGAGTG 0.0 0.000364733256554 +ATCGCA 0.00012408085192 4.06144019306e-05 +TGAGTA 0.0 0.000209203987984 +ATCGCG 6.69200100241e-05 1.76792102522e-05 +TGAGTC 0.0 0.000256030004327 +AAGGTG 0.000860200962184 0.000328578475227 +ACGGCT 0.000132445853173 7.48579172839e-05 +CATCAT 0.000147781688803 0.000275620480553 +AAGGTA 0.000197274612883 0.000192639682883 +TTTATT 0.000233871493365 0.000820649827245 +ACGGCA 0.000106653765976 5.98066977449e-05 +CATCAC 
0.000184030027566 0.000253879830108 +ACGGCC 0.000219581282891 6.87259389532e-05 +CATCAA 0.000167997108498 0.00024894239301 +CATCAG 0.000407445269365 0.000333515912324 +ACGGCG 8.08616787791e-05 4.06940380129e-05 +TCACCT 0.000278136291662 0.000333515912324 +TCTAAG 0.000270468373847 0.000247110763119 +TCGTTT 5.68123001767e-05 9.18204027961e-05 +CGGAAT 0.000132794394891 5.01707317967e-05 +CTCATG 0.000559409458795 0.000281672822801 +TCACCG 7.63306364337e-05 7.74062719149e-05 +TCACCA 0.000297306086201 0.000314005072181 +TCACCC 0.000235265660241 0.000260728533178 +CGGAAG 0.000439859649221 0.000122559930532 +AATATA 0.000142902104739 0.000359556911209 +CGGCCT 0.0001920464871 0.00011387959757 +CGGAAC 0.00020145711351 6.19568719648e-05 +CGGAAA 0.000248858787277 7.30262873929e-05 +TAGCTT 0.0 0.000237395161089 +ATGGCG 0.000298003169638 7.22299265708e-05 +TGTGTA 0.000125823560514 0.000455040573788 +ATGGCC 0.000666760308208 0.000238191521911 +ATGGCA 0.000452755692819 0.000280637553732 +TCCAAT 0.000276742124787 0.000179181184988 +ATGGCT 0.00058276175396 0.000363060898827 +TGTGTT 0.000174967942875 0.000579671042457 +TTGTCG 3.69454222008e-05 4.71445606724e-05 +GAACAT 0.000301488586827 0.000276815021786 +AGATTA 7.77248033092e-05 0.000209761440559 +AGATTC 0.000138719604112 0.000247668215695 +GAAACA 0.000430100481092 0.000419522881119 +AGATTG 0.000101425640193 0.000229829733278 +CTGAGT 0.000415810270618 0.000428601394492 +GAAACT 0.000370151305446 0.000352708208139 +AGAATG 0.000212610448514 0.000389818622452 +AGAATA 0.000132097311454 0.000328737747391 +AGAATC 0.000204942530699 0.000261923074411 +CTGAGC 0.000767837406682 0.000484505924208 +CTGAGA 0.00039036672514 0.000537782463211 +CTGAGG 0.00056707737661 0.000535871197238 +CATACC 0.000105608140819 0.000147884204677 +CATACA 0.000164511691309 0.000276416841375 +CATACG 4.14764645462e-05 3.63140534909e-05 +TCAAAT 0.000205639614136 0.000322765041225 +GTCCAT 0.000201108571791 0.000196382578747 +ACATAG 7.31937609638e-06 0.000219715950837 
+CATACT 0.000185772736161 0.000198930933378 +GTCCAG 0.00056394050114 0.000297361130998 +TCAAAG 0.000295911919325 0.000372537592611 +CACGAG 0.000286501292916 8.01935347924e-05 +GTCCAC 0.00028510712604 0.000184835346825 +GCAATG 0.00023247732649 0.000200682927187 +CGGCGG 0.000197274612883 0.000208646535408 +TTGATG 0.000246070453526 0.000293936779463 +CTCATT 0.00044613340016 0.000316234882483 +CGGCGC 0.00017984752694 0.00011817994601 +TTGATC 0.000180544610377 0.000183481533428 +CGGCGA 6.76170934618e-05 5.62230740452e-05 +TCCAAG 0.000728103650731 0.000339408982409 +CTATCT 8.92266800321e-05 0.000220671583823 +TTGATT 0.000154403981462 0.000342036973122 +CGGCGT 7.17995940883e-05 4.55518390281e-05 +TATAAG 0.000240145244305 0.00019471022102 +TGACTT 0.0 0.000398817499742 +TCACAG 0.000346450468562 0.000441263531564 +CGGTTA 4.28706314217e-05 3.49602400932e-05 +GGTTTA 8.64383462811e-05 0.000186746612799 +CGGTTC 0.000133491478329 6.54608595823e-05 +GCTGTT 0.000351678594345 0.000378510298777 +TTTACA 0.000164860233028 0.000386951723492 +CGGTTG 7.91189701847e-05 6.06030585671e-05 +GGTTTG 0.000174967942875 0.000313766163935 +TATAAA 0.000256178163373 0.000457668564501 +CGTACG 1.32445853173e-05 1.17861401681e-05 +GCTGTG 0.00113101787775 0.000568044174453 +GCTGTC 0.000510265076433 0.000378112118366 +CGGTTT 9.65460561285e-05 7.08761131731e-05 +GCTGTA 0.000214004615389 0.000252207472381 +GGGAGG 0.00018890961163 0.000517236353999 +TCATAG 6.27375093976e-06 0.000199249477707 +TATGCG 7.07539689317e-05 2.83504452692e-05 +TCTCAA 0.000196228987727 0.000328737747391 +AGGTTG 0.000119898351293 0.000245836585804 +GCATTT 0.000258269413687 0.000364573984389 +AGGTTC 0.000189606695068 0.000229511188949 +AGGTTA 7.04054272128e-05 0.000175358653042 +ACTCCT 0.000310899213237 0.000306360008289 +TTTTTC 0.000243630661494 0.000703823694633 +GAGGGC 0.000701614480096 0.000288760434119 +TTTTTA 0.000138719604112 0.000915496401166 +TCCTCT 0.0004524071511 0.000530296671483 +TTTTGT 0.000147781688803 0.000894472475461 
+GAGGGG 0.000415461728899 0.000357725281318 +TTAAAT 0.000203548363823 0.000510626559175 +AGGTTT 0.000201108571791 0.000337975532929 +ACATAT 0.000133491478329 0.000285336082583 +TTAAAC 0.000127217727389 0.000272514673346 +TTAAAA 0.000255829621654 0.00085672497249 +ACTCCG 8.99237634698e-05 6.92833915287e-05 +TTAAAG 0.000272908165879 0.000458385289241 +ACTCCA 0.00035934651216 0.000314005072181 +TTTTTT 0.000222369616642 0.00215280221057 +ACTCCC 0.000231083159614 0.000240978784788 +CTGTAA 1.60329190683e-05 0.000369591057569 +GTCACG 0.000122686685044 6.50626791712e-05 +CTGTAC 0.000453801317976 0.000246871854872 +GCAACT 0.000191349403663 0.000180216454057 +GCAATT 0.000164511691309 0.000146769299526 +CTGTAG 1.91697945381e-05 0.000314562524757 +GTCACA 0.000379213390136 0.000277133566115 +GCAACG 6.06462590843e-05 4.70649245902e-05 +GTCACT 0.000367362971695 0.000285495354748 +TAGGCG 0.0 3.68715060664e-05 +GCAACC 0.000169042733655 0.000157201626296 +CTGTAT 0.000340176717622 0.000337338444271 +GCAACA 0.000214353157108 0.000205700000366 +TAGGCC 0.0 0.000130284630507 +TGGTTC 0.00023247732649 0.000282309911459 +ATTCAT 0.000251298579309 0.000313925436099 +TCCCAC 0.00029765462792 0.000329613744296 +GCGATT 5.33268829879e-05 4.01365854373e-05 +TCGTGT 3.86881307952e-05 7.84415409837e-05 +TGAATT 0.0 0.000373811769926 +GTCTCT 0.000287198376353 0.000399295316236 +TCCGTG 0.000239099619148 0.00012104684497 +ATTCAG 0.000488655489863 0.000303493109329 +ATTCAA 0.000214353157108 0.000264789973371 +ATTCAC 0.000254783996498 0.000202673829242 +GTCTCC 0.00041511318718 0.000341718428793 +GTCTCA 0.000178104818345 0.000304528378398 +GCGATG 8.92266800321e-05 5.22412699343e-05 +GTCTCG 6.37831345542e-05 6.37885018558e-05 +GCGATA 2.75347957911e-05 2.26962834318e-05 +TCGTGC 3.76425056385e-05 5.9488153416e-05 +GCGATC 5.50695915823e-05 3.91013163685e-05 +CGATTG 5.05385492369e-05 3.71104143131e-05 +TCTACA 0.000223415241799 0.000290114247516 +GAAGGA 0.000630511969445 0.000527111228194 +CGATTC 8.6786888e-05 
4.19682153283e-05 +CGATTA 3.17172964177e-05 2.55631823916e-05 +CAATCT 9.09693886265e-05 0.000177986643755 +GGAACA 0.000264891706345 0.000312093806208 +GGAACG 7.52850112771e-05 7.85211770659e-05 +TGACGA 3.48541718875e-07 5.92492451694e-05 +TACTGC 0.000303231295422 0.000198054936474 +CGATTT 8.46956376867e-05 5.22412699343e-05 +GAAGGT 0.000414067562024 0.000288282617625 +CTCGCT 9.75916812851e-05 0.00011961339549 +CAATCG 1.9518336257e-05 3.06598916535e-05 +CAATCA 9.06208469076e-05 0.000185153891154 +CAATCC 8.19073039357e-05 0.000147724932512 +TCCTGA 3.20658381365e-05 0.000486337554099 +AGTCCA 0.000217838574297 0.000253402013614 +TCCTGC 0.000360392137317 0.000499158963336 +AGTCCC 0.000257572330249 0.00026009144452 +ACTAAA 0.000180544610377 0.000254357646601 +TCCTGG 0.000400474434988 0.000588829191912 +ACTAAC 0.000127217727389 0.000137451877906 +TAACGC 0.0 2.65984514605e-05 +CAGCTG 0.00164232857934 0.000601809873313 +CAGCTA 0.000321006923084 0.000264232520796 +CAGCTC 0.000807222620915 0.000472401239711 +ACTAAT 0.000119549809574 0.00017097866852 +AGTCCT 0.000290335251823 0.000334153000982 +TCCTGT 0.000280924625414 0.000518191986986 +TAATGT 0.0 0.000305563647466 +CAGCTT 0.000471925487357 0.000434016648082 +GGTTAG 3.48541718875e-06 0.000136655517084 +TAACGT 0.0 4.70649245902e-05 +ACGGAC 0.000180893152096 5.78954317717e-05 +GTACTT 8.81810548755e-05 0.000207213085928 +CGACAT 6.72685517429e-05 3.90216802863e-05 +TTGTTC 0.0001551010649 0.000342992606108 +CCGAGG 8.92266800321e-05 0.00016022779742 +CCGAGA 5.78579253333e-05 0.000112923964584 +ACGGAA 0.000153009814586 7.31059234751e-05 +CCGAGC 0.00011780710098 0.000126223190314 +CCGCGT 1.81241693815e-05 4.67463802613e-05 +CGACAC 9.68945978473e-05 4.75427410835e-05 +CGACAA 7.70277198714e-05 3.98180411085e-05 +CGACAG 0.000194486279132 7.01593884331e-05 +GTACTA 4.94929240803e-05 0.00010129709658 +CCGCGC 7.66791781526e-05 0.000124789740834 +ACGGAG 0.000323446715116 0.000105517808937 +CCGCGA 2.5095003759e-05 4.09329462595e-05 +TACGCA 
8.19073039357e-05 3.27304297912e-05 +CCGCGG 7.52850112771e-05 0.000134664615029 +TTGTTT 0.000200062946634 0.000978090361788 +GCATAT 0.000138371062393 0.000178862640659 +TCCGTT 5.85550087711e-05 6.753139772e-05 +AGGAAC 0.00034087380106 0.0003269061175 +AATAAC 0.000255829621654 0.000208805807573 +AATAAA 0.000370848388883 0.000961764964934 +AATAAG 0.000321355464803 0.00024066024046 +TACTAA 8.71354297188e-06 0.000176234649946 +TGGAAG 0.000488306948144 0.00053236720962 +AATAAT 0.000241887952899 0.000382651375052 +ACGGTG 0.000269074206972 8.13084399435e-05 +CCAACC 0.000227249200707 0.000246712582708 +CCAACA 0.000256526705092 0.000274027758908 +TTATGC 4.56589651727e-05 0.000155210724241 +CCAACG 6.37831345542e-05 5.42321719897e-05 +GGGATT 0.000211216281638 0.000251490747641 +CGCCGC 0.000212261906795 0.000178145915919 +AGTAAA 0.000242236494618 0.000315677429908 +CGCCGA 7.59820947148e-05 4.17293070817e-05 +AGTAAC 0.000198668779759 0.00016237797164 +CGCCGG 0.000170088358811 9.89080141134e-05 +GAGTAA 1.08047932851e-05 0.000164448509778 +AGTAAG 0.000206336697574 0.000200045838529 +CTCAGG 0.000307762337767 0.000470330701573 +CTCAGC 0.000661880724144 0.000483948471632 +CCAACT 0.000230734617895 0.000243049322926 +GAGTAT 0.000369454222008 0.000157042354132 +GCTTCA 0.000280576083695 0.000316234882483 +GTGTAG 1.49872939116e-05 0.000211513434368 +TCGAGC 5.576667502e-05 5.2878358592e-05 +AGTAAT 0.000164860233028 0.000202196012749 +GTGTAC 0.000341919426217 0.00017997754581 +CGCCGT 6.72685517429e-05 4.4436933877e-05 +GTGGCA 0.000493883615646 0.00030460801448 +CATATT 0.000134537103486 0.000275620480553 +CTCGGA 7.84218867469e-05 0.000100261827511 +CTCGGC 0.000127217727389 0.000133231165549 +GCCGCC 0.000401520060144 0.000231502091005 +GCCGCA 0.000149524397398 9.75542007157e-05 +GCCGCG 0.000173225234281 0.000120569028476 +TATTGA 5.22812578313e-06 0.000238350794075 +ACACTT 0.000171482525687 0.00030604146396 +CATATG 0.000133840020048 0.00021517669415 +TCGCTC 9.65460561285e-05 8.48920636432e-05 
+CATATC 0.000148130230522 0.000147087843855 +CATATA 7.07539689317e-05 0.000240899148706 +GTCAGC 0.000444042149847 0.000264471429042 +TACTAC 0.000366665888257 0.000117224313023 +ACACTC 0.000237008368835 0.000238669338404 +ACACTA 0.000123732310201 0.000187463337539 +ACACTG 0.000498066116273 0.000421673055339 +GCCGCT 0.000197623154602 0.000130921719165 +ACCGGA 9.68945978473e-05 6.80888502955e-05 +AAATAA 2.37008368835e-05 0.000793414287127 +ACCGGC 0.000159632107245 7.00797523509e-05 +AAATAC 0.000344359218249 0.000330649013365 +CTTTAA 8.71354297188e-06 0.000435211189316 +ACCGGG 0.000109093558008 7.93971739703e-05 +AAATAG 1.3941668755e-05 0.00030747491344 +AACTTA 0.000156843773494 0.000227201742565 +CAGGTA 0.000193440653976 0.000212946883848 +AACTTC 0.000575442377863 0.000293220054723 +CAGGTC 0.000430449022811 0.000262082346576 +CACGAC 0.000163117524434 4.79409214946e-05 +CACGAA 0.000130354602859 6.06826946493e-05 +CAGGTG 0.000859155337028 0.000374369222502 +CTTTAT 0.000193440653976 0.000362583082334 +AATCCC 0.000217490032578 0.000207133449846 +AATCCA 0.000286501292916 0.00025610964041 +AAATAT 0.00028510712604 0.000561354743547 +AATCCG 4.94929240803e-05 4.3322028726e-05 +CACGAT 9.20150137831e-05 4.78612854124e-05 +CAGGTT 0.00031612733902 0.000297042586669 +AACTTT 0.000385138599357 0.00040160476262 +TCTGTC 0.000366665888257 0.000465393264476 +GGGATA 9.61975144096e-05 0.000152742005692 +GGGATC 0.000273605249317 0.000178623732413 +GGGTGT 7.87704284658e-05 0.000263037979563 +GGAGAT 0.000517932994249 0.000289716067105 +GATGCT 0.000704402813847 0.000348168951452 +CGCAGT 0.000126869185671 6.84073946243e-05 +GAGTCG 0.000161374815839 6.8646302871e-05 +GAGCCT 0.000645499263357 0.000377475029708 +GGAGAC 0.000674079684305 0.000344266783424 +GGCGGC 0.000417204437494 0.000265904878522 +GGAGAA 0.000738908444016 0.000487133914921 +GGGTGC 8.01645953413e-05 0.000220034495165 +GGAGAG 0.000915270553767 0.000533959931265 +TGGAAC 0.000278484833381 0.000308350910344 +GGGTGG 9.44548058152e-05 
0.000392446613165 +GAGCCA 0.000646544888514 0.000416735618241 +CGCAGG 0.000213656073671 0.000110534882117 +GATGCG 0.000147433147084 5.04096400433e-05 +CGCAGA 0.000121292518169 9.65189316469e-05 +GATGCA 0.000493535073927 0.000235404259033 +CGCAGC 0.000308807962924 0.000153618002596 +GATGCC 0.000778990741686 0.000241695509528 +CGGATT 0.000123035226763 4.84983740701e-05 +GGTAGT 8.81810548755e-05 0.000121285753216 +ACCCCT 0.000324840881992 0.000262560163069 +GCCAAT 0.000434631523438 0.000142468951086 +GGCGGG 0.000181241693815 0.000211991250861 +AGAAGT 0.000193092112257 0.000395313512125 +ACCCCA 0.000436025690313 0.000342275881368 +GGTAGA 7.17995940883e-05 0.000200284746776 +ACCCCC 0.000246767536964 0.000253481649696 +CGGATG 0.000172179609124 7.00001162687e-05 +GCCAAG 0.0012889072764 0.000332719551502 +CGGATA 7.07539689317e-05 3.06598916535e-05 +ACCCCG 0.000152661272867 9.64392955647e-05 +CGGATC 0.000202154196948 5.27987225098e-05 +AGAAGA 0.000315430255582 0.000709955672964 +AGAAGC 0.000249904412434 0.000474631050013 +GCAAGA 0.000188561069912 0.000289795703187 +AGAAGG 0.000205639614136 0.000499238599418 +CACTCC 0.000260360664 0.000269249593975 +CACTCA 0.000208079406169 0.000307793457768 +CACTCG 0.000103516890506 7.62117306816e-05 +TGCGTT 3.03231295422e-05 6.42663183491e-05 +TCAGCT 0.000376773598104 0.000417372706899 +AGCTTT 0.000327280674024 0.000425973403778 +CCCCGG 0.000261057747438 0.000164767054107 +TGCGTA 2.26552117269e-05 3.7428958642e-05 +CCCCGC 0.000162071899277 0.000195825126171 +CACTCT 0.000216444407422 0.000328180294816 +AGCTTC 0.000528389245815 0.00035597328751 +AGCTTA 0.000127566269108 0.000198214208638 +AGCTTG 0.000293123585574 0.000313367983524 +TTAGCT 0.000155449606618 0.000226325745661 +TTAGCC 0.000130354602859 0.000162139063394 +GCACTG 0.000523509661751 0.000350796942166 +TTAGCA 0.000163117524434 0.000236280255938 +GACCAT 0.000298003169638 0.000205142547791 +TTAGCG 2.75347957911e-05 3.47213318466e-05 +TACCGG 0.000189258153349 3.29693380378e-05 +TCATAT 
8.60898045622e-05 0.000228157375551 +ATAGGT 9.55004309718e-05 0.000133390437713 +ATAAGG 8.26043873734e-05 0.000174641928302 +TGTGCT 0.00029451775245 0.000473595780944 +AAGGGC 0.00055174154098 0.000245119861064 +ATAAGC 9.30606389397e-05 0.000164687418025 +AAGGGA 0.000357255261847 0.000381775378148 +ATAAGA 0.000104911057381 0.000248544212599 +CCAGAC 0.000552438624417 0.000306280372206 +TCTGTA 0.000184030027566 0.000429318119231 +CCAGAA 0.000785264492626 0.000490398994292 +CCAGAG 0.00103307765475 0.000566212544562 +GCAAGT 0.000153706898024 0.000220114131248 +TGTGCG 6.51773014297e-05 0.00010201382132 +ATAAGT 9.06208469076e-05 0.000175756833453 +ATAGGG 8.46956376867e-05 0.000123515563518 +TGTGCC 0.000379910473574 0.000366166706033 +ATAGGA 0.000135582728642 0.000202992373571 +TGTGCA 0.000235962743679 0.00041633743783 +ATAGGC 0.000100031473317 0.000119215215079 +GCTCGC 0.000140810854426 0.000103049090389 +TAAGTT 0.0 0.000236678436349 +TTTGCG 7.45879278393e-05 5.52674410585e-05 +CCAGAT 0.000556969666763 0.000290512427927 +AGACAA 0.000182287318972 0.000365529617376 +GACGCA 0.000121641059887 5.40728998253e-05 +AGACAC 0.000197274612883 0.000347770771041 +TCCATT 0.000310202129799 0.000324278126787 +AGACAG 0.000349238802313 0.000511263647833 +GCTTTT 0.000292077960418 0.000447076965566 +TTTTCT 0.000296609002763 0.000963755866989 +CGCTGT 0.000196926071165 0.000114755594475 +GACGCC 0.00027569649963 7.70080915038e-05 +TCAAGA 0.000178104818345 0.000337418080353 +CGTGGG 0.000132445853173 0.000130364266589 +CTATAG 9.41062640963e-06 0.000157759078872 +AGACAT 0.000172179609124 0.000350159853508 +CGTGGA 0.000125126477076 0.000113242508912 +TTTTCG 3.13687546988e-05 6.50626791712e-05 +CCCACA 0.000439162565783 0.00038973898637 +CGCTGC 0.000259663580562 0.000169147038629 +TTTTCC 0.000261406289156 0.000556496942532 +TTTTCA 0.000211913365076 0.0005991818826 +CGCTGG 0.000254783996498 0.0001343460707 +TATGGA 0.000352375677783 0.000235483895115 +TATGGC 0.000392806517172 0.000156803445885 +CGTAGG 
3.97337559518e-05 3.95791328618e-05 +CCCACC 0.000565334668016 0.000470808518066 +TATGGG 0.000289638168385 0.000170182307698 +CGTAGC 5.43725081445e-05 4.36405730549e-05 +TACCGT 9.96829315983e-05 3.92605885329e-05 +TGGACT 0.00018298440241 0.000337099536024 +CGTAGT 2.89289626667e-05 3.1217344229e-05 +GCTCGA 0.000146736063647 5.12856369477e-05 +TATGGT 0.000218187116016 0.00017926082107 +TGGACA 0.000220278366329 0.00035740673699 +TGGACC 0.000225157950393 0.000250216570326 +TGGACG 7.07539689317e-05 7.29466513107e-05 +CCGGTT 5.2978341269e-05 5.89307008405e-05 +GACCAA 0.000318218589333 0.000216450871466 +GCTTGA 9.06208469076e-06 0.00022449411577 +TGAGAC 3.48541718875e-07 0.000319659234019 +GCTTGC 0.000149524397398 0.000274266667155 +TGAGAA 0.0 0.000518032714821 +TGAGAG 0.0 0.000401445490456 +GCTTGG 0.000136628353799 0.000315438521661 +CCTGTT 0.000296609002763 0.000381377197737 +CCGGTG 0.000218884199454 9.91469223601e-05 +CCGGTC 0.000100728556755 6.16383276359e-05 +CCGGTA 3.76425056385e-05 3.14562524757e-05 +CCTGTC 0.000417901520931 0.000425017770792 +CCTGTA 0.000203199822104 0.000275301936224 +CCTGTG 0.000844516584835 0.000574255788866 +GCTTGT 0.000157192315213 0.000275063027977 +TGAGAT 3.48541718875e-07 0.000336303175202 +TCCGAA 9.37577223775e-05 5.38339915786e-05 +GGTTGT 7.80733450281e-05 0.000240978784788 +TCCTCA 0.00037189401404 0.00040805528528 +TATCAG 0.000269771290409 0.000202434920995 +TATTGC 8.15587622168e-05 0.000168271041724 +TAGCGT 0.0 3.52787844221e-05 +GGTTGG 8.92266800321e-05 0.000248623848681 +GGTTGA 3.83395890763e-06 0.00017782737159 +TTCACG 0.000169042733655 7.0477932762e-05 +GGTTGC 8.50441794056e-05 0.000169226674711 +GGAAAC 0.000363529012787 0.000338453349422 +TCCCGC 0.000170088358811 0.000119055942914 +GTAACG 2.99745878233e-05 3.29693380378e-05 +CAGCGT 0.000181590235534 0.000104960356362 +GTAACC 9.48033475341e-05 0.000125187921245 +CGTTTT 8.53927211245e-05 0.000110614518199 +ATTCTC 0.000272908165879 0.000292025513489 +ATTCTA 0.000127914810827 
0.000248703484763 +ATTCTG 0.000411976311711 0.000400330585304 +TCATTT 0.000179498985221 0.000553311499243 +CAGCGA 0.000221672533205 0.000105358536773 +CAGCGC 0.000380259015293 0.000138327874811 +TGCTAG 6.62229265863e-06 0.000217804684863 +CAGCGG 0.000400125893269 0.000159749980927 +CCTTGT 0.000128263352546 0.000350319125672 +TCATTC 0.000144296271614 0.000265347425947 +CGTTTG 6.34345928353e-05 8.79182347675e-05 +CGTTTA 3.58997970442e-05 5.56656214696e-05 +ATTCTT 0.000247116078683 0.000394039334809 +CGTTTC 0.000101774181912 8.60069687943e-05 +TGGCAA 0.000125823560514 0.000283663724857 +CCGATC 5.71608418956e-05 4.02958576018e-05 +TGGCAG 0.000384441515919 0.000423902865641 +GCTGAG 0.00105677849163 0.000506644755064 +GCTGAA 0.00063539155351 0.000348487495781 +GCTGAC 0.000646544888514 0.000273072125922 +TGGCAT 0.000138371062393 0.000285415718665 +GCTGAT 0.000509916534715 0.000245438405393 +CCGATG 8.6786888e-05 5.68601627029e-05 +GGCCGC 0.000358997970442 0.000157281262378 +GAACTT 0.000382698807325 0.00030460801448 +GGCCGA 0.000190652320225 8.40957028211e-05 +GGCAAT 0.00029138087698 0.000145893302621 +CTGATT 0.00036875713857 0.000284698993926 +CCATCG 8.3998554249e-05 7.07168410086e-05 +CTAAAT 0.000177059193189 0.000239386063144 +CCATCC 0.000324492340273 0.000334391909229 +CCATCA 0.000272211082442 0.000304289470151 +CTGATC 0.000471576945638 0.000218441773521 +CTGATA 0.000172528150843 0.000209203987984 +GAGTTT 0.000554878416449 0.000331843554598 +CTGATG 0.000604719882249 0.000363697987485 +GGCAAA 0.000488655489863 0.000261047077507 +CTAAAG 0.000320309839646 0.000261923074411 +CCATCT 0.000307065254329 0.000419124700708 +CTAAAC 0.000130703144578 0.000167872861313 +CTAAAA 0.000190652320225 0.00030819163818 +GACACC 0.000582413212241 0.000223379210618 +GACACA 0.000528389245815 0.0003376569886 +GACACG 0.000249904412434 7.4539372955e-05 +GCGCCT 0.000111881891759 0.000101137824415 +TTCACC 0.000523509661751 0.000236758072431 +TAGCGA 0.0 3.47213318466e-05 +TACGAG 0.000262451914313 
3.376569886e-05 +TACGAC 0.000196228987727 2.26962834318e-05 +GACACT 0.000452058609381 0.000276337205293 +GGCGAT 0.000129308977703 4.88965544812e-05 +TGTTAC 0.000122686685044 0.000229989005442 +ACGCTG 0.000318915672771 0.000102252729567 +ACGCTA 4.84472989237e-05 3.07395277357e-05 +ACGCTC 0.000118155642699 5.90103369227e-05 +TCGATG 5.43725081445e-05 4.77816493302e-05 +TCGAGG 5.19327161124e-05 7.8919357477e-05 +TCGATC 2.61406289156e-05 3.22526132979e-05 +ACGCTT 6.37831345542e-05 4.96132792211e-05 +TCTGGG 0.000440905274377 0.000500353504569 +GCATTG 0.000180196068659 0.000221945761139 +GAACTG 0.000732983234795 0.000343550058684 +TCGATT 3.24143798554e-05 3.87031359574e-05 +TCGGCA 7.84218867469e-05 5.9488153416e-05 +AGGCAC 0.000197274612883 0.000261126713589 +TTTTGA 1.15018767229e-05 0.000520103252959 +AACCAA 0.000300791503389 0.000337895896846 +TGCCGG 0.000185772736161 9.26963997005e-05 +AACCAC 0.000368060055132 0.000248305304352 +ACGAAC 6.48287597108e-05 3.69511421487e-05 +AACCAG 0.000755986988241 0.000311058537139 +TGCCGA 0.000115715850667 6.58590399934e-05 +TTGGAC 0.000411627769992 0.000206655633353 +TCTGGA 0.000513401951903 0.000492071352018 +GAGTTC 0.000670245725397 0.000299511305218 +TGCCGT 7.73762615903e-05 8.14677121079e-05 +AACCAT 0.00026349753947 0.000264949245536 +AACAGA 0.000362134845911 0.000408533101773 +ACGTCG 3.45056301687e-05 1.7997754581e-05 +AACAGC 0.00069115822853 0.000315836702072 +ACGTCC 0.000110836266602 5.70990709495e-05 +AACAGG 0.000282318792289 0.000291706969161 +ACGTCA 8.33014708112e-05 6.54608595823e-05 +CCCAAG 0.000846956376867 0.000376599032804 +GTCCCT 0.000307065254329 0.000320694503088 +GACGAA 0.000205988155855 4.84983740701e-05 +CCCAAC 0.00055174154098 0.000231900271416 +CCCAAA 0.000518978619405 0.000335108633969 +ACGTCT 8.92266800321e-05 6.94426636932e-05 +GGGAGA 0.000197274612883 0.000412196361555 +AACAGT 0.000376773598104 0.000310182540235 +GGGAGC 0.000341919426217 0.000356371467921 +CCCAAT 0.00032240108996 0.000159670344845 +GACGAT 
0.000249904412434 4.11718545062e-05 +GGGCGC 0.000148130230522 0.000134983159358 +GGGCGA 7.52850112771e-05 7.67691832571e-05 +GGGCGG 0.00013000606114 0.000195745490089 +TCTCTT 0.000228294825863 0.000547975881735 +TGGGGG 0.000126172102233 0.000375961944146 +GTACAG 0.00029138087698 0.000220432675576 +TCATAC 9.65460561285e-05 0.000152821641774 +CAGAAT 0.000559060917076 0.000356291831839 +ACGACT 5.26297995502e-05 4.66667441791e-05 +TCTCTC 0.000280576083695 0.000725166164667 +TCTCTA 0.000141159396145 0.000308430546426 +TCTCTG 0.000566728834891 0.000691639374054 +GGGCGT 6.58743848674e-05 7.21502904885e-05 +CAGAAA 0.000884250340787 0.000602845142382 +CAGAAC 0.000770974282152 0.000350080217426 +ACGACG 2.99745878233e-05 2.41297329117e-05 +CAGAAG 0.00151580793539 0.000628965777349 +CACTTT 0.000277787749944 0.000372537592611 +CCTAAT 0.000141507937863 0.000154016183008 +GCATTC 0.000172528150843 0.000203072009653 +GTCATT 0.000391760892016 0.000259613628027 +TGCCCC 0.000225157950393 0.000306439644371 +CACTTG 0.000273605249317 0.000323879946376 +CCTAAC 0.000186818361317 0.000154493999501 +CCTAAA 0.000226552117269 0.000224573751852 +CACTTC 0.000418947146088 0.000294972048532 +CCTAAG 0.00027569649963 0.000222105033303 +CACTTA 0.000110139183165 0.000201320015844 +GTCCCA 0.000252692746185 0.000267895780578 +GTCCCC 0.000308110879486 0.000285415718665 +AAGATT 0.000481336113767 0.000326507937089 +ATCGAT 0.000171482525687 3.58362369976e-05 +GACTCG 0.000197623154602 7.43801007906e-05 +CTCATC 0.000853578669526 0.00026367506822 +GACTCC 0.000559758000514 0.000261604530083 +CTCATA 0.000146038980209 0.000193595315869 +GACTCA 0.000353769844658 0.000256428184739 +AAGATA 0.000245024828369 0.000286689895981 +AAGATC 0.000770277198714 0.000236519164184 +AAGATG 0.000845213668273 0.000456712931514 +GACTCT 0.000484472989237 0.000312332714455 +ATCGAG 0.000382698807325 4.87372823168e-05 +TTGAGA 0.000163814607871 0.000379306659599 +ATCGAA 0.000172179609124 4.06144019306e-05 +ATCGAC 0.000287546918072 
3.07395277357e-05 +ATATTT 0.000211216281638 0.000636929385571 +ACGGTA 5.05385492369e-05 3.45620596821e-05 +GGACTC 0.000247116078683 0.000263993612549 +ATAGTT 0.000100031473317 0.000214778513739 +CTACCA 0.000123035226763 0.000211752342615 +TACTGT 0.000234917118522 0.000314642160839 +ATATTC 0.000139068145831 0.000234687534293 +ATATTA 8.08616787791e-05 0.000284698993926 +ATATTG 8.36500125301e-05 0.000243845683748 +ATAGTA 7.94675119036e-05 0.000161342702571 +ATAGTC 9.86373064417e-05 0.000130682810918 +TACTGG 0.000290335251823 0.000229829733278 +ATAGTG 0.000169042733655 0.000185870615894 +GATCGA 0.00011780710098 3.70307782309e-05 +CAGGCA 0.000544073623164 0.000431149749122 +AGGAAG 0.000744136569799 0.000705814596689 +CATCCG 6.06462590843e-05 6.58590399934e-05 +AGGAAA 0.000495626324241 0.000636451569078 +CATCCA 0.000202851280385 0.000316394154648 +CAGGCG 0.000211216281638 0.00012248029445 +CATCCC 0.000198668779759 0.000296405498011 +TTATGA 9.75916812851e-06 0.000252764924957 +ATAGAA 0.000333902966683 0.00031432361651 +CGGAGT 0.000116412934104 7.73266358326e-05 +TTATGG 5.12356326747e-05 0.000192161866389 +CATCCT 0.000214353157108 0.000364653620471 +AGGAAT 0.000251995662747 0.000335427178298 +CAGGCT 0.000730543442763 0.000495973520047 +ACGGAT 0.00013000606114 5.00910957144e-05 +CGGAGA 0.000156495231775 0.000118259582092 +CGGAGC 0.000228991909301 0.000170341579862 +TTATGT 6.72685517429e-05 0.000336701355613 +CAGGCC 0.000890872633445 0.000366246342116 +CGGAGG 0.000232128784771 0.000153936546925 +ATGGAA 0.000775505324498 0.000411638908979 +ATGGAC 0.000658395306955 0.000232616996156 +ATGGAG 0.00116970600855 0.000377076849297 +CGCTTG 0.000133491478329 7.44597368728e-05 +CGCTTA 4.94929240803e-05 3.16155246401e-05 +CGCTTC 0.000387926933108 9.00684089873e-05 +TGAACA 0.0 0.000354619474112 +ATGGAT 0.000610296549751 0.000272833217675 +TTTGCA 0.000435677148594 0.000403675300758 +CGCTTT 0.000178104818345 7.69284554216e-05 +GCCGTG 0.000403262768739 0.000117622493434 +GATCGT 
6.76170934618e-05 4.21274874928e-05 +TTTGCC 0.000533268829879 0.00031432361651 +TTGTGA 1.56843773494e-05 0.000388385172972 +GAAAAG 0.00101809036083 0.000501229501473 +GAAAAC 0.000621101343036 0.000387031359574 +ATAGAT 0.000218884199454 0.000203709098311 +GAAAAA 0.000759820947148 0.000624028340252 +GTTCCT 0.000285804209478 0.00035525656277 +TAAGTA 0.0 0.000208726171491 +TAGGGC 0.0 0.000133947890289 +ACGGTT 6.41316762731e-05 5.49488967297e-05 +GAAAAT 0.000686278644465 0.000503140767447 +ACAAGG 0.000156495231775 0.000285256446501 +ACAAGA 0.000194834820851 0.000337895896846 +ACAAGC 0.000185772736161 0.000241456601282 +ACACGT 4.42647982972e-05 8.25029811767e-05 +CTGCGT 0.000165905858185 0.000100819280087 +GACGTC 0.000195531904289 6.03641503204e-05 +GCGGAG 0.000366665888257 0.000156007085063 +CATAAG 0.000193092112257 0.000173049206657 +GCGGAA 0.00012408085192 6.84073946243e-05 +CATAAA 0.000228643367582 0.000304926558809 +GCCAAC 0.000668851558522 0.000173686295315 +CATAAC 0.000130354602859 0.000138088966564 +ACACGC 7.66791781526e-05 7.19113822419e-05 +ACAAGT 0.000184727111004 0.000249260937339 +ACACGA 5.82064670522e-05 6.16383276359e-05 +ACACGG 8.99237634698e-05 8.80775069319e-05 +CATAAT 0.000130354602859 0.000197656756062 +GCGGAT 0.000120246893012 5.16838173588e-05 +GCACAT 0.000142902104739 0.000250694386819 +CTGCGG 0.000498414657992 0.000167554316984 +CTGCGA 0.0001920464871 7.97157182991e-05 +CTGCGC 0.000478199238297 0.00013864641914 +CGGCAA 0.000112927516916 5.12060008655e-05 +ACCGGT 5.50695915823e-05 4.12514905884e-05 +CGGCAG 0.000468440070168 0.000141194773771 +GACGCG 0.000100728556755 4.71445606724e-05 +CCTCCT 0.000697083437751 0.0005991818826 +TGTTGA 7.66791781526e-06 0.00031790724021 +CGGCAT 9.61975144096e-05 4.46758421237e-05 +CCTCCC 0.000499460283148 0.000628089780445 +CCTCCA 0.000659789473831 0.000463083818091 +CACGCC 0.000168345650217 0.000103606542964 +CCTCCG 0.000188212528193 0.000152343825281 diff --git a/bin/cpat_model/Mouse_cutoff.txt 
b/bin/cpat_model/Mouse_cutoff.txt new file mode 100755 index 0000000..fc99925 --- /dev/null +++ b/bin/cpat_model/Mouse_cutoff.txt @@ -0,0 +1,2 @@ +Coding Probability Cutoff: 0.44 +Achieved Sensitivity and Specificity: 0.955 diff --git a/bin/cpat_model/Mouse_logitModel.RData b/bin/cpat_model/Mouse_logitModel.RData new file mode 100755 index 0000000..a0c06c1 Binary files /dev/null and b/bin/cpat_model/Mouse_logitModel.RData differ diff --git a/bin/cpat_model/Zebrafish_logitModel.RData b/bin/cpat_model/Zebrafish_logitModel.RData new file mode 100755 index 0000000..ea1ef0a Binary files /dev/null and b/bin/cpat_model/Zebrafish_logitModel.RData differ diff --git a/bin/cpat_model/fly_Hexamer.tsv b/bin/cpat_model/fly_Hexamer.tsv new file mode 100755 index 0000000..c619f8f --- /dev/null +++ b/bin/cpat_model/fly_Hexamer.tsv @@ -0,0 +1,4097 @@ +hexamer coding noncoding +GAACGT 0.000237052563287 0.000151040883142 +CTTCTT 0.000113847612631 0.000291206822698 +CACCCT 3.58697957605e-05 0.000114186907655 +GAACGG 0.000243290788636 0.000157082518468 +GAACGC 0.000414841985752 0.000160103336131 +GAACGA 0.000193384985839 0.000236227941234 +CACCCA 9.045426757e-05 0.000277311061449 +CTTCTA 8.10969295455e-05 0.000186686531564 +CACCCC 8.57755985577e-05 0.000123853524177 +CTTCTC 0.000149717408392 0.000177019915043 +CACCCG 0.000113847612631 0.000141978430154 +CTTCTG 0.000325947274519 0.000206623928138 +CGTGTG 0.000269803246372 0.000222936343518 +TAAGGT 0.0 0.000166144971456 +CGTGTC 0.000131002732343 0.000120832706514 +CGTGTA 5.14653591346e-05 0.000140770103088 +GGAAAT 0.00041016331674 0.000407206220951 +TAAGGG 0.0 0.000122041033579 +CGTGTT 9.35733802448e-05 0.000244082067158 +TAAGGC 0.0 0.000167957462054 +TAAGGA 0.0 0.000198769802215 +TCACTG 0.000305673042133 0.000161311663196 +GTCAAA 0.000268243690035 0.00033410243351 +CCCGCT 0.00027760102806 0.000122041033579 +GTCAAG 0.000878030217964 0.000179436569173 +CTGTCC 0.000581714513855 0.000142582593686 +TCAGAG 0.000232373894275 0.000164936644391 
+CTGTCA 9.98116055944e-05 0.000177019915043 +CTGTCG 0.000374293520979 0.000116603561786 +GTATCT 7.95373732081e-05 0.000196957311617 +TCAGAA 0.000107609387282 0.000267040281395 +GTCAAT 0.000350900175918 0.000204207274008 +GTATCA 7.95373732081e-05 0.000119020215916 +GTATCC 0.000227695225262 0.00012929099597 +GTATCG 0.000115407168969 0.000103916127602 +CTGTCT 0.000118526281643 0.000166749134989 +GGTGTC 0.000252648126661 0.000105728618199 +GGTGTA 8.73351548951e-05 0.000128082668904 +GGTGTG 0.000350900175918 0.000155874191403 +TATCCT 0.00013256228868 0.00019756147515 +CCGGGG 4.21080211102e-05 8.33745674944e-05 +TTCTGT 0.000127883619668 0.000343164886499 +ATTCCT 0.00013256228868 0.000198769802215 +CCGGGC 0.000389889084353 7.85412592339e-05 +CCGGGA 0.000262005464685 8.94162028201e-05 +TATCCA 0.000237052563287 0.00024468623069 +TATCCC 0.000330625943532 0.000153457537272 +GTTCTG 0.000341542837893 0.000169165789119 +TACACC 0.000458509563199 0.000198769802215 +TATCCG 0.000343102394231 0.000133520140698 +GGTGTT 0.000207420992876 0.000163124153793 +ATTCCG 0.000354019288593 0.000218103035257 +TTCTGG 0.000291637035096 0.000227165488246 +CCGGGT 0.000233933450612 0.000100291146406 +ATTCCC 0.00040392509139 0.000273081916721 +TTCTGC 0.00036181707028 0.000227769651778 +ATTCCA 0.000233933450612 0.00035283150302 +GTTCTA 9.35733802448e-05 0.00019997812928 +TGCACT 0.000137240957692 0.00023985292243 +TATCTT 9.35733802448e-05 0.000301477602752 +CGCGGT 0.000163753415428 0.000126874341839 +CCCGCG 0.000120085837981 5.07497367357e-05 +CGCGGG 5.14653591346e-05 7.85412592339e-05 +CGCGGC 0.000433556661801 8.8207875755e-05 +CGCGGA 0.000226135668925 9.66661652109e-05 +ACCTGT 0.000188706316827 0.000127478505372 +TCATGT 2.80720140734e-05 0.000157082518468 +CCCGCC 0.000519332260359 0.000114186907655 +TTACAG 9.045426757e-05 0.000198165638682 +CTCCGC 0.000146598295717 0.000132915977165 +TTACAA 3.89889084353e-05 0.000474268373066 +TTACAC 3.89889084353e-05 0.00023985292243 +CTCCGG 9.8252049257e-05 
0.000127478505372 +TCATGC 7.01800351836e-05 0.000138957612491 +ACCTGG 0.000244850344974 0.000144395084284 +GGCGAA 0.000550523387107 0.000218103035257 +GTAAGC 8.57755985577e-05 0.000196957311617 +TCATGG 5.14653591346e-05 0.000127478505372 +ACCTGC 0.000388329528016 0.000122041033579 +ACCTGA 1.55955633741e-06 0.000155874191403 +GTATTT 0.000124764506993 0.000544351342844 +CTCGAT 0.000308792154808 0.000163124153793 +CTCCGT 5.3024915472e-05 0.000121436870046 +GCCTGA 9.35733802448e-06 0.00011297858059 +GCCTGC 0.000483462464598 0.000148624229012 +GCCTGG 0.000322828161845 0.000115999398253 +TCTTGC 5.77035844843e-05 0.000154665864337 +TAAAGG 0.0 0.000246498721288 +CAAATT 0.000252648126661 0.000769704340492 +TAAAGA 0.0 0.000453726812959 +GCATAA 4.67866901224e-06 0.000297248458024 +AAGGTC 0.000548963830769 0.000158895009065 +TATTAG 1.24764506993e-05 0.000236227941234 +GCCTGT 0.000204301880201 0.000129895159502 +TATCTA 0.000124764506993 0.000282748533242 +TAAAGT 0.0 0.000465205920078 +CAAATA 0.000182468091477 0.000723183748484 +CAAATC 0.000308792154808 0.000494205769641 +CGTGCT 0.000141919626705 0.000128082668904 +CAAATG 0.000511534478672 0.000477893354262 +AATACT 0.000154396077404 0.000408414548016 +TATCTC 0.000121645394318 0.000186686531564 +CCGGCG 0.000399246422378 0.00010814527233 +GGCATG 0.000703359908173 0.000156478354935 +GGCATA 0.000227695225262 0.000174603260912 +GGCGAC 0.0006752878941 0.00011056192646 +GGCATC 0.000863994210927 0.000164332480859 +AATACG 0.000246409901311 0.000226561324713 +AATACA 0.000151276964729 0.000648871633978 +AATACC 0.000288517922421 0.000238644595364 +GGCATT 0.000606667415254 0.000281540206177 +TATCTG 0.000575476288505 0.000249519538951 +TGCACA 0.000118526281643 0.000268852771993 +GAGCAC 0.000851517760228 0.000185478204498 +ACTGAT 0.000263565021023 0.00026824860846 +GAGCAG 0.00183871692181 0.000291206822698 +AGTAGT 0.000145038739379 0.000175811587977 +TCTCGC 0.000110728499956 0.000138957612491 +TCTCGA 7.48587041958e-05 0.000163728317326 +TCTCGG 
4.83462464598e-05 0.000107541108797 +ACTGAG 0.000279160584397 0.000185478204498 +AGTAGA 6.39418098339e-05 0.000193936493954 +TTTGGT 0.000285398809747 0.000378206371388 +AGTAGG 3.89889084353e-05 0.000109957762927 +ACTGAA 0.00027136280271 0.000378206371388 +AAGGTT 0.000333745056206 0.000202998946943 +GGGTCA 4.21080211102e-05 9.78744922761e-05 +GTGTCG 0.000290077478759 9.90828193412e-05 +TCGAAC 0.000293196591434 0.000208436418736 +TCGAAA 0.000198063654851 0.000541330525181 +GTGTCC 0.000491260246285 0.000149832556077 +CGCACG 0.000205861436539 9.48536746132e-05 +GTGTCA 9.51329365822e-05 0.000157082518468 +GTTGTA 7.95373732081e-05 0.000243477903625 +CTTGCT 8.26564858829e-05 0.000181249059771 +TCGGTC 0.000149717408392 0.000103916127602 +GTGTCT 9.8252049257e-05 0.000161311663196 +TTGAAG 0.000463188232212 0.000301477602752 +TCGAAT 0.000371174408304 0.000387268824376 +CTTGCG 6.39418098339e-05 0.000106332781732 +CTTGCA 7.95373732081e-05 0.000235623777702 +CTTGCC 0.000185587204152 0.00013654095836 +CGAAAG 0.000380531746329 0.000369748081932 +CGAAAA 0.000201182767526 0.000709892150768 +CGAAAC 0.00018090853514 0.000360685628943 +AATCAT 0.000207420992876 0.000465205920078 +AAATGC 0.000205861436539 0.000538309707518 +CACGGC 0.000357138401268 0.000128082668904 +AAATGA 2.49529013986e-05 0.000671829848216 +ACAGTC 0.000115407168969 0.000161311663196 +CACGGG 5.45844718095e-05 7.24996239082e-05 +ACAGTA 7.1739591521e-05 0.000161915826728 +TAGAGC 0.0 0.000198165638682 +CGAAAT 0.000210540105551 0.000544351342844 +ACAGTT 0.000168432084441 0.000291206822698 +AATCAG 0.000611346084266 0.000332289942913 +CACGGT 0.000115407168969 0.000119624379449 +AAATGT 0.000110728499956 0.000769704340492 +AATCAC 0.000322828161845 0.000289998495633 +TTTGTA 0.000135681401355 0.000694788062454 +GCTCAA 0.000263565021023 0.000194540657487 +TCTAGT 0.000107609387282 0.000196353148085 +TTGCCG 0.000324387718182 0.000246498721288 +ATACGT 7.48587041958e-05 0.00017641575151 +GGGTAT 3.11911267483e-05 0.000117207725318 
+ATACGA 0.000107609387282 0.000244082067158 +TCTAGG 4.05484647727e-05 0.000123249360644 +ATACGC 0.000218337887238 0.000152249210207 +TCTAGA 6.08226971591e-05 0.000146207574882 +GGGTAG 0.0 7.31037874408e-05 +TCTAGC 0.000121645394318 0.000126874341839 +ATACGG 6.86204788462e-05 0.000143790920751 +TGACTA 0.0 0.000185478204498 +TTTGTG 0.000656573218051 0.000520788965074 +TGGGCC 0.000233933450612 0.000143790920751 +AGATGT 3.27506830857e-05 0.00022354050705 +TCGCCC 0.000595750520892 0.000148624229012 +TCGCCA 0.000411722873077 0.000196957311617 +GGGTCG 3.11911267483e-05 6.34371709197e-05 +TCGCCG 0.000558321168794 0.00015285337374 +GGTAAT 0.000151276964729 0.000196353148085 +ACCCAT 0.000210540105551 0.000190311512759 +GTCAAC 0.000492819802623 0.000199373965748 +ACCCAC 0.000339983281556 0.000241061249495 +GGTAAC 0.000199623211189 9.18328569504e-05 +ACCCAA 0.000247969457649 0.000317185854598 +AGATGG 6.08226971591e-05 0.000241665413027 +ACCCAG 0.000527130042046 0.000184269877433 +AGATGA 4.67866901224e-06 0.000253144520146 +TCGCCT 0.000159074746416 0.000159499172598 +AGATGC 4.83462464598e-05 0.000186686531564 +TTTGAG 0.000541166049082 0.000292415149763 +CCCCAT 0.000166872528103 0.000158290845533 +TGGTCT 4.36675774476e-05 0.000161915826728 +TTGGGT 0.000333745056206 0.000211457236399 +CACTAG 1.55955633741e-05 0.000131103486567 +CACTAA 9.35733802448e-06 0.000203603110476 +CACTAC 0.000378972189991 0.000140770103088 +CCCCAA 0.000202742323864 0.000241061249495 +CCCCAC 0.000168432084441 0.000170374116184 +CCCCAG 0.00041016331674 0.00013654095836 +CACTAT 0.0002167783309 0.000154665864337 +CTATCG 0.000182468091477 0.000132915977165 +TCGGTT 0.00017467030979 0.000270665262591 +TTGGGA 0.000274481915385 0.000256165337809 +TTGGGC 0.000516213147684 0.000179436569173 +CTCGAG 0.000408603760402 0.000113582744123 +TAGTGG 0.0 0.000166749134989 +TTTCTG 0.000408603760402 0.000392102132637 +TGCAGC 0.000329066387194 0.000292415149763 +TGCCTC 0.000185587204152 0.00013412430423 +TGCCTA 7.48587041958e-05 
0.000140770103088 +TGCAGG 5.61440281469e-05 0.000170978279717 +TTTCCG 0.0002167783309 0.000254352847211 +GTGGCC 0.00165936794301 0.00019997812928 +TCAGAC 0.000134121845018 0.000151040883142 +TTTCTT 9.98116055944e-05 0.000676663156477 +TGCCTT 0.000106049830944 0.00021870719879 +TGCAGT 0.000201182767526 0.000299665112154 +TAGTGC 0.0 0.000160103336131 +GTGAGC 0.000343102394231 0.000137145121893 +AAGGAA 0.00100591383763 0.000436810234047 +AAGGAC 0.00111664233759 0.00020239478341 +AAGGAG 0.00232841761176 0.000273081916721 +ATATGC 0.000151276964729 0.000318998345196 +GTTCCA 0.00022301655625 0.000204207274008 +ATATGA 1.55955633741e-06 0.00052924725453 +TACGAA 0.000352459732255 0.000265227790798 +ATATGG 7.01800351836e-05 0.000288186005035 +AAGGAT 0.00123672817557 0.000251936193081 +CTATCA 8.88947112325e-05 0.000189103185694 +TGGTTT 0.00013880051403 0.000363102283074 +ATATGT 5.3024915472e-05 0.000607788513764 +GGTCCC 0.000274481915385 9.66661652109e-05 +GCCCAG 0.00113847612631 0.000196957311617 +CCACTG 0.000544285161757 0.000193332330422 +GCCCAA 0.000442913999825 0.000276706897916 +GGTCCG 0.000205861436539 6.70621521151e-05 +AGGTCT 3.43102394231e-05 0.000102707800537 +TGTGTC 9.66924929196e-05 0.000201790619878 +AGACCC 0.000116966725306 0.000122645197111 +AGACCA 7.95373732081e-05 0.000191519839824 +AGACCG 7.32991478584e-05 0.000130499323035 +CTTCGC 0.000176229866128 0.000133520140698 +ATGCAA 0.000349340619581 0.000432581089319 +AGGTCC 8.42160422203e-05 0.000113582744123 +AGGTCA 3.89889084353e-05 0.000117811888851 +AGGTCG 5.61440281469e-05 0.000118416052383 +GGTCCT 0.000193384985839 0.000100895309939 +TCCGGA 0.000378972189991 0.000165540807924 +CTTAGC 0.000182468091477 0.000142582593686 +ATTGTG 0.000720515027885 0.00035766481128 +CTTAGA 3.74293520979e-05 0.000217498871725 +ATGCAC 0.000425758880114 0.000198165638682 +CTTAGG 4.05484647727e-05 9.60620016784e-05 +ATTGTC 0.000377412633654 0.000241665413027 +ATTGTA 0.000149717408392 0.000506289040292 +CGCTAC 0.000433556661801 
9.30411840155e-05 +ATTTTG 0.00031347082382 0.000793870881795 +CGCTAA 3.11911267483e-06 0.000137749285426 +ATTTTA 0.000121645394318 0.00114851487541 +CGCTAG 7.79778168707e-06 6.58538250499e-05 +ATTTTC 0.000357138401268 0.000788433410002 +GAGGTC 0.000460069119537 0.000115999398253 +TAGCAG 0.0 0.000163124153793 +GATACT 0.000134121845018 0.000155874191403 +TAGCAC 0.0 0.000168561625587 +TAGCAA 0.0 0.000337727414706 +GAGGTT 0.00035557884493 0.000117207725318 +ATTTTT 0.000141919626705 0.00154242949865 +GCCTTG 0.000458509563199 0.000166749134989 +GTGGGG 7.95373732081e-05 0.000104520291134 +GCCTTA 0.000101371161932 0.000127478505372 +TTGACA 0.000134121845018 0.000264623627265 +CGCTAT 0.000251088570324 0.000103916127602 +GATACG 0.000322828161845 0.000147415901947 +TAGCAT 0.0 0.000219311362322 +GATACC 0.000308792154808 0.000148020065479 +GATACA 0.000152836521066 0.000295435967426 +TGAGCT 0.0 0.000177624078575 +GTCACC 0.00053024915472 0.000115999398253 +GATCAT 0.000294756147771 0.000168561625587 +TGGCAC 0.000151276964729 0.000204207274008 +AAGACA 0.000330625943532 0.000289998495633 +AAGACC 0.000779778168707 0.000169165789119 +ACAATG 0.000254207682998 0.000291810986231 +AAGACG 0.000572357175831 0.000181853223303 +CCACTT 0.000191825429502 0.000281540206177 +TGAGCG 0.0 0.000129895159502 +GATCAC 0.000419520654764 0.000204207274008 +GATCAA 0.000388329528016 0.000266436117863 +AAGACT 0.000296315704108 0.000215082217594 +GAAGGG 7.48587041958e-05 0.000137145121893 +CTTCCA 0.000135681401355 0.000203603110476 +GCGGTA 8.73351548951e-05 0.000103311964069 +GCGGTG 0.00057859540118 0.000153457537272 +CGGTGT 3.74293520979e-05 0.000109353599395 +TCGTTA 5.14653591346e-05 0.000209644745801 +TCGTTC 0.000212099661888 0.000175811587977 +CTTCCC 0.00013880051403 0.000140770103088 +ACTTGT 5.92631408217e-05 0.000290602659165 +TTGGAG 0.000876470661626 0.000228373815311 +TGTGTG 0.000244850344974 0.000613225985557 +GCGGTT 0.000188706316827 0.000178228242108 +ACTTGC 0.00013256228868 0.000219915525855 
+ATGTGA 9.35733802448e-06 0.000293623476828 +ACTTGA 4.67866901224e-06 0.000288790168568 +CGGTGG 4.36675774476e-05 0.000185478204498 +ACTTGG 9.51329365822e-05 0.000225957161181 +CGGTGA 0.0 0.000103311964069 +CGGTGC 9.045426757e-05 0.00013654095836 +TGCCGC 0.000269803246372 0.000183665713901 +TTCCTA 0.000169991640778 0.000212061399931 +TAGACT 0.0 0.00011297858059 +TTCCTC 0.000489700689948 0.000209644745801 +ATGTGC 0.000305673042133 0.000241665413027 +TGGGTT 8.10969295455e-05 0.000204811437541 +TTCCTG 0.000924816908086 0.000194540657487 +TCCTTT 0.000260445908348 0.000306310911012 +GACCCA 0.000106049830944 0.000169165789119 +TTCCGT 0.000233933450612 0.000180040732705 +TAGACG 0.0 9.60620016784e-05 +TTCCTT 0.0002292547816 0.000285165187372 +TAGACC 0.0 0.000100291146406 +TAGACA 0.0 0.000170374116184 +TCCTTG 0.000280720140734 0.000155874191403 +TCCTTA 6.55013661714e-05 0.00015285337374 +TCCTTC 0.000375853077317 0.000183061550368 +ATCTCT 0.00013880051403 0.00019997812928 +TCCGCA 0.0002167783309 0.000154665864337 +ACCGTT 0.000215218774563 0.000151040883142 +TGGCGC 0.000257326795673 0.000160707499663 +TTTACG 0.000201182767526 0.000226561324713 +TGGCGA 0.000102930718269 0.000160103336131 +GGCCAT 0.000208980549213 0.000203603110476 +ATCTCG 0.000399246422378 0.000140770103088 +ATCTCA 0.000168432084441 0.000212061399931 +ATCTCC 0.000350900175918 0.000164936644391 +GGCCAA 0.000286958366084 0.00030812340161 +GGCCAC 0.000364936182955 0.000177019915043 +ACCGTG 0.000456950006862 0.000102103637004 +ACCGTA 0.000121645394318 8.21662404293e-05 +GGCCAG 0.000715836358873 0.000213873890529 +ACCGTC 0.000279160584397 8.3978731027e-05 +AGAATT 8.88947112325e-05 0.000425331126928 +GGCAGT 0.000556761612456 0.000250123702483 +CCATAA 3.11911267483e-06 0.000270665262591 +CAAAGG 0.000149717408392 0.000251332029548 +CCATAC 0.000121645394318 0.000227769651778 +CAAAGA 0.000116966725306 0.000497226587304 +GAAACG 0.000463188232212 0.00028456102384 +CAAAGC 0.000210540105551 0.000406602057418 +CAACGT 
0.000135681401355 0.00017641575151 +GGCAGA 0.000151276964729 0.000211457236399 +GGCAGC 0.00105270052775 0.000297852621556 +CGTGAG 0.000308792154808 0.000101499473471 +GGCAGG 0.000219897443575 0.000132915977165 +CAACGC 0.000238612119624 0.000184874040966 +CAAAGT 0.00022301655625 0.000405997893886 +CAACGA 0.000115407168969 0.000375185553725 +CTAACC 0.000237052563287 0.000157082518468 +CAACGG 0.000160634302754 0.000205415601073 +CGTGAA 0.000152836521066 0.000177019915043 +GGGGGT 5.3024915472e-05 4.95414096706e-05 +TACGCT 0.000137240957692 8.51870580921e-05 +GAAACC 0.000405484647727 0.000307519238077 +CGATCT 0.000124764506993 0.000159499172598 +CTCTAG 3.11911267483e-06 0.000111770253525 +TGAGCC 0.0 0.000138353448958 +CTCTAA 1.55955633741e-05 0.000248915375418 +CTCTAC 0.000417961098427 0.000126270178307 +GGGGGG 1.55955633741e-05 8.03537498316e-05 +CGATCC 0.000262005464685 0.000182457386836 +CGATCA 7.64182605332e-05 0.000213873890529 +GGGGGC 0.000127883619668 6.76663156477e-05 +CGATCG 0.000199623211189 0.000168561625587 +GGGGGA 7.1739591521e-05 0.000123249360644 +TACGCG 0.000165312971766 7.31037874408e-05 +CTCTAT 0.000193384985839 0.000163124153793 +TTCTGA 1.71551197115e-05 0.000252540356614 +CAGACC 0.000438235330813 0.000123853524177 +CAGACA 0.000257326795673 0.000260394482537 +CAGACG 0.000509974922334 0.000171582443249 +TGCGTG 0.000302553929458 0.000202998946943 +TAGTTG 0.0 0.000278519388514 +GCTAAT 0.000188706316827 0.000252540356614 +TAGTTC 0.0 0.000224748834115 +TAGTTA 0.0 0.000319602508729 +CAGACT 0.000235493006949 0.000142582593686 +GCTAAG 0.00026512457736 0.000154061700805 +TAGTTT 0.0 0.000544351342844 +GCTAAC 0.000169991640778 0.000155874191403 +GCTAAA 0.000121645394318 0.000317185854598 +TAAGTG 0.0 0.000341352395901 +TGCTGC 0.000260445908348 0.000323227489924 +AACCCT 5.45844718095e-05 0.000169769952652 +TATCAA 0.000185587204152 0.000435601906982 +AACCCC 0.000179348978803 0.000165540807924 +AACCCA 0.000154396077404 0.000293623476828 +AACCCG 0.000146598295717 
0.000139561776023 +TGCATC 0.000385210415341 0.000206019764606 +AGATTT 7.32991478584e-05 0.000441643542307 +ACGTAA 1.09168943619e-05 0.000195748984552 +AACTGG 0.000318149492832 0.000272477753188 +AACTGA 2.49529013986e-05 0.000410227038614 +AACTGC 0.000447592668838 0.000319602508729 +CCCAGA 0.000140360070367 0.000195748984552 +CCCAGC 0.000647215880026 0.000205415601073 +CCCAGG 0.000190265873164 0.000101499473471 +AACTGT 0.000176229866128 0.000286373514437 +AGTTTT 0.00018714676049 0.00073164203794 +ACGTAT 0.000101371161932 0.000209040582269 +CCCAGT 0.000405484647727 0.000180644896238 +CTCCAA 0.000212099661888 0.000225957161181 +TTGACG 0.000145038739379 0.000128686832437 +TATAAT 0.000166872528103 0.000675454829411 +CGCATA 0.000151276964729 0.000198165638682 +ATACTG 0.000288517922421 0.000216894708192 +CGCATC 0.000658132774388 0.00017641575151 +ATACTA 8.88947112325e-05 0.000232602960039 +ATACTC 0.000196504098514 0.000185478204498 +CGCATG 0.000486581577273 0.000100895309939 +GCGCAA 0.000210540105551 0.000196353148085 +ATACTT 9.20138239074e-05 0.000381831352583 +CGCATT 0.000453830894187 0.000209040582269 +GCGCAC 0.000212099661888 0.000122041033579 +GCTCCG 0.000330625943532 0.000147415901947 +TATTGT 5.61440281469e-05 0.000584830299526 +GCTCCC 0.00041016331674 9.84786558086e-05 +GCTCCA 0.000442913999825 0.000203603110476 +CCTACT 5.92631408217e-05 0.000118416052383 +TGTACT 3.11911267483e-05 0.000236832104767 +TTTAGT 0.000121645394318 0.000547976324039 +TATTGG 8.42160422203e-05 0.000228373815311 +GCTCCT 0.000369614851967 0.000153457537272 +GCGCAG 0.000435116218138 0.000169165789119 +GCTGGG 0.000107609387282 0.000128686832437 +TTTAGG 6.39418098339e-05 0.000229582142376 +TGTACG 4.21080211102e-05 0.000154061700805 +CCTACG 9.66924929196e-05 7.61246051036e-05 +TGTACA 3.89889084353e-05 0.000358873138346 +CCTACA 8.10969295455e-05 0.000140165939556 +TGTACC 4.67866901224e-05 0.00015043671961 +CCTACC 8.42160422203e-05 9.18328569504e-05 +TTCGCG 9.51329365822e-05 0.000155874191403 
+TTCGCA 0.000140360070367 0.000231998796506 +TTCGCC 0.000645656323689 0.000192124003357 +AGTATC 0.000112288056294 0.000163124153793 +GACTAA 9.35733802448e-06 0.000160707499663 +AGTATA 7.1739591521e-05 0.000281540206177 +GACTAC 0.000786016394056 0.000141374266621 +AGTATG 9.045426757e-05 0.000165540807924 +TTTGGA 0.000330625943532 0.000343164886499 +GACTAG 1.55955633741e-05 9.84786558086e-05 +TTCGCT 0.000165312971766 0.000253144520146 +GTGTAT 0.000124764506993 0.000304498420414 +GACTAT 0.000396127309703 0.000164936644391 +AGTATT 0.000124764506993 0.000389081314974 +TTACTC 5.45844718095e-05 0.000181249059771 +GGTCTT 0.000163753415428 0.000124457687709 +TTACTG 8.26564858829e-05 0.000183665713901 +ACCTTG 0.00027136280271 0.000132311813632 +ACCTTC 0.000601988746241 0.000131103486567 +ACCTTA 8.26564858829e-05 0.000172790770315 +TTACTT 7.1739591521e-05 0.000402372912691 +ACCTTT 0.000324387718182 0.000254957010744 +ACAGGG 3.43102394231e-05 8.63953851573e-05 +CTGGGA 0.00053024915472 0.000149832556077 +CTGGGC 0.00118994148545 0.000130499323035 +ACAGGC 0.000134121845018 0.000116603561786 +ACAGGA 0.000194944542177 0.000196353148085 +CTGGGG 0.000116966725306 8.45828945596e-05 +CAGGAC 0.000737670147596 0.000184269877433 +GCAGCT 0.000561440281469 0.000306310911012 +CAGGAA 0.000659692330726 0.000202998946943 +TCGCTT 0.000157515190079 0.000277915224981 +AGGACC 9.8252049257e-05 0.000105728618199 +AGGACA 8.73351548951e-05 0.000233811287104 +GCGCAT 0.000121645394318 0.000130499323035 +TTATAC 3.89889084353e-05 0.000339539905303 +ACAGGT 0.000104490274607 0.00011297858059 +CTGGGT 0.00040392509139 0.000140165939556 +TTATAG 9.35733802448e-06 0.000354643993618 +CTCCGA 5.92631408217e-05 0.000139561776023 +TCGCTG 0.00105114097142 0.000214478054062 +GAGTTG 0.000541166049082 0.00019756147515 +GCAGCG 0.000461628675874 0.000325644144054 +AGGACT 7.32991478584e-05 0.000167957462054 +GCAGCA 0.00080161195743 0.000691767244791 +GCAGCC 0.00066904966875 0.000244082067158 +CAGGAT 0.000832803084179 
0.000191519839824 +TGCTGA 7.79778168707e-06 0.000262811136667 +CCTCAG 0.00018090853514 0.000115395234721 +CCTCAA 0.00013256228868 0.000156478354935 +CCTCAC 6.70609225088e-05 0.000126270178307 +TATGTT 0.000135681401355 0.000420497818668 +ATGGGC 0.000679966563112 0.000141374266621 +TCTTTG 0.000135681401355 0.000317790018131 +ATGGGA 0.000290077478759 0.000190311512759 +TCTTTA 6.55013661714e-05 0.000364914773671 +ATGGGG 7.32991478584e-05 0.000135332631295 +TCTTTC 7.1739591521e-05 0.000303894256882 +CCTCAT 7.01800351836e-05 0.000146811738414 +TCTTTT 7.1739591521e-05 0.000581205318331 +GCTAGC 0.000198063654851 0.00015285337374 +TATGTA 0.000131002732343 0.000734058692071 +ATGGGT 0.000296315704108 0.000139561776023 +TATGTC 0.000179348978803 0.000148624229012 +GAAAGA 0.000140360070367 0.000370352245464 +ATCAAG 0.00128663397837 0.000331081615847 +CATGAT 0.000145038739379 0.00017399909738 +ATCAAC 0.000860875098252 0.000347998194759 +GAAAGG 0.000212099661888 0.00024710288482 +ATCAAA 0.000441354443488 0.00070324635191 +GGAACT 0.000246409901311 0.000203603110476 +GTCAGG 7.95373732081e-05 8.03537498316e-05 +CATGAG 0.000247969457649 0.000107541108797 +ATCAAT 0.00057859540118 0.000453726812959 +GTCCAA 0.000169991640778 0.000212665563464 +CATGAC 0.000176229866128 0.00013654095836 +CATGAA 0.000188706316827 0.000213873890529 +TGACCT 0.0 0.000123853524177 +GTAGCG 6.23822534965e-05 0.000105728618199 +CAAGTT 0.000154396077404 0.000315977527533 +GCGGCT 0.000385210415341 0.000149228392544 +GCTTTA 9.35733802448e-05 0.000261602809602 +GCTTTC 0.000212099661888 0.000230790469441 +TTTCTC 0.000191825429502 0.000324435816989 +GCGGCG 0.000564559394144 0.000167353298521 +CAAGTG 0.000407044204065 0.000331081615847 +GTAGCT 0.000116966725306 0.000181249059771 +TTCTTT 0.000433556661801 0.000645850816316 +GCGGCC 0.000848398647553 0.000105124454667 +CAAGTC 0.00013880051403 0.000267040281395 +GCGGCA 0.000513094035009 0.000235623777702 +CAAGTA 0.000113847612631 0.000243477903625 +AACGGT 0.000297875260446 
0.000178228242108 +GGGGAA 0.000131002732343 0.000165540807924 +TAATCG 0.0 0.000181249059771 +CTTGTG 0.000157515190079 0.00022354050705 +TAATCA 0.0 0.0003951229503 +TAATCC 0.0 0.000180644896238 +TGCGAA 0.00026512457736 0.000275498570851 +CGGCCC 0.000127883619668 7.9749586299e-05 +AAAATT 0.00027760102806 0.0013448680235 +TTGATA 0.000106049830944 0.000381227189051 +CGGCCG 0.000129443176005 8.51870580921e-05 +CTTGTA 4.83462464598e-05 0.000239248758897 +GGTCTG 0.000520891816696 9.18328569504e-05 +TAATCT 0.0 0.000250123702483 +AGTTTA 9.8252049257e-05 0.000479705844859 +TGCGAC 0.000304113485796 0.000146811738414 +GGTCTC 0.000184027647815 8.8207875755e-05 +GGAGGC 0.000467866901224 0.000164936644391 +GGTCTA 0.000118526281643 0.000135332631295 +AAAATG 0.000481902908261 0.00091047444358 +AAAATC 0.000375853077317 0.000839183146737 +TCAATA 8.88947112325e-05 0.000360685628943 +AAAATA 0.000249529013986 0.00158653343652 +TCGTCA 0.00012632406333 0.000145603411349 +ATCCCT 4.36675774476e-05 0.000128082668904 +TTCAGG 7.48587041958e-05 0.000145603411349 +ATATCC 0.000262005464685 0.000245894557755 +TCGTCG 0.000380531746329 0.00017399909738 +TTTAGC 0.00018090853514 0.000305102583947 +TGCCAA 0.000152836521066 0.000398143767963 +TCGGGA 0.000343102394231 0.000128686832437 +GAACAA 0.000333745056206 0.000413852019809 +ATCCCG 0.00012632406333 0.000144395084284 +GAACAC 0.0002292547816 0.000304498420414 +ATCCCC 0.000123204950656 0.000158895009065 +GAACAG 0.000659692330726 0.000236227941234 +ATCCCA 9.51329365822e-05 0.000254957010744 +AACGGA 0.000586393182867 0.000293019313296 +CACCAC 0.000456950006862 0.00028697767797 +CACCAA 0.000218337887238 0.00035766481128 +CACCAG 0.000676847450437 0.000230790469441 +AACATT 0.00050061758431 0.000602351041971 +AACGGG 0.000143479183042 0.00011297858059 +TCCGTC 0.000168432084441 0.000106332781732 +GAGTTA 0.000112288056294 0.000189103185694 +TCCGTA 0.000124764506993 0.000146811738414 +ACCAGA 0.000116966725306 0.000274290243786 +CACCAT 0.00031347082382 
0.000189103185694 +AACATG 0.000623822534965 0.000279727715579 +AACATA 0.000333745056206 0.000529851418062 +AACATC 0.000743908372946 0.000292415149763 +CAGTCG 0.000555202056119 0.00019756147515 +GCACGG 5.61440281469e-05 9.66661652109e-05 +GTCAGA 6.70609225088e-05 0.000140770103088 +GCACGA 0.000102930718269 0.000121436870046 +CAGTCC 0.000346221506906 0.000129895159502 +GCACGC 0.000163753415428 0.000118416052383 +CAGTCA 0.000176229866128 0.000193936493954 +GCACCA 0.000391448640691 0.000235019614169 +GCAAGC 0.00013880051403 0.000227165488246 +GCACGT 9.51329365822e-05 0.000117207725318 +CAGTCT 0.000127883619668 0.000144395084284 +TAAAGC 0.0 0.000342560722966 +GCAAGG 9.8252049257e-05 0.000164332480859 +GTCAGT 0.000205861436539 0.000143790920751 +GTATAC 9.66924929196e-05 0.000186686531564 +TTCCCG 0.000196504098514 0.000172790770315 +GTATAA 3.11911267483e-06 0.000358873138346 +GTATAG 7.79778168707e-06 0.000149228392544 +CCGGAA 0.000481902908261 0.000143790920751 +CCGGAC 0.000417961098427 0.000125666014774 +CCGGAG 0.000901423563025 0.000137749285426 +CGTCTT 9.8252049257e-05 0.000118416052383 +TATTTG 0.00027760102806 0.000593892752515 +TTCCGG 0.000268243690035 0.00012929099597 +TTCCGA 0.00017467030979 0.000228373815311 +TTCCGC 0.000464747788549 0.000156478354935 +GTATAT 8.10969295455e-05 0.00053408056279 +CGTCTC 0.000162193859091 8.33745674944e-05 +CGTCTA 9.8252049257e-05 0.000100291146406 +CGTCTG 0.000427318436451 0.00015043671961 +CCGGAT 0.000509974922334 0.000126874341839 +CTCTTC 0.000587952739205 0.000206019764606 +TGTTAT 4.21080211102e-05 0.000385456333779 +CTCTTA 4.83462464598e-05 0.000183665713901 +CTCTTG 0.000152836521066 0.000193936493954 +CCCGAT 0.000511534478672 0.000160103336131 +TACGTA 8.26564858829e-05 0.000155874191403 +TACGTC 0.000166872528103 8.45828945596e-05 +TCTGGT 0.000196504098514 0.00017641575151 +CGCGAT 0.000499058027972 0.00011056192646 +TACGTG 0.00040392509139 0.000118416052383 +TGTTAG 7.79778168707e-06 0.000216894708192 +GGTTTC 0.000302553929458 
0.000193332330422 +CTCGTG 0.000169991640778 9.36453475481e-05 +CTCTTT 0.000249529013986 0.00033410243351 +TGTTAA 4.67866901224e-06 0.000487559970783 +CGCGAA 0.000408603760402 0.000139561776023 +CGCGAC 0.000447592668838 8.21662404293e-05 +GATATT 0.000439794887151 0.000369748081932 +TCTGGC 0.000308792154808 0.000175811587977 +TACGTT 0.000155955633741 0.000192728166889 +CGCGAG 0.000868672879939 9.48536746132e-05 +AGTCGT 0.000109168943619 0.000128082668904 +TTACCT 6.55013661714e-05 0.000201790619878 +CTCCAT 0.000143479183042 0.000180040732705 +TATGCC 0.000514653591346 0.000177019915043 +TTACCG 4.21080211102e-05 0.000109957762927 +CTCCAC 0.000188706316827 0.000206019764606 +AGTCGG 9.51329365822e-05 0.000158290845533 +TTACCC 8.57755985577e-05 0.000151645046675 +AGTCGA 0.000134121845018 0.000228373815311 +TTACCA 9.98116055944e-05 0.000271873589656 +AGTCGC 0.000291637035096 0.000165540807924 +GTACCG 9.8252049257e-05 7.9749586299e-05 +CCGACT 0.000118526281643 0.00013654095836 +GCCTAC 0.000653454105376 0.000101499473471 +GCCTAA 1.55955633741e-05 0.000162519990261 +GCCTAG 1.24764506993e-05 7.9749586299e-05 +GTGCGG 0.000241731232299 0.000135936794828 +CCGACA 0.000137240957692 0.000138353448958 +GATATC 0.00040392509139 0.000225352997648 +CCGACC 0.000184027647815 0.000148020065479 +CCGACG 0.0002167783309 9.90828193412e-05 +GCCTAT 0.000428877992789 0.000106332781732 +TGACGG 0.0 9.06245298853e-05 +TGACGT 0.0 0.000109353599395 +CCCGAG 0.000639418098339 0.000107541108797 +CTCGTT 9.98116055944e-05 0.000177019915043 +CCCGAA 0.00041016331674 0.000189103185694 +CGCACA 0.000173110753453 0.000219915525855 +ATACAC 0.000120085837981 0.000425331126928 +CGCACC 0.000592631408217 0.000106936945265 +ATACAA 0.000141919626705 0.000624100929143 +ACTGCT 0.000205861436539 0.000177624078575 +ATACAG 0.000212099661888 0.000231998796506 +ATTGCC 0.000742348816609 0.000271269426123 +GGGTCC 7.48587041958e-05 5.79996991266e-05 +GAGCGT 0.000394567753366 0.000157686682 +CTCACT 0.000155955633741 
0.000126874341839 +GGGTCT 3.11911267483e-05 9.12286934178e-05 +ACTGCA 0.000212099661888 0.000311144219273 +TTTGTC 0.000210540105551 0.000315373364001 +ACTGCC 0.000393008197028 0.000181853223303 +CGCACT 0.000213659218226 0.000145603411349 +ATACAT 5.92631408217e-05 0.000769704340492 +ACTGCG 0.000146598295717 0.000149832556077 +CTCACC 0.000506855809659 0.000146811738414 +CTCACA 0.000148157852054 0.000187290695096 +CTCACG 0.000166872528103 7.18954603756e-05 +GTGGGT 0.00045227133785 0.000128082668904 +GTGAGG 0.000123204950656 8.45828945596e-05 +GATTTT 0.000341542837893 0.0007104963143 +ACAATT 0.000171551197115 0.000602955205503 +CTTGAA 0.000162193859091 0.000341352395901 +TAGTGA 0.0 0.000206623928138 +GTGAGA 9.35733802448e-05 0.000160103336131 +TCGACG 0.000276041471722 0.000141978430154 +TCGACA 0.000134121845018 0.000159499172598 +TAGAGG 0.0 0.000104520291134 +ACAATA 0.000141919626705 0.000558247104093 +GTGAGT 0.000152836521066 0.000204811437541 +ACAATC 0.000163753415428 0.000279123552047 +GATTTC 0.000469426457561 0.000369748081932 +CCAGTA 0.000109168943619 0.000138353448958 +TCGACT 0.000123204950656 0.000189103185694 +CTGGTT 0.000322828161845 0.000226561324713 +TTGAGT 0.000155955633741 0.000256165337809 +TCAAAA 0.000159074746416 0.000878453776354 +CGAACG 0.000102930718269 0.000180644896238 +CGAACC 0.000127883619668 0.000160103336131 +CGAACA 9.045426757e-05 0.000264623627265 +CTGGTG 0.00118058414742 0.000160103336131 +CTGGTA 0.000160634302754 0.000103916127602 +CTGGTC 0.000731431922247 0.000157082518468 +CGAACT 8.10969295455e-05 0.000201186456345 +TTGCAT 0.000169991640778 0.000384852170246 +GACGAG 0.00152836521066 0.000180040732705 +TTGCAG 0.000506855809659 0.000320810835794 +GAGGGA 0.000439794887151 0.000161915826728 +TTGCAC 0.000218337887238 0.000253144520146 +TTGCAA 0.000296315704108 0.000461580938882 +TCCATC 0.000460069119537 0.000236227941234 +TCGCAT 0.000268243690035 0.000210853072866 +GTCGTG 0.000210540105551 0.000115999398253 +GTCGTA 7.01800351836e-05 
0.000103916127602 +GTCGTC 0.000212099661888 0.000132915977165 +CCGTTT 0.000116966725306 0.00020239478341 +GAGCAA 0.000670609225088 0.000300269275686 +TTTTTG 0.000193384985839 0.00104339042075 +ATATCG 0.000230814337937 0.000242873740092 +GTCGTT 0.000129443176005 0.000141978430154 +TCAGAT 0.000185587204152 0.0002634153002 +TCGCAG 0.000709598133523 0.000183061550368 +GTGCTC 0.00058483362653 0.000172790770315 +TCGCAC 0.000338423725219 0.000142582593686 +CCGTTG 0.0002292547816 0.000139561776023 +CCGTTA 3.27506830857e-05 0.000114791071188 +CCGTTC 0.000205861436539 0.000112374417058 +GATCGG 0.000280720140734 0.000138957612491 +AATGCT 0.000338423725219 0.000334706597043 +GGTACG 8.42160422203e-05 7.12912968431e-05 +GGTACA 8.10969295455e-05 0.00011056192646 +TCCAGG 0.000145038739379 0.000128686832437 +GGTACC 0.00013880051403 5.43747179312e-05 +GCGGAC 0.00045227133785 0.000107541108797 +CAGGGA 0.000407044204065 0.000106936945265 +TTCAGC 0.000456950006862 0.000212061399931 +TATTTC 0.000218337887238 0.000492997442576 +TATTTA 0.000101371161932 0.00109897346574 +CCCCCT 5.14653591346e-05 0.000100895309939 +GGTACT 0.000106049830944 9.30411840155e-05 +AATGCG 0.000411722873077 0.000215686381127 +AATGCC 0.00104490274607 0.000250727866016 +AATGCA 0.000416401542089 0.000461580938882 +CCCCCC 9.045426757e-05 0.000148020065479 +CCCCCA 0.00012632406333 0.000132311813632 +TATTTT 0.000123204950656 0.00139018028844 +CCCCCG 0.000110728499956 7.91454227665e-05 +CTTGAG 0.0002292547816 0.00015527002787 +ATGATG 0.000592631408217 0.000267644444928 +GAAGTA 0.000171551197115 0.000207832255204 +ATGATA 0.000177789422465 0.000367331427802 +ATGATC 0.000489700689948 0.000163124153793 +TGCAAC 0.000439794887151 0.000301477602752 +TGCAAA 0.000177789422465 0.00057153870181 +TGCAAG 0.000575476288505 0.000244082067158 +GAAGTG 0.00063317987299 0.000262206973135 +ATGATT 0.000276041471722 0.000462185102415 +CTTGAT 0.000151276964729 0.000230186305909 +GTGCTT 0.000218337887238 0.000210853072866 +TTGAGG 
0.000113847612631 0.000177624078575 +GCCATT 0.000806290626443 0.000297852621556 +TTGAGC 0.000304113485796 0.000202998946943 +TGCAAT 0.000280720140734 0.000418081164537 +GAGAAT 0.00076574216167 0.000271873589656 +TGTCTT 3.89889084353e-05 0.000231394632974 +TTCAGA 6.23822534965e-05 0.000254957010744 +AGTAGC 0.000227695225262 0.000182457386836 +AAGGCC 0.00100903295031 0.00017883240564 +ATAACC 0.000251088570324 0.000210853072866 +AAGGCA 0.000474105126574 0.000299060948621 +ATAACA 0.000146598295717 0.000477289190729 +AAGGCG 0.000712717246198 0.000215686381127 +ATAACG 0.000141919626705 0.000187894858629 +TATGCT 0.000137240957692 0.000216290544659 +GAGGGT 0.000492819802623 9.42495110807e-05 +TGTCTG 0.000194944542177 0.000208436418736 +GAGAAG 0.0020258636823 0.000281540206177 +GAGAAA 0.000619143865953 0.000504476549695 +TGTCTC 6.08226971591e-05 0.000144999247816 +GAGAAC 0.0011961797108 0.00019997812928 +TGTCTA 4.99058027972e-05 0.000188499022161 +TGTGGC 0.000221456999913 0.000210248909334 +TGTGGA 0.000118526281643 0.000213873890529 +TGTGGG 3.89889084353e-05 0.000157686682 +ATAACT 0.000102930718269 0.000344373213564 +ACTGAC 0.000154396077404 0.000137145121893 +GGTATT 0.000176229866128 0.00019756147515 +AGGTAT 6.70609225088e-05 0.00015043671961 +ACCAAG 0.0011213210066 0.000285165187372 +GAGCAT 0.000428877992789 0.000229582142376 +ACCAAA 0.000486581577273 0.000611413494959 +ATCATC 0.000762623048995 0.000274894407319 +ACCAAC 0.000698681239161 0.00035524815715 +GGTCAC 0.000283839253409 0.000126270178307 +GATCGC 0.000750146598296 0.000154665864337 +GGTCAT 0.000212099661888 0.000127478505372 +ACCAAT 0.000640977654677 0.000344373213564 +AGGTAG 1.55955633741e-06 0.000177624078575 +AGGTAA 3.11911267483e-06 0.000155874191403 +AGGTAC 6.55013661714e-05 9.12286934178e-05 +TTTTGC 0.000152836521066 0.000487559970783 +GCCGTC 0.000442913999825 0.000119624379449 +AATTGG 0.000162193859091 0.000375789717258 +GTCGGA 0.000151276964729 0.000115999398253 +AATTGA 1.40360070367e-05 0.000593892752515 
+ATCATG 0.000422639767439 0.000160707499663 +AATTGC 0.000247969457649 0.000404185403288 +TAGCCA 0.0 0.000235623777702 +TGGGGT 0.000106049830944 9.66661652109e-05 +CGCTCG 0.000191825429502 0.000143186757219 +GATAAT 0.000408603760402 0.000338331578238 +CGCTCA 7.79778168707e-05 0.000122645197111 +CGCTCC 0.000450711781512 0.00013654095836 +AATTGT 9.51329365822e-05 0.000653704942239 +AATCTT 0.000302553929458 0.00037156057253 +TCTTAT 5.14653591346e-05 0.000298456785089 +GTTATG 0.000149717408392 0.00017399909738 +CGCTCT 9.20138239074e-05 0.000146811738414 +GATAAA 0.00031970904917 0.000450705995296 +TCGAAG 0.000408603760402 0.000248311211886 +GATAAC 0.000371174408304 0.000203603110476 +TAGCCT 0.0 0.000111166089993 +TGGGGC 0.000268243690035 0.000125666014774 +GATAAG 0.000625382091303 0.00023985292243 +GATCCT 0.000285398809747 0.000183665713901 +AAGAAT 0.000603548302579 0.00048695580725 +GGGGCT 7.79778168707e-05 8.57912216247e-05 +GTTATC 0.0002167783309 0.000192728166889 +AAGAAC 0.0011213210066 0.00041626867394 +GATCCA 0.000363376626617 0.000186082368031 +AAGAAA 0.000732991478584 0.000974515778033 +GATCCC 0.00075950393632 0.00015043671961 +AAGAAG 0.00209448416115 0.000447685177633 +GATCCG 0.000505296253322 0.000166144971456 +GCCCCA 0.000168432084441 0.000190915676292 +TATTAA 1.8714676049e-05 0.000756412742776 +CAAGGG 5.14653591346e-05 0.000125666014774 +CAAGGC 0.000166872528103 0.000160707499663 +GGTTCT 0.000115407168969 0.000184874040966 +CAAGGA 0.0002292547816 0.000259186155472 +GGTTCA 0.000106049830944 0.000227165488246 +GGTTCC 0.000310351711145 0.000137749285426 +AAGGCT 0.000533368267395 0.000171582443249 +TGCATT 0.000233933450612 0.000450705995296 +GGTTCG 0.000244850344974 0.000182457386836 +TAGGCA 0.0 0.000187894858629 +ATTATA 0.000157515190079 0.000869391323366 +ATTATC 0.000336864168881 0.000299060948621 +TAGAAT 0.0 0.000295435967426 +GATAGT 0.000193384985839 0.000169769952652 +ATTATG 0.000269803246372 0.000391497969104 +AGTCTT 0.000116966725306 0.000180040732705 
+TCCCAA 0.000196504098514 0.00026583195433 +TAGAAA 0.0 0.00048695580725 +TAGAAC 0.0 0.000167957462054 +ATTATT 0.000224576112587 0.00092920351309 +TAGAAG 0.0 0.00024226957656 +AGTCTA 0.000115407168969 0.000186686531564 +AGTCTC 0.000140360070367 0.000119624379449 +AGTCTG 0.000413282429414 0.000123249360644 +TAACTC 0.0 0.000205415601073 +TAACTA 0.0 0.000377602207855 +TAACTG 0.0 0.000229582142376 +ATCTAT 0.000291637035096 0.000333498269978 +GCCCCG 0.000152836521066 9.18328569504e-05 +TAACTT 0.0 0.000383039679648 +ATCTAG 2.02742323864e-05 0.000138353448958 +ATCTAC 0.000555202056119 0.000196957311617 +ATCTAA 1.24764506993e-05 0.000314165036936 +GGTCAG 0.000408603760402 9.90828193412e-05 +CCATGT 6.39418098339e-05 0.000181853223303 +GCCCCC 0.000210540105551 0.000102707800537 +TGGAGG 3.58697957605e-05 0.000173394933847 +TGGAGA 3.27506830857e-05 0.000213269726997 +GGTCAA 0.000205861436539 0.000161915826728 +CCATGC 4.67866901224e-05 0.000138957612491 +TCCAGA 0.000118526281643 0.000187894858629 +CCATGA 3.11911267483e-06 0.00013412430423 +CCATGG 7.64182605332e-05 0.000121436870046 +TGGAGT 9.66924929196e-05 0.000192728166889 +TCAGTA 5.45844718095e-05 0.000164936644391 +TCAGTC 6.55013661714e-05 0.000156478354935 +CTCTCT 0.000110728499956 0.000300269275686 +GGGGAT 0.000194944542177 0.000102103637004 +TCAGTG 0.000198063654851 0.000212665563464 +CGATAT 0.00012632406333 0.000233811287104 +CTCTCG 0.000260445908348 0.000137145121893 +GGGGAC 0.000154396077404 9.78744922761e-05 +CTCTCC 0.000338423725219 0.000155874191403 +TCAGTT 0.000112288056294 0.000344373213564 +CTCTCA 9.045426757e-05 0.000193936493954 +CGATAG 6.23822534965e-06 0.00015285337374 +CGATAA 4.67866901224e-06 0.000258581991939 +ATGCTG 0.000948210253147 0.000241665413027 +CGATAC 0.000127883619668 0.000151040883142 +TTCACT 0.000168432084441 0.00022354050705 +GTGTTT 0.00017467030979 0.000430768598721 +GCTACT 0.000113847612631 0.000139561776023 +GTGGTT 0.000378972189991 0.00026583195433 +GTGTTC 0.000408603760402 
0.000241061249495 +GTGTTA 5.61440281469e-05 0.000222332179985 +GTGTTG 0.000194944542177 0.000283352696775 +CGTGCC 0.000325947274519 9.54578381458e-05 +GTGGTA 0.000182468091477 0.000173394933847 +GCTACA 9.51329365822e-05 0.000153457537272 +GTGGTC 0.000645656323689 0.000131103486567 +GCTACC 0.000157515190079 0.000127478505372 +GTGGTG 0.00112755923195 0.000200582292813 +GCTACG 0.000118526281643 8.88120392875e-05 +CGTGCA 7.79778168707e-05 0.000143186757219 +TTTCGT 8.73351548951e-05 0.000389081314974 +TGCCCT 2.96315704108e-05 0.000135332631295 +AGCGTT 0.000233933450612 0.000199373965748 +CGTGCG 8.42160422203e-05 0.000115999398253 +CACGGA 0.000246409901311 0.000125061851242 +TGCCCA 0.000121645394318 0.000278519388514 +TGCCCG 0.00013256228868 0.000113582744123 +ACAGTG 0.000279160584397 0.000196957311617 +AGCGTA 0.000107609387282 0.000140770103088 +AGCGTC 0.000163753415428 0.000180644896238 +GTACGT 4.36675774476e-05 0.000100291146406 +AGCGTG 0.000304113485796 0.000148020065479 +GGCCCC 0.000134121845018 7.79370957013e-05 +GGCCCA 9.35733802448e-05 0.000156478354935 +CACGTG 0.000286958366084 0.000118416052383 +GGCCCG 7.64182605332e-05 8.03537498316e-05 +CACGTA 7.48587041958e-05 0.000114791071188 +CACGTC 0.000166872528103 0.000107541108797 +ACGTGC 0.000135681401355 0.000127478505372 +ACGTGA 7.79778168707e-06 0.000140770103088 +ACGTGG 8.10969295455e-05 9.90828193412e-05 +AAATGG 0.000221456999913 0.000447685177633 +CACGTT 0.00013256228868 0.000163124153793 +GGGGCC 0.000162193859091 6.04163532568e-05 +TCTTAA 0.0 0.000434997743449 +GGCCCT 3.27506830857e-05 8.69995486898e-05 +GGGACA 4.05484647727e-05 0.000123249360644 +GGTATA 6.08226971591e-05 0.000187894858629 +GGGACC 4.36675774476e-05 0.000123853524177 +TCGTGG 0.000140360070367 0.000135936794828 +ACGTGT 4.5227133785e-05 0.000169769952652 +TCTTAG 4.67866901224e-06 0.000148624229012 +GCTTCC 0.000276041471722 0.000145603411349 +AGAGAG 0.000109168943619 0.000298456785089 +AGAGAA 0.000118526281643 0.000412643692744 +AGAGAC 
6.55013661714e-05 0.000166749134989 +TCGTGA 9.35733802448e-06 0.000122645197111 +GTCTTT 0.000262005464685 0.000233811287104 +TCTATC 5.92631408217e-05 0.000180644896238 +TCTATG 9.20138239074e-05 0.000204811437541 +AGAGAT 0.000140360070367 0.000226561324713 +TTCACA 0.000182468091477 0.000303290093349 +GTCTTG 0.000152836521066 0.000144999247816 +TCTATT 0.000115407168969 0.00032926912525 +GTCTTC 0.000460069119537 0.000131103486567 +GTCTTA 4.83462464598e-05 0.000145603411349 +GCAGAG 0.000388329528016 0.000264623627265 +GAAGGC 0.000296315704108 0.000218103035257 +GCTCAC 0.000169991640778 0.000100291146406 +GCAGAC 0.000177789422465 0.000185478204498 +GCAGAA 0.000219897443575 0.000351623175955 +GCTCAG 0.00036181707028 0.00020239478341 +ATGCGT 0.000207420992876 0.000151645046675 +TTGCTG 0.000534927823733 0.000315373364001 +CTTGAC 0.000155955633741 0.000119624379449 +TTGCTA 0.000123204950656 0.000245290394223 +TTGCTC 0.000237052563287 0.000222936343518 +GCAGAT 0.000322828161845 0.000257977828407 +GCTCAT 0.000129443176005 0.000167957462054 +GTTTGC 0.000194944542177 0.000305102583947 +TTGCTT 0.000143479183042 0.000390289642039 +ATGCGG 0.000226135668925 0.000125061851242 +GAGAGG 0.000204301880201 0.000194540657487 +TAGCCG 0.0 0.000121436870046 +ATGCGC 0.000436675774476 0.000115395234721 +ATGCGA 0.00027760102806 0.000212665563464 +TGTAAG 4.36675774476e-05 0.000287581841503 +GGCTAG 4.67866901224e-06 0.000109353599395 +TGTAAC 5.45844718095e-05 0.000202998946943 +GGCTAC 0.000722074584222 0.000106936945265 +GACTGT 0.000140360070367 0.000146811738414 +GGCTAA 6.23822534965e-06 0.000147415901947 +ACTGTA 0.000101371161932 0.00021870719879 +TTGGTA 0.000109168943619 0.000236227941234 +ACTGTC 0.000168432084441 0.000140165939556 +TTGGTC 0.000283839253409 0.000179436569173 +TTCGAC 0.000611346084266 0.000204811437541 +ACTGTG 0.000322828161845 0.000163728317326 +AATCAA 0.000360257513942 0.00077393348522 +GACTGC 0.000327506830857 0.000156478354935 +GGCTAT 0.000461628675874 0.000139561776023 
+GACTGA 4.67866901224e-06 0.000160707499663 +GACTGG 0.000254207682998 0.000109353599395 +TGTAAT 5.77035844843e-05 0.000502664059097 +TGTGGT 9.20138239074e-05 0.000193332330422 +TTGGTT 0.000219897443575 0.000422914472798 +ACTGTT 0.000169991640778 0.000269456935525 +GTTTCT 0.000109168943619 0.000285165187372 +ATAATT 0.000148157852054 0.000829516530216 +CCTTTT 8.73351548951e-05 0.000384852170246 +GTTTCG 0.000212099661888 0.000250727866016 +GTTTCC 0.000302553929458 0.000255561174276 +GTTTCA 0.000118526281643 0.000258581991939 +ATAATG 0.0002292547816 0.000430164435189 +CCTTTA 5.14653591346e-05 0.000207832255204 +CCTTTC 0.000129443176005 0.000181249059771 +ATAATC 0.000184027647815 0.000316581691066 +ATAATA 0.000109168943619 0.00108205688683 +CCTTTG 0.000204301880201 0.000231998796506 +ACAGAA 0.000213659218226 0.000428956108124 +ACAGAC 0.000179348978803 0.00022112385292 +GCCCCT 9.51329365822e-05 0.000109957762927 +ACAGAG 0.000310351711145 0.000217498871725 +TTATCT 2.80720140734e-05 0.000279123552047 +CTGGAC 0.0013630522389 0.000173394933847 +CTGGAA 0.00081408840813 0.000230790469441 +CTGGAG 0.00220989133011 0.000185478204498 +ACAGAT 0.000272922359047 0.000315373364001 +GTGAAG 0.000825005302492 0.000219311362322 +TGGGGA 0.000112288056294 0.000125061851242 +CCTTGC 7.32991478584e-05 0.000132915977165 +TTATCA 7.01800351836e-05 0.000372164736062 +CTGGAT 0.000884268443313 0.000181249059771 +TTATCC 0.000110728499956 0.00022354050705 +CCAATG 0.0002292547816 0.000206623928138 +AAACCG 0.000268243690035 0.000270665262591 +AAACCA 0.000291637035096 0.000632559218599 +CCAATC 0.000109168943619 0.000215686381127 +AAACCC 0.00027136280271 0.000317185854598 +CCAATA 9.66924929196e-05 0.000309331728675 +TGTCGG 4.83462464598e-05 8.21662404293e-05 +GAGATC 0.00111508278125 0.000161311663196 +AACGAG 0.0011213210066 0.000248915375418 +GAGATA 0.000294756147771 0.000220519689387 +AACGAA 0.00058483362653 0.000600538551373 +GAGATG 0.000756384823645 0.000161311663196 +AACGAC 0.000662811443401 
0.000245290394223 +AAACCT 8.88947112325e-05 0.000280936042644 +CCAATT 0.000173110753453 0.000413247856277 +ATTTGC 0.000249529013986 0.000445268523503 +GTCGAT 0.000339983281556 0.000158895009065 +ATTTGG 0.00017467030979 0.000381227189051 +AACGAT 0.000873351548951 0.000326248307587 +TGTCGT 4.5227133785e-05 0.000144999247816 +CCCTAC 0.000385210415341 6.58538250499e-05 +GAGATT 0.000634739429327 0.000222332179985 +CCCTAA 9.35733802448e-06 0.000154665864337 +TACAAC 0.000658132774388 0.0002893943321 +CATTAT 9.35733802448e-05 0.000405393730353 +CTACAC 5.92631408217e-05 0.000152249210207 +TGGATT 0.000155955633741 0.000290602659165 +ATCACA 0.000279160584397 0.000277311061449 +ATCACC 0.000673728337762 0.000251332029548 +CATGCT 0.000169991640778 0.000161311663196 +TGACAC 0.0 0.000145603411349 +ATCACG 0.000311911267483 0.000125061851242 +TGGATA 0.000110728499956 0.000249519538951 +CATTAG 1.24764506993e-05 0.000181853223303 +CATTAA 7.79778168707e-06 0.000412643692744 +CATTAC 0.000118526281643 0.000198165638682 +CATGCA 9.8252049257e-05 0.000203603110476 +TGACAT 0.0 0.000186082368031 +CATGCC 0.000393008197028 0.000123853524177 +ATCACT 0.000182468091477 0.000213873890529 +CATGCG 0.00018090853514 9.96869828738e-05 +GTAGAA 0.000124764506993 0.000227165488246 +GTAGAC 0.000124764506993 9.42495110807e-05 +GTAGAG 0.000218337887238 0.000165540807924 +ACAACA 0.000393008197028 0.000811391624239 +ACAACC 0.000274481915385 0.000262206973135 +GATTGC 0.00026512457736 0.000222332179985 +ACAACG 0.000266684133698 0.000230790469441 +ACTAAG 0.000159074746416 0.000187290695096 +GTAGAT 0.000124764506993 0.000175207424445 +TGCTAC 0.000247969457649 0.000143790920751 +GGGTAC 5.61440281469e-05 5.37705543986e-05 +TGATGC 0.0 0.000175811587977 +GATTGT 0.000124764506993 0.000238644595364 +TTCAGT 0.000221456999913 0.000302081766284 +ACAACT 0.000184027647815 0.000410227038614 +GGGTAA 0.0 0.000121436870046 +TTGCCT 0.000110728499956 0.000220519689387 +TAATAG 0.0 0.00032926912525 +AGTCCG 0.000330625943532 
0.000102103637004 +TAATAC 0.0 0.000313560873403 +GTCCGA 4.83462464598e-05 0.000198165638682 +TAATAA 0.0 0.00114428573068 +GTCCGC 0.000123204950656 9.72703287435e-05 +TGTTTA 3.27506830857e-05 0.000561872085289 +TCAACA 0.000145038739379 0.000381831352583 +CACATC 0.000533368267395 0.000229582142376 +GTCCGT 7.95373732081e-05 0.000100291146406 +TAATAT 0.0 0.000737079509733 +ATTCGC 0.000378972189991 0.000228373815311 +TTCTCC 0.000464747788549 0.000209040582269 +ATTCGA 0.00017467030979 0.000387268824376 +TAACGA 0.0 0.00022354050705 +ATTCGG 0.000148157852054 0.00017641575151 +TTCTCG 0.000363376626617 0.00024226957656 +GAGCTA 0.000371174408304 0.000146811738414 +TCGTAC 0.000218337887238 9.78744922761e-05 +TACCTT 9.20138239074e-05 0.000209040582269 +TCGTAA 3.11911267483e-06 0.000207832255204 +TCGTAG 7.79778168707e-06 0.000126270178307 +GAACCT 0.000131002732343 0.000125666014774 +ATCCAA 0.000279160584397 0.000378206371388 +ATCCAC 0.000338423725219 0.000209644745801 +TGAACC 0.0 0.00017641575151 +TTCTCT 0.000102930718269 0.000320206672261 +ATCCAG 0.000640977654677 0.000193936493954 +GAACCC 0.000344661950568 0.000143186757219 +TTATAT 7.01800351836e-05 0.000998682319335 +GAACCA 0.000263565021023 0.000233811287104 +GAACCG 0.000339983281556 0.000142582593686 +TACCTC 0.000143479183042 9.06245298853e-05 +TCGTAT 0.000137240957692 0.000183665713901 +ACCATC 0.000661251887063 0.000189707349226 +ACCATA 0.00027760102806 0.000297248458024 +ACCATG 0.000509974922334 0.000129895159502 +ATGTTG 0.000291637035096 0.000315373364001 +AGCTCT 0.000193384985839 0.000190311512759 +TGTCCT 0.000102930718269 0.000141978430154 +AGTGGT 0.000327506830857 0.000293019313296 +TGATGT 0.0 0.000207228091671 +ATGTTT 0.000263565021023 0.000542538852246 +ACCATT 0.00058483362653 0.000291206822698 +AGCTCG 0.000310351711145 0.000133520140698 +AGTGGC 0.000580154957518 0.000218103035257 +AGCTCC 0.000653454105376 0.000169165789119 +AGCTCA 0.000199623211189 0.000247707048353 +TCCATG 0.000463188232212 0.000163728317326 
+CAGTAA 1.8714676049e-05 0.000216894708192 +CAGTAC 0.00057859540118 0.000118416052383 +CAGTAG 2.33933450612e-05 0.000133520140698 +TTAATA 7.01800351836e-05 0.000811995787772 +CAGTAT 0.000325947274519 0.000167957462054 +AAAGTA 0.000116966725306 0.000525622273334 +AAAGTC 0.000166872528103 0.000301477602752 +AAAGTG 0.000466307344887 0.000589059444254 +GATCTA 0.000286958366084 0.000162519990261 +GATCTC 0.000469426457561 0.000139561776023 +TGTCCA 0.000101371161932 0.000198769802215 +GATCTG 0.00129443176005 0.000184269877433 +TCCCTT 0.000134121845018 0.000151645046675 +CCGGCC 0.000438235330813 9.54578381458e-05 +ATTAGG 7.79778168707e-05 0.000148624229012 +AAAGTT 0.000198063654851 0.000606580186699 +ACTCTC 9.66924929196e-05 0.000156478354935 +ATTAGC 0.000204301880201 0.000252540356614 +TCCCTC 0.000145038739379 0.000123249360644 +TGTCCG 0.000162193859091 0.000160103336131 +TCCCTA 0.00012632406333 0.000143790920751 +TTAATG 0.000106049830944 0.000427143617526 +TCCCTG 0.000589512295542 0.000120832706514 +GATCTT 0.000333745056206 0.00019997812928 +CACAAT 0.0004460331125 0.000335310760575 +AAGTGC 0.000567678506818 0.000358268974813 +TGTTCT 3.89889084353e-05 0.000300269275686 +AAGTGG 0.000408603760402 0.000315977527533 +TGATTG 0.0 0.000283352696775 +TGCGCA 5.45844718095e-05 0.000163728317326 +TCTGAT 0.000179348978803 0.000210853072866 +TGTATT 6.70609225088e-05 0.000532268072193 +CGAGGT 9.35733802448e-05 8.51870580921e-05 +TGTTCA 4.05484647727e-05 0.000351623175955 +CACAAG 0.000764182605332 0.000273686080253 +TGTTCC 0.000102930718269 0.000169769952652 +AAGTGT 0.000190265873164 0.000415060346874 +CACAAC 0.000524010929371 0.000364310610139 +TGTTCG 8.26564858829e-05 0.000224144670583 +TCTGAA 0.000184027647815 0.000279727715579 +TCTGAC 0.000124764506993 0.000158290845533 +TCTGAG 0.000212099661888 0.000120832706514 +CGCGCC 0.000325947274519 9.96869828738e-05 +CGCGCA 6.55013661714e-05 0.000104520291134 +GGTAAA 0.000140360070367 0.000264623627265 +CGCGCG 5.14653591346e-05 
8.15620768967e-05 +CTCCCT 1.8714676049e-05 0.000103916127602 +AGCGGG 0.000116966725306 0.00010814527233 +AGCGGA 0.00035557884493 0.000219311362322 +ACCTCT 0.000118526281643 0.000175207424445 +GGTAAG 0.000124764506993 0.000119624379449 +CGCGCT 0.000106049830944 9.78744922761e-05 +ACCTCC 0.000435116218138 0.00011297858059 +ACCTCA 0.000213659218226 0.000137145121893 +ACCTCG 0.000427318436451 0.000102707800537 +CTCCCC 5.45844718095e-05 0.000118416052383 +GCCTCT 0.000226135668925 0.000137145121893 +TAAGCC 0.0 0.000193332330422 +CGGGAG 0.000586393182867 0.000114186907655 +CCCATG 0.000558321168794 0.000117811888851 +CGGGAA 0.000322828161845 0.000143790920751 +CCCATA 0.000215218774563 0.000232602960039 +CGGGAC 0.000283839253409 7.85412592339e-05 +CCCATC 0.000605107858916 0.00019756147515 +TTGTTG 0.000152836521066 0.000602955205503 +CCTGCT 0.000246409901311 0.000126874341839 +CCGAAT 0.000308792154808 0.000220519689387 +CCCATT 0.000547404274432 0.000241665413027 +CGGGAT 0.000369614851967 0.000108749435862 +GCCTCG 0.000439794887151 0.000154061700805 +GCCTCA 0.000210540105551 0.000139561776023 +TAAGCT 0.0 0.000293623476828 +GCCTCC 0.000775099499694 0.000120832706514 +CCGAAC 0.000233933450612 0.000188499022161 +CCTGCG 0.000154396077404 0.000121436870046 +CCGAAA 0.000244850344974 0.000468830901273 +CCGAAG 0.000318149492832 0.000169165789119 +CCTGCC 0.000221456999913 0.00015527002787 +CCTGCA 0.000145038739379 0.000156478354935 +GTACTG 0.000159074746416 0.00011297858059 +CAAGGT 0.000137240957692 0.000138957612491 +CCATTC 0.000230814337937 0.000212665563464 +CCATTA 7.48587041958e-05 0.000222332179985 +CCATTG 0.000283839253409 0.000240457085962 +TATAAC 0.000123204950656 0.000306310911012 +GATTTG 0.000580154957518 0.000344977377097 +GTACTC 9.045426757e-05 0.000126270178307 +GTCGAA 0.000199623211189 0.000216894708192 +CCATTT 0.000176229866128 0.000465205920078 +TCAGCG 0.000205861436539 0.000160103336131 +TCTACC 8.73351548951e-05 0.000123853524177 +TCTACG 8.10969295455e-05 
0.000139561776023 +ACTTTC 0.000157515190079 0.000303290093349 +CGCAAC 0.000662811443401 0.000196353148085 +TGCTTA 4.5227133785e-05 0.000269456935525 +CGCAAA 0.000435116218138 0.000293019313296 +ATACCG 0.000198063654851 0.000118416052383 +CGCAAG 0.001146273908 0.000170978279717 +ATACCA 0.000232373894275 0.00033410243351 +ATACCC 0.000360257513942 0.000186686531564 +TCTACT 8.57755985577e-05 0.000208436418736 +CCGAGT 0.000124764506993 0.000140165939556 +ATACCT 8.26564858829e-05 0.000167353298521 +GATAGG 0.000106049830944 9.2437020483e-05 +CGCAAT 0.000449152225175 0.000208436418736 +TAGTAA 0.0 0.000249519538951 +TGGGTG 0.000272922359047 0.000172186606782 +TAGTAC 0.0 0.000174603260912 +CTCAAG 0.000968484485534 0.000189103185694 +GTTTTG 0.000276041471722 0.000493601606108 +CTCAAA 0.000266684133698 0.000346185704162 +TAGTAG 0.0 0.000164936644391 +CTCAAC 0.000605107858916 0.000191519839824 +GTGTGC 0.000327506830857 0.000261602809602 +GTGTGA 1.24764506993e-05 0.000235623777702 +GTGTGG 0.0002167783309 0.000187290695096 +TAACGG 0.0 0.000140770103088 +TTGCGT 0.000182468091477 0.000186082368031 +CTCAAT 0.000444473556163 0.000222332179985 +TAGTAT 0.0 0.00024710288482 +GTTCTC 0.000201182767526 0.000224748834115 +GTGTGT 9.20138239074e-05 0.000566101230017 +ATCTTT 0.000338423725219 0.000354039830085 +GTTGGG 4.67866901224e-05 0.00013654095836 +GTTGGA 0.000219897443575 0.000194540657487 +GTTGGC 0.000382091302666 0.000282144369709 +CCTTGG 7.48587041958e-05 0.000129895159502 +CCTTGA 0.0 0.000153457537272 +GGGAAA 8.26564858829e-05 0.000340748232369 +AGGATT 0.00013256228868 0.000214478054062 +GGACGT 0.000184027647815 0.000106936945265 +GTTGGT 0.000184027647815 0.000226561324713 +ATCTTG 0.000188706316827 0.000217498871725 +GAGAGT 0.000339983281556 0.000132915977165 +ATCTTA 5.3024915472e-05 0.000306915074545 +ATCTTC 0.000648775436364 0.000243477903625 +AGGATC 0.000151276964729 0.000183665713901 +AGGATA 7.1739591521e-05 0.000203603110476 +AGGATG 0.000115407168969 0.000153457537272 +GACATG 
0.00077198038702 0.000103311964069 +GACATC 0.000818767077142 0.000163124153793 +GACATA 0.000344661950568 0.000201790619878 +TCGGCG 0.000480343351923 0.000114186907655 +GAATAT 0.000249529013986 0.000500851568499 +GGTGCA 0.000177789422465 0.000159499172598 +TCGGCC 0.000544285161757 0.000161915826728 +ATCAGA 7.64182605332e-05 0.000299060948621 +TTGCGA 0.000140360070367 0.000216894708192 +GACATT 0.000595750520892 0.000257977828407 +TTGCGG 0.000102930718269 0.000145603411349 +GAATAG 2.33933450612e-05 0.000174603260912 +TCGGCT 0.000293196591434 0.000158895009065 +CATAGA 5.14653591346e-05 0.000207832255204 +GAATAC 0.000346221506906 0.000297852621556 +GAATAA 1.55955633741e-05 0.000448289341166 +TGCCTG 0.000472545570236 0.00015285337374 +CACCGC 0.000179348978803 0.000216290544659 +TGCAGA 4.67866901224e-05 0.000262206973135 +TTAATT 0.000124764506993 0.00108386937743 +GGACGA 0.000190265873164 0.000170978279717 +TGAGGT 0.0 0.000111166089993 +TTTCTA 0.00012632406333 0.000434997743449 +TTATAA 6.23822534965e-06 0.000705058842507 +GAAATG 0.000678407006775 0.000462789265947 +AATGAT 0.000450711781512 0.000420497818668 +GAAATA 0.000311911267483 0.000654913269304 +GAAATC 0.000559880725131 0.000466414247143 +GAAATT 0.000374293520979 0.000761246051036 +AATGAA 0.000477224239248 0.000694183898921 +AATGAC 0.000385210415341 0.000208436418736 +AATGAG 0.000630060760315 0.000257373664874 +TCAGCA 0.000190265873164 0.000268852771993 +TACTAG 1.24764506993e-05 0.000103311964069 +CTTGGA 0.000163753415428 0.000192124003357 +CTTGGC 0.000251088570324 0.00019756147515 +GTATTG 8.88947112325e-05 0.000218103035257 +GTCCTT 0.000116966725306 0.000144999247816 +CTTGGG 4.05484647727e-05 0.000205415601073 +AGCCCT 3.74293520979e-05 0.000114791071188 +GGGAAT 0.000121645394318 0.000181853223303 +TTGAAT 0.000262005464685 0.000528038927465 +GTCCTA 7.79778168707e-05 8.21662404293e-05 +GTCCTC 0.000208980549213 0.000141374266621 +TGCACG 0.00018090853514 0.000122041033579 +CTTGGT 0.000134121845018 0.000186082368031 
+GTCCTG 0.000544285161757 0.000159499172598 +TGCACC 0.000366495739292 0.000186686531564 +TTGAAA 0.0002167783309 0.00077635013935 +AGCCCG 9.35733802448e-05 9.96869828738e-05 +AGCCCA 0.000106049830944 0.000217498871725 +AGCCCC 0.000141919626705 0.000137145121893 +TTAACT 7.95373732081e-05 0.000447685177633 +ATAAAT 0.000190265873164 0.00122222282639 +TGGTCG 0.000152836521066 0.000115395234721 +TGGTCA 8.88947112325e-05 0.000206019764606 +TGGTCC 0.000140360070367 0.000124457687709 +ACGATT 0.000182468091477 0.000273081916721 +GAGACT 0.000288517922421 0.000127478505372 +GATTCA 0.000232373894275 0.00023985292243 +GAGGAG 0.0033889159212 0.000342560722966 +ATAAAG 0.00027760102806 0.000467622574208 +ATAAAA 0.000148157852054 0.00147838816419 +TCAGCC 0.000215218774563 0.000164332480859 +ATAAAC 0.000224576112587 0.000583621972461 +GAGACG 0.000612905640603 0.000111770253525 +ACGATG 0.000241731232299 0.000190915676292 +CTACAG 0.000232373894275 0.000113582744123 +GAGACC 0.00072363414056 0.000154061700805 +ACGATC 0.000169991640778 0.000143790920751 +GAGACA 0.000266684133698 0.000190915676292 +ACGATA 0.000118526281643 0.000196957311617 +GGTTAC 0.000202742323864 9.96869828738e-05 +ATGTGT 8.10969295455e-05 0.000425935290461 +GGTTAA 3.11911267483e-06 0.000225957161181 +ACCACT 0.000425758880114 0.00019514482102 +GCACTT 0.000129443176005 0.000288790168568 +CTTCCT 6.70609225088e-05 0.000162519990261 +TAAACT 0.0 0.000405997893886 +CTCAGT 0.000202742323864 0.000195748984552 +TCCGCT 0.000260445908348 0.000132311813632 +ACCACG 0.000524010929371 0.000184874040966 +ATGTGG 0.000251088570324 0.000178228242108 +ACCACC 0.00104022407705 0.000286373514437 +GAGGAA 0.00104802185874 0.000259186155472 +ACCACA 0.000491260246285 0.000294831803893 +GGTTAT 0.000165312971766 0.000180040732705 +TCCGCC 0.000598869633567 0.000157686682 +GACGTG 0.000495938915297 0.000144395084284 +TCCGCG 0.000146598295717 8.51870580921e-05 +AACGTC 0.000221456999913 0.000137749285426 +AACGTA 0.000146598295717 0.000279123552047 
+AACGTG 0.000483462464598 0.000154665864337 +CGTGAC 0.000171551197115 6.40413344522e-05 +AGTGTG 0.000394567753366 0.000256165337809 +TCTTCG 0.000213659218226 0.000166749134989 +AGTGTA 0.000104490274607 0.000266436117863 +TCTTCA 0.000113847612631 0.000276102734384 +AGTGTC 0.000210540105551 0.000138957612491 +TCTTCC 0.000166872528103 0.000166144971456 +CGTGAT 0.000198063654851 0.000154061700805 +TAAGTC 0.0 0.000174603260912 +AACGTT 0.000201182767526 0.00028697767797 +CCGTGA 1.55955633741e-06 0.000100895309939 +TCTTCT 9.8252049257e-05 0.000295435967426 +AGTGTT 0.000154396077404 0.00035041484889 +GTGCGC 0.000651894549039 0.000141374266621 +TTGGCA 0.000232373894275 0.000380623025518 +AAAGGT 0.000135681401355 0.000241061249495 +TTGGCC 0.000913900013724 0.000309935892208 +GAGTAG 2.02742323864e-05 0.000146207574882 +TTAACG 5.14653591346e-05 0.000222936343518 +TTGGCG 0.000276041471722 0.000167353298521 +GACTTC 0.000731431922247 0.000154665864337 +TCGAGT 0.000165312971766 0.000251332029548 +GACTTA 0.000112288056294 0.000188499022161 +GACTTG 0.00041016331674 0.000189103185694 +AAAGGG 3.43102394231e-05 0.00022354050705 +AAAGGA 0.000238612119624 0.000435601906982 +TTGGCT 0.000329066387194 0.000287581841503 +AAAGGC 0.000169991640778 0.000317790018131 +GAGTAC 0.000821886189817 0.000148624229012 +GTATGG 5.61440281469e-05 0.000128686832437 +GACTTT 0.000636298985665 0.000312352546338 +TAAACC 0.0 0.000259790319004 +CAAGAA 0.000269803246372 0.000477893354262 +GACGTT 0.000198063654851 0.000100291146406 +CAAGAC 0.000173110753453 0.000181249059771 +GATTCG 0.000408603760402 0.000193936493954 +CAAGAG 0.00036181707028 0.000250727866016 +TTCTTG 0.000237052563287 0.00030570674748 +TTCTTC 0.000718955471547 0.000323227489924 +TTCTTA 6.08226971591e-05 0.000392102132637 +CAAGAT 0.000213659218226 0.00023985292243 +AGGGCT 8.88947112325e-05 8.51870580921e-05 +TACGGG 9.66924929196e-05 7.37079509733e-05 +TACGGA 0.000305673042133 0.000127478505372 +TACGGC 0.00053648738007 0.000102707800537 +CTCAGA 
7.64182605332e-05 0.000151645046675 +TAGAGT 0.0 0.000175811587977 +TACGGT 0.000179348978803 0.00011056192646 +AGGGCG 0.000102930718269 7.5520441571e-05 +AGGGCA 9.51329365822e-05 0.000163124153793 +AGGGCC 0.000165312971766 8.21662404293e-05 +TCGAGA 9.35733802448e-05 0.000204207274008 +CTGTGC 0.000478783795586 0.000175207424445 +TAGAGA 0.0 0.000225352997648 +GGCGAG 0.00111664233759 0.000117207725318 +CCGTGT 6.86204788462e-05 0.000127478505372 +GTGCTG 0.00104958141508 0.000219311362322 +GTGCTA 0.000208980549213 0.000160707499663 +ATCTGT 0.000140360070367 0.000230790469441 +CGAATT 0.000113847612631 0.000360081465411 +GCTCGT 0.000151276964729 0.000143790920751 +ATCTGA 6.23822534965e-06 0.000242873740092 +CGAATC 0.000127883619668 0.0002893943321 +ATCTGC 0.000393008197028 0.000227165488246 +CGAATA 6.55013661714e-05 0.000291810986231 +CGAATG 0.000204301880201 0.000211457236399 +ATCTGG 0.000238612119624 0.000193936493954 +ATCATA 0.00027136280271 0.000336519087641 +GATAGC 0.00035557884493 0.000169769952652 +AAGCAG 0.00135681401355 0.000427747781058 +GATAGA 9.51329365822e-05 0.00017641575151 +AAGCAA 0.000531808711058 0.000662163231695 +CAAACT 0.000212099661888 0.000385456333779 +AAGCAC 0.000648775436364 0.000257373664874 +TTTACC 0.000190265873164 0.000267644444928 +CATTTC 0.000163753415428 0.000402372912691 +CTAGGT 4.67866901224e-05 8.51870580921e-05 +CATTTA 9.20138239074e-05 0.000568517884147 +CATTTG 0.000330625943532 0.000443456032905 +GTTATA 0.000135681401355 0.00026824860846 +CAAACA 0.000213659218226 0.000675454829411 +AAGCAT 0.00036181707028 0.000364914773671 +CAAACC 0.000269803246372 0.000313560873403 +CTCGGG 3.89889084353e-05 8.57912216247e-05 +ATCATT 0.000436675774476 0.000392102132637 +CAAACG 0.000305673042133 0.000323227489924 +CTAGGG 1.8714676049e-05 7.06871333105e-05 +GTTATT 0.000179348978803 0.000406602057418 +TGGAGC 0.000185587204152 0.000206019764606 +CTAGGC 0.000109168943619 6.76663156477e-05 +CATTTT 0.000115407168969 0.000873620468094 +CTAGGA 
7.48587041958e-05 0.000115395234721 +AAGTTT 0.000431997105463 0.000589663607787 +TACAGT 0.000205861436539 0.00019514482102 +CTCGAA 0.000191825429502 0.000228373815311 +CTAACG 0.000160634302754 0.000111770253525 +CTAAGG 0.000110728499956 0.00013654095836 +CTAAGA 7.32991478584e-05 0.000169769952652 +CTAAGC 0.000204301880201 0.00019514482102 +TACAGG 0.000145038739379 9.18328569504e-05 +AAGTTG 0.000552082943444 0.00032926912525 +AAGTTA 0.000127883619668 0.000427747781058 +TACAGC 0.000399246422378 0.000145603411349 +AAGTTC 0.000951329365822 0.00026099864607 +TACAGA 9.98116055944e-05 0.000237436268299 +CTAAGT 0.000127883619668 0.000214478054062 +GTATGA 6.23822534965e-06 0.000146811738414 +GGGGCA 9.8252049257e-05 0.000146207574882 +GGGGCG 0.000101371161932 8.15620768967e-05 +TTCAAC 0.000606667415254 0.000364914773671 +TATATC 0.000145038739379 0.000327456634652 +TATATA 0.00012632406333 0.00169044956413 +TGTATA 4.05484647727e-05 0.000583621972461 +TATATG 0.000244850344974 0.000682704791802 +TCCGGT 0.000272922359047 8.57912216247e-05 +GGTCCA 0.000193384985839 0.0001317076501 +TATATT 0.000149717408392 0.000945515928469 +GGACTA 0.000118526281643 0.000106936945265 +TCTTAC 6.55013661714e-05 0.000146811738414 +TGATGG 0.0 0.000152249210207 +CTCGGT 0.000104490274607 0.000100895309939 +ATGAGT 0.000182468091477 0.000214478054062 +GCCCAC 0.000469426457561 0.000212061399931 +TGATGA 0.0 0.000367331427802 +TGCCAT 0.000149717408392 0.000247707048353 +ACTCTT 0.000120085837981 0.000233811287104 +TGTTTC 0.000109168943619 0.000300873439219 +CACATA 0.000190265873164 0.000325039980522 +TCATAA 4.67866901224e-06 0.000316581691066 +CACATG 0.00048814113361 0.000172790770315 +TGTTTG 0.000118526281643 0.000455539303557 +ATGAGA 9.98116055944e-05 0.000245894557755 +ATGAGC 0.000463188232212 0.000169165789119 +ATGAGG 0.000151276964729 0.000148624229012 +GTGGCT 0.000516213147684 0.000158290845533 +ACTCTG 0.000339983281556 0.000165540807924 +TGCCAG 0.000458509563199 0.000262811136667 +ACTCTA 
0.000102930718269 0.000185478204498 +ATTGTT 0.000260445908348 0.000660350741097 +CACATT 0.000318149492832 0.000349810685357 +TGCCAC 0.000241731232299 0.000254957010744 +AAATTC 0.000330625943532 0.000653100778706 +AAATTA 0.00013256228868 0.00127538921725 +AAATTG 0.000346221506906 0.000732850365005 +TCCACT 0.000257326795673 0.000193332330422 +TTCAAT 0.000556761612456 0.00063739252686 +CTTCGA 8.42160422203e-05 0.000247707048353 +CGTACC 0.000110728499956 8.94162028201e-05 +TTAATC 8.88947112325e-05 0.000312352546338 +AAATTT 0.000188706316827 0.00133459724344 +TTCGAA 0.000315030380157 0.000491789115511 +CTTAGT 8.42160422203e-05 0.000180644896238 +GGGAAC 9.98116055944e-05 0.000203603110476 +GGTGCG 0.000185587204152 0.000109957762927 +GCGTAT 9.51329365822e-05 0.000144395084284 +GGGAAG 9.045426757e-05 0.000130499323035 +GGTGCC 0.000547404274432 0.000127478505372 +TCGTCC 0.000305673042133 0.000231394632974 +GCCCAT 0.000300994373121 0.000175207424445 +AGAGCG 4.83462464598e-05 0.000244082067158 +GCGACT 8.26564858829e-05 0.000138353448958 +AGAGCC 0.000146598295717 0.000146207574882 +AGAGCA 7.95373732081e-05 0.000313560873403 +GCGTAG 3.11911267483e-06 9.84786558086e-05 +GGTGCT 0.000315030380157 0.000148624229012 +TCCCCA 9.98116055944e-05 0.000190311512759 +GCGTAC 0.000151276964729 7.61246051036e-05 +TAATTT 0.0 0.00110139011987 +GCGTAA 1.24764506993e-05 0.000173394933847 +GCGACG 0.000193384985839 0.000111770253525 +AGAGCT 7.64182605332e-05 0.000183061550368 +GCGACA 0.000141919626705 0.000160103336131 +TATCGG 0.000106049830944 0.000126874341839 +GCGACC 0.000176229866128 9.72703287435e-05 +TCCGGC 0.000524010929371 0.000153457537272 +GCAGGA 0.000329066387194 0.000225352997648 +GCAGGC 0.000191825429502 0.000164936644391 +GCAGGG 6.55013661714e-05 9.48536746132e-05 +CAATTT 0.000159074746416 0.000708683823703 +GCACTA 9.045426757e-05 0.0001317076501 +CCGTGC 0.000102930718269 0.000111770253525 +GCTCGG 9.35733802448e-05 0.000101499473471 +TATTCT 7.32991478584e-05 0.000386060497311 +CCGTGG 
0.000106049830944 0.000116603561786 +CTTTAG 9.35733802448e-06 0.000194540657487 +GCAGGT 0.000152836521066 0.00011297858059 +CTTCGT 7.48587041958e-05 0.000154061700805 +TATTCC 0.00017467030979 0.000235019614169 +CAATTG 0.000430437549126 0.000456143467089 +TATTCA 4.83462464598e-05 0.000369143918399 +CAATTA 8.57755985577e-05 0.000532872235725 +TATTCG 0.000123204950656 0.000251936193081 +CAATTC 0.000251088570324 0.000404789566821 +TGTAGA 1.71551197115e-05 0.000212061399931 +GGCTCA 0.000177789422465 0.000111770253525 +AGCATT 0.000419520654764 0.000381831352583 +GGCTCC 0.000524010929371 0.000114791071188 +AATCCT 0.000254207682998 0.000214478054062 +TGTAGG 1.71551197115e-05 0.000112374417058 +GGCTCG 0.000247969457649 0.000101499473471 +TTCGGA 0.000322828161845 0.000209040582269 +TTCGGC 0.000425758880114 0.000174603260912 +CTTTAC 0.000137240957692 0.000195748984552 +TTCAAG 0.00101994984467 0.000297852621556 +TTCGGG 0.000127883619668 0.000141374266621 +AGCATG 0.000431997105463 0.000166144971456 +GGCTCT 0.000177789422465 0.000141978430154 +AGCATC 0.000481902908261 0.000200582292813 +TTTACT 7.01800351836e-05 0.000404185403288 +AGCATA 0.000204301880201 0.000323227489924 +TTACAT 3.58697957605e-05 0.000406602057418 +AGACCT 6.08226971591e-05 0.000103916127602 +TTCGGT 0.000213659218226 0.000225352997648 +GTTTAT 0.000112288056294 0.000454330976491 +GGGGAG 0.000249529013986 9.84786558086e-05 +GCACTC 0.000135681401355 0.000190915676292 +GCCTTT 0.0004460331125 0.00021870719879 +TGCTTG 0.000162193859091 0.000196957311617 +GTTTAA 1.71551197115e-05 0.000512330675618 +GAGGTG 0.0012881935347 0.0001317076501 +AACTTG 0.000366495739292 0.000354039830085 +GTTTAG 9.35733802448e-06 0.000254957010744 +GAGGTA 0.000218337887238 0.000108749435862 +CTGGCT 0.00050061758431 0.000181249059771 +AGGCGG 7.64182605332e-05 0.000142582593686 +AGGCGA 8.10969295455e-05 0.000178228242108 +AGGCGC 0.000152836521066 0.000121436870046 +AGGAGT 0.000106049830944 0.000196353148085 +ACAGCC 0.000354019288593 
0.000224144670583 +ACAGCA 0.000311911267483 0.000418081164537 +TCCAGC 0.0006752878941 0.000236832104767 +ACAGCG 0.000202742323864 0.000175207424445 +AGGAGC 0.000198063654851 0.000252540356614 +AGGCGT 9.045426757e-05 0.000117811888851 +AGGAGA 3.89889084353e-05 0.000252540356614 +CTGGCG 0.000611346084266 0.000135936794828 +AGGAGG 6.55013661714e-05 0.000245290394223 +CTGGCA 0.000374293520979 0.000252540356614 +CTGGCC 0.00188550361193 0.000187894858629 +GCGCCC 0.000407044204065 8.57912216247e-05 +GCATCG 0.000302553929458 0.000193332330422 +ACAGCT 0.000221456999913 0.000221728016453 +GGAAAA 0.000324387718182 0.000641621671588 +AACGCT 0.000208980549213 0.000163124153793 +AATTTT 0.000202742323864 0.00129834743149 +AAACAG 0.000497498471635 0.000489976624913 +TGTCAC 9.35733802448e-05 0.000157686682 +AAACAC 0.00017467030979 0.000532872235725 +GCTCTA 0.000166872528103 0.000131103486567 +AAACAA 0.00027760102806 0.00139199277904 +AACGCG 0.000188706316827 0.000153457537272 +AATTTA 0.000168432084441 0.00111891086232 +AATTTC 0.000288517922421 0.000627121746806 +AACGCC 0.000612905640603 0.000172186606782 +TTTCAC 0.000159074746416 0.000302081766284 +ATTGGA 0.000399246422378 0.000288186005035 +AATTTG 0.00041016331674 0.00068693393653 +ATTTAC 0.000246409901311 0.000494205769641 +ATTTAA 2.33933450612e-05 0.00112857747884 +ATTTAG 7.79778168707e-06 0.000412643692744 +CCCTCG 0.000366495739292 9.90828193412e-05 +TAAATA 0.0 0.00115274402014 +GGAAAG 0.000480343351923 0.000230186305909 +TGACCA 0.0 0.000200582292813 +CATTCT 4.99058027972e-05 0.000191519839824 +TAAATC 0.0 0.000517163983879 +TGACCG 0.0 0.000100291146406 +GCGCCA 0.000296315704108 0.000167353298521 +CATTCG 0.000182468091477 0.000221728016453 +CATTCC 0.000166872528103 0.000214478054062 +CATTCA 8.26564858829e-05 0.000322623326391 +TCCATA 0.000193384985839 0.000254957010744 +GCCTTC 0.000790695063068 0.000151040883142 +ACAAAT 0.000227695225262 0.000785412592339 +GATTAA 2.80720140734e-05 0.000332894106445 +GTAGGC 9.66924929196e-05 
0.000140165939556 +GATTAC 0.000366495739292 0.000198165638682 +GTAGGA 5.92631408217e-05 0.000102707800537 +GTAGGG 1.24764506993e-05 6.46454979848e-05 +GATTAG 2.18337887238e-05 0.000184874040966 +GCGGGT 0.000212099661888 7.37079509733e-05 +ACAAAC 0.000226135668925 0.000546163833442 +TTTCAA 0.000177789422465 0.000690558917726 +ACAAAA 0.00018090853514 0.00124518104062 +ACAAAG 0.000299434816783 0.00044466435997 +GCGGGC 0.000469426457561 0.000105728618199 +GTCGGT 0.000168432084441 0.000113582744123 +GCGGGA 0.000276041471722 0.000123249360644 +CCCTCA 0.000123204950656 0.00011056192646 +GATTAT 0.000254207682998 0.000285769350905 +GTAGGT 5.92631408217e-05 0.000114791071188 +AACCTG 0.000704919464511 0.000153457537272 +TAATGA 0.0 0.000383643843181 +GATGTT 0.000407044204065 0.000207228091671 +TAATGC 0.0 0.000248311211886 +GGGATG 9.35733802448e-05 0.000122645197111 +GCCCTT 0.000240171675962 0.000119020215916 +GCGCCG 0.000302553929458 0.000116603561786 +GGCGTA 0.000169991640778 9.66661652109e-05 +GCCCTC 0.000408603760402 9.12286934178e-05 +GATGTG 0.00122425172487 0.00022112385292 +GCCCTA 0.000194944542177 0.000120832706514 +GATGTA 0.000258886352011 0.000196353148085 +GCCCTG 0.000954448478497 0.00010814527233 +GATGTC 0.000555202056119 0.00013412430423 +AACCTC 0.000196504098514 0.000111770253525 +TGAAAT 0.0 0.000708683823703 +GCTCTT 0.000166872528103 0.000203603110476 +TTCTAA 1.09168943619e-05 0.00037397722666 +ATCCGT 0.00017467030979 0.000159499172598 +TTCTAC 0.000569238063156 0.000228373815311 +GGCTTA 9.20138239074e-05 0.000170374116184 +TTGGAT 0.000630060760315 0.000262811136667 +TGAAAC 0.0 0.000360685628943 +TGAAAG 1.55955633741e-06 0.000354039830085 +CCCTCC 0.000258886352011 0.000100895309939 +ATCCGC 0.000304113485796 0.000142582593686 +ATCCGA 0.000141919626705 0.000227165488246 +ATCCGG 0.000199623211189 0.000151645046675 +TGGCTC 0.000120085837981 0.000138957612491 +TGGCTA 0.000121645394318 0.000192124003357 +TGGCTG 0.000524010929371 0.000228977978843 +GGGTGA 
1.55955633741e-06 8.03537498316e-05 +AGTTGT 9.66924929196e-05 0.000288790168568 +AGCTAT 0.00027760102806 0.000213269726997 +ATTAGT 0.000131002732343 0.000315977527533 +GTCGGC 0.000279160584397 0.000130499323035 +TGGCTT 8.10969295455e-05 0.000256769501342 +AGTTGG 8.42160422203e-05 0.000254352847211 +AGCTAA 6.23822534965e-06 0.000299060948621 +AGCTAC 0.000439794887151 0.000165540807924 +AGTTGC 0.000134121845018 0.000257373664874 +AGTTGA 1.55955633741e-06 0.000328060798185 +AGCTAG 7.79778168707e-06 0.000198769802215 +TCCTAG 1.09168943619e-05 9.42495110807e-05 +TCCAGT 0.000469426457561 0.00020239478341 +CTAATT 0.000152836521066 0.000400560422093 +CAGTGT 0.000104490274607 0.00017641575151 +TAACAA 0.0 0.000572142865342 +TCAGGT 6.39418098339e-05 0.000131103486567 +TCAAGG 7.48587041958e-05 0.000164332480859 +GTCGGG 3.74293520979e-05 0.000153457537272 +TCAAGC 0.000116966725306 0.000256165337809 +CTGCAC 0.000751706154633 0.00019997812928 +CTAATA 0.000102930718269 0.000311144219273 +CAGTGC 0.000450711781512 0.000285769350905 +CTAATC 0.000184027647815 0.000173394933847 +CAGTGA 2.33933450612e-05 0.000221728016453 +CAGTGG 0.000300994373121 0.000277311061449 +CTAATG 0.000304113485796 0.000201790619878 +TCAAGT 0.000118526281643 0.000292415149763 +CTGCAA 0.000630060760315 0.00035041484889 +GAGCCC 0.000569238063156 0.000132915977165 +TCAGGC 7.32991478584e-05 9.00203663527e-05 +CGAGCT 0.000129443176005 0.00013412430423 +TCCTAA 3.11911267483e-06 0.000186082368031 +GATCAG 0.000902983119362 0.000208436418736 +CAACCA 0.000232373894275 0.000398143767963 +GTTCAT 0.000112288056294 0.000235623777702 +TTCCCT 6.55013661714e-05 0.000184269877433 +TAACAG 0.0 0.000201186456345 +CGAGCG 0.000112288056294 0.000151040883142 +GTTAGT 0.000106049830944 0.000282748533242 +CGAGCA 0.000107609387282 0.000204811437541 +GAGCCG 0.000589512295542 0.000127478505372 +CGAGCC 0.000196504098514 0.000138353448958 +GTTCAG 0.000315030380157 0.000149832556077 +TTCCCC 0.000311911267483 0.00019756147515 +GTTCAC 
0.000169991640778 0.00013654095836 +GTTCAA 0.000169991640778 0.000471247555403 +TGAGCA 0.0 0.000235019614169 +GGACCT 0.000198063654851 9.66661652109e-05 +TACATG 0.000469426457561 0.000200582292813 +AAGTAC 0.000909221344712 0.000227769651778 +AAGTAA 2.6512457736e-05 0.000500247404967 +TACATC 0.000489700689948 0.000258581991939 +AAGTAG 3.74293520979e-05 0.00021870719879 +TACATA 0.000221456999913 0.000750975270982 +GCACAC 0.000110728499956 0.000265227790798 +GGACCG 0.000247969457649 0.0001317076501 +ATTGGT 0.000240171675962 0.000277311061449 +GGACCA 0.000324387718182 0.000128082668904 +TCATCA 0.000190265873164 0.00030812340161 +GGACCC 0.000489700689948 7.61246051036e-05 +CACACG 0.000336864168881 0.000224144670583 +TACATT 0.000263565021023 0.000457955957687 +CACACC 0.000378972189991 0.000198769802215 +GCACAA 0.000185587204152 0.000275498570851 +CACACA 0.000168432084441 0.000794475045327 +TCTGCC 0.000238612119624 0.000184874040966 +TCTGCA 0.000168432084441 0.000258581991939 +TCTGCG 0.000116966725306 0.000148020065479 +TAGATT 0.0 0.000279123552047 +CCCGTC 0.00031347082382 7.91454227665e-05 +TTAAGG 4.36675774476e-05 0.000227769651778 +CCCGTA 0.000112288056294 7.24996239082e-05 +CGTCGA 0.000106049830944 0.000138353448958 +CCCGTG 0.000436675774476 8.3978731027e-05 +TTAAGC 9.66924929196e-05 0.000382435516116 +TTACGT 2.80720140734e-05 0.000175207424445 +TAGATC 0.0 0.000164332480859 +TGTCAA 8.26564858829e-05 0.000292415149763 +TAGATA 0.0 0.000280936042644 +TTTCGC 0.000208980549213 0.000325644144054 +TAGATG 0.0 0.000243477903625 +TGGTTA 3.11911267483e-05 0.000296040130958 +TCTGCT 0.000218337887238 0.000227769651778 +TTACGC 5.61440281469e-05 0.000121436870046 +TTAAGT 9.35733802448e-05 0.000492997442576 +TTACGA 3.43102394231e-05 0.000226561324713 +TTACGG 2.18337887238e-05 0.000100895309939 +CCCGTT 0.000268243690035 0.000111166089993 +AGTGAC 0.000227695225262 0.000166144971456 +AGTGAA 0.000283839253409 0.000496622423771 +AGTGAG 0.00031970904917 0.000188499022161 +GCGGTC 
0.000210540105551 9.00203663527e-05 +CGGGCG 0.000159074746416 0.000161311663196 +CCCTCT 0.00013256228868 0.000122645197111 +TAAGAG 0.0 0.00019514482102 +CCTGAT 0.000112288056294 0.00013412430423 +GAAAGT 0.000300994373121 0.000367331427802 +CGGGCA 0.00012632406333 0.000129895159502 +TAAGAC 0.0 0.000167353298521 +ATTTAT 0.000131002732343 0.00110803591873 +AGTGAT 0.000316589936495 0.000207832255204 +TAAGAT 0.0 0.000271269426123 +CCTGAA 0.000171551197115 0.000180644896238 +CCTGAC 8.10969295455e-05 0.000122041033579 +CCTGAG 0.00018090853514 9.72703287435e-05 +GTTAGG 6.08226971591e-05 0.000117207725318 +ATTGGG 0.000106049830944 0.000191519839824 +TATACT 8.10969295455e-05 0.000260394482537 +GTTAGC 0.000190265873164 0.000154061700805 +CTCCAG 0.00039768686604 0.000175811587977 +GTTAGA 4.67866901224e-05 0.000173394933847 +TGTAGC 3.89889084353e-05 0.00019514482102 +GCCAAA 0.00071739591521 0.000513539002683 +TATACG 0.000159074746416 0.00017641575151 +GGTAGC 0.000218337887238 0.000104520291134 +TATACC 0.000120085837981 0.000204207274008 +TATACA 5.45844718095e-05 0.000695392225986 +ATTGGC 0.000609786527929 0.000256769501342 +TCTTGT 3.89889084353e-05 0.000279123552047 +TCCACG 0.000586393182867 0.000132311813632 +ACTAGT 9.51329365822e-05 0.000141374266621 +TTGACT 0.00012632406333 0.000216894708192 +GTAATA 7.64182605332e-05 0.000317790018131 +AACGCA 0.000199623211189 0.000313560873403 +GTAATC 0.000149717408392 0.000202998946943 +TCTAAC 8.10969295455e-05 0.000188499022161 +CGTTCT 8.57755985577e-05 0.000151040883142 +CCACCG 0.000655013661714 0.000161311663196 +GGTAGG 2.6512457736e-05 0.000137145121893 +CCACCA 0.000750146598296 0.000322019162859 +CCACCC 0.000374293520979 0.000187894858629 +GTTCGT 0.000109168943619 0.000171582443249 +TTTATG 0.000221456999913 0.000549788814637 +ACTAGG 3.89889084353e-05 9.06245298853e-05 +TTTATC 0.0002292547816 0.000387268824376 +ACTAGC 0.000140360070367 0.000146811738414 +TTTATA 0.000176229866128 0.000962432507381 +ACTAGA 3.74293520979e-05 
0.000181249059771 +CCACCT 0.000268243690035 0.000181249059771 +CGTTCG 0.000188706316827 0.00015527002787 +TCGTTG 0.000235493006949 0.000206623928138 +CGTTCA 6.70609225088e-05 0.000149832556077 +CGTTCC 0.000201182767526 0.000122645197111 +TCTAAT 9.20138239074e-05 0.000324435816989 +TCGTCT 0.000104490274607 0.000141978430154 +CTTTGT 4.99058027972e-05 0.000316581691066 +AAACAT 0.000151276964729 0.000682704791802 +ACTGGA 0.000297875260446 0.000214478054062 +ACTGGC 0.000352459732255 0.000210853072866 +TAGTCC 0.0 0.000128082668904 +GTGACC 0.000636298985665 0.000114186907655 +TAGTCA 0.0 0.000148624229012 +GTGACA 0.00018714676049 0.000138353448958 +TAGTCG 0.0 0.00013654095836 +GTGACG 0.00031970904917 0.00010814527233 +TGTCAT 4.36675774476e-05 0.00017883240564 +ACTGGT 0.000230814337937 0.00015527002787 +CTTTGG 0.000106049830944 0.000214478054062 +CCTATG 7.95373732081e-05 0.000132311813632 +CTTTGC 0.000143479183042 0.000282144369709 +CTACGC 0.000113847612631 9.12286934178e-05 +GTGACT 0.000162193859091 0.000142582593686 +TAGTCT 0.0 0.000138353448958 +GTGCAC 0.000433556661801 0.000164936644391 +CGACGG 9.35733802448e-05 0.000103311964069 +ACCTAG 4.67866901224e-06 9.12286934178e-05 +CGACGA 0.000155955633741 0.000202998946943 +ACCTAA 1.09168943619e-05 0.000216290544659 +CGACGC 0.000262005464685 0.000116603561786 +ACCTAC 0.00049437935896 0.000151040883142 +CCGCAG 0.000790695063068 0.000154665864337 +CCGCAA 0.000316589936495 0.000180040732705 +CCGCAC 0.000332185499869 0.000117207725318 +CTACGG 4.05484647727e-05 7.18954603756e-05 +ACCTAT 0.000291637035096 0.000179436569173 +CGACGT 0.000140360070367 8.57912216247e-05 +CGAAGA 8.57755985577e-05 0.000250727866016 +CCCCGT 0.000102930718269 7.67287686362e-05 +CGAAGG 0.000113847612631 0.000172186606782 +GCGCTC 0.000221456999913 0.000126270178307 +GTGCAT 0.000230814337937 0.00024468623069 +GCGCTA 8.57755985577e-05 9.30411840155e-05 +CCGCAT 0.000268243690035 0.000149228392544 +GTTCGC 0.000191825429502 0.000142582593686 +GAAGAT 
0.000631620316652 0.000315373364001 +GGATCT 0.000272922359047 0.000143790920751 +TCATGA 1.24764506993e-05 0.000122645197111 +TCGGAA 0.000364936182955 0.000209040582269 +TCGGAC 0.000400805978715 0.000119020215916 +GAATCT 0.0002167783309 0.000217498871725 +TCGGAG 0.000747027485621 0.000142582593686 +GGATCG 0.00045227133785 0.000151645046675 +GAAGAG 0.000784456837719 0.000271873589656 +GAAGAA 0.000469426457561 0.000450705995296 +CGAGAA 0.000135681401355 0.000277915224981 +GAAGAC 0.000411722873077 0.000208436418736 +GGATCA 0.000255767239336 0.000199373965748 +GAATCA 0.000219897443575 0.000287581841503 +GAATCC 0.000411722873077 0.000235019614169 +TCGGAT 0.00050061758431 0.000189707349226 +GAATCG 0.000438235330813 0.0002634153002 +ATGGTC 0.000324387718182 0.000139561776023 +CAGATG 0.000750146598296 0.000233811287104 +ATGGTA 0.000146598295717 0.000193936493954 +TGACCC 1.55955633741e-06 0.000123249360644 +ATGGTG 0.000559880725131 0.000158895009065 +CAGATC 0.000679966563112 0.000238040431832 +ACGCCC 0.00072363414056 0.000157686682 +CAGATA 0.000255767239336 0.000315373364001 +TGTCAG 0.00012632406333 0.000144999247816 +ACGCCT 0.000171551197115 0.00011297858059 +CAGATT 0.000417961098427 0.000286373514437 +TGATTA 0.0 0.000325039980522 +CTCGAC 0.000304113485796 0.000135332631295 +ATGGTT 0.000219897443575 0.000259790319004 +TCGCAA 0.000285398809747 0.000253748683679 +TTGCGC 0.000269803246372 0.000147415901947 +GCTATT 0.000151276964729 0.000209644745801 +TGCGTC 0.000177789422465 0.000117811888851 +GCGTGG 7.32991478584e-05 0.000157686682 +TTGTAG 1.40360070367e-05 0.000242873740092 +AATGGT 0.000385210415341 0.000266436117863 +TCCACC 0.000616024753278 0.000182457386836 +CTACGT 4.5227133785e-05 0.000100895309939 +AATGGC 0.000882708886976 0.000294227640361 +AATGGA 0.000653454105376 0.000396935440897 +AATGGG 0.000199623211189 0.000195748984552 +CCCCGA 8.26564858829e-05 0.000120832706514 +AATAGA 9.66924929196e-05 0.000354643993618 +AATAGC 0.00031970904917 0.0002893943321 +GTGAAT 
0.00040392509139 0.000337727414706 +AGCCAG 0.000630060760315 0.000281540206177 +GGAGGG 5.92631408217e-05 8.63953851573e-05 +AAAAGT 0.000165312971766 0.000784204265274 +AGCCAC 0.000310351711145 0.000245290394223 +AGCCAA 0.000258886352011 0.000424726963396 +TGGTAG 4.67866901224e-06 0.000151040883142 +CGCCAT 0.000179348978803 0.000163728317326 +GAGTGT 0.000194944542177 0.000189707349226 +TGGTAC 0.000210540105551 0.000112374417058 +TGGTAA 4.67866901224e-06 0.000246498721288 +AAAAGG 0.000112288056294 0.000473664209534 +GGAGGT 0.000294756147771 0.000115999398253 +AGCCAT 0.000154396077404 0.000267644444928 +AAAAGC 0.000243290788636 0.000623496765611 +AAAAGA 0.000134121845018 0.000808974970109 +TCTATA 6.23822534965e-05 0.000287581841503 +GAGTGG 0.000352459732255 0.000205415601073 +CGCCAG 0.000818767077142 0.000174603260912 +TGGTAT 0.000121645394318 0.000198165638682 +CGCCAA 0.000254207682998 0.000200582292813 +GAGTGC 0.000606667415254 0.000192728166889 +CGCCAC 0.000283839253409 0.000166144971456 +GAGTGA 9.35733802448e-06 0.000196353148085 +GCCAGG 0.000315030380157 0.000175811587977 +GCCAGA 0.000226135668925 0.000217498871725 +GCCAGC 0.00113223790096 0.000347998194759 +GCCCGT 0.000169991640778 9.36453475481e-05 +TACCGA 0.000124764506993 0.000145603411349 +TACCGC 0.000324387718182 0.000105124454667 +TACTGA 1.09168943619e-05 0.000193936493954 +CTTCAT 0.000109168943619 0.000230790469441 +GCCCGC 0.000350900175918 8.8207875755e-05 +GCCAGT 0.000718955471547 0.000193936493954 +GCCCGA 0.000127883619668 0.000121436870046 +GCCCGG 0.000141919626705 9.72703287435e-05 +TGCGAT 0.000344661950568 0.000181249059771 +CTTCAC 0.000129443176005 0.000170374116184 +CTTCAA 0.000188706316827 0.000339539905303 +CTTCAG 0.000366495739292 0.000198769802215 +TCCGAG 0.000586393182867 0.00013654095836 +GTGGGC 0.00108545121084 0.000185478204498 +TCCGAC 0.000369614851967 0.000127478505372 +AATTCA 0.000202742323864 0.000552205468767 +GGACTG 0.000503736696984 0.000141374266621 +AATTCC 0.00027136280271 
0.00033168577938 +AATTCG 0.000321268605507 0.00033410243351 +TCCGAT 0.000545844718095 0.000251936193081 +TAGCGG 0.0 9.48536746132e-05 +AATTCT 0.000137240957692 0.000494809933173 +TAGCGC 0.0 0.000101499473471 +GTCATC 0.000583274070193 0.000181853223303 +GTCATA 0.000148157852054 0.000180040732705 +GTCATG 0.000286958366084 9.36453475481e-05 +AGCAGG 0.00013880051403 0.000188499022161 +CCAGGT 0.000188706316827 0.000122041033579 +TTGGAA 0.000378972189991 0.000343769050031 +AGCAGC 0.00142075582338 0.000719558767289 +AGCAGA 0.000101371161932 0.000383643843181 +GGGGTG 8.10969295455e-05 6.52496615174e-05 +CCAGGG 4.83462464598e-05 9.30411840155e-05 +AGCAGT 0.000566118950481 0.000249519538951 +CCAGGA 0.000327506830857 0.000186686531564 +CCAGGC 0.000188706316827 0.000138957612491 +TCCCCG 0.00017467030979 0.00015285337374 +ATAGAG 0.000213659218226 0.000226561324713 +CATCTG 0.000564559394144 0.000212061399931 +ACGGTC 0.000185587204152 0.00011056192646 +CATCTA 0.000106049830944 0.000190915676292 +ATAGAC 0.000157515190079 0.000168561625587 +CATCTC 0.000127883619668 0.000173394933847 +CAAGCT 0.00017467030979 0.00022354050705 +TCCAAC 0.000572357175831 0.000225352997648 +GGGGTC 7.01800351836e-05 6.04163532568e-05 +CAAGCC 0.0002167783309 0.000193332330422 +CATCTT 0.000113847612631 0.000246498721288 +CAAGCA 0.000182468091477 0.000404185403288 +CAAGCG 0.0002292547816 0.000226561324713 +GTTCTT 0.000110728499956 0.000294227640361 +ACATGA 6.23822534965e-06 0.000217498871725 +CGTAAT 0.000106049830944 0.00019756147515 +ACATGC 7.64182605332e-05 0.000164332480859 +TCTTGA 1.55955633741e-06 0.000230186305909 +ACATGG 8.42160422203e-05 0.000149228392544 +AGGGAT 0.000266684133698 0.000132311813632 +GTACAT 5.3024915472e-05 0.000291206822698 +TGGCGG 9.51329365822e-05 0.000140165939556 +CGTAAG 0.000149717408392 0.000134728467763 +GAGTCA 0.000241731232299 0.000143790920751 +ACATGT 5.3024915472e-05 0.000224748834115 +CGTAAC 9.8252049257e-05 0.000128686832437 +CGTAAA 8.73351548951e-05 0.000259790319004 
+AGGGAG 0.000346221506906 0.000104520291134 +TTATTT 9.98116055944e-05 0.00128988914203 +AGGGAC 0.00017467030979 0.000106936945265 +AGGGAA 0.000232373894275 0.000243477903625 +CCCTTT 0.000255767239336 0.000225957161181 +TGTTGT 3.43102394231e-05 0.000512330675618 +TCTTGG 5.92631408217e-05 0.00019756147515 +GTTGTT 0.000166872528103 0.000506893203825 +ATCCTT 0.000221456999913 0.000229582142376 +GCGGGG 8.26564858829e-05 6.64579885825e-05 +TTGTCC 0.000296315704108 0.000200582292813 +TTGTCA 0.000106049830944 0.000231998796506 +ATCCTC 0.000332185499869 0.000189103185694 +CTATGA 7.79778168707e-06 0.000154061700805 +ATCCTA 0.000134121845018 0.000172790770315 +GTTGTC 0.00022301655625 0.000190915676292 +ATCCTG 0.000829683971504 0.000193936493954 +GTTGTG 0.000274481915385 0.00021870719879 +TTGTCT 9.35733802448e-05 0.000271269426123 +CTATGC 7.79778168707e-05 0.000167353298521 +CTGTGG 0.000300994373121 0.000158290845533 +AAGGGG 0.000118526281643 9.96869828738e-05 +TGACTC 0.0 0.000116603561786 +TGGGCA 9.66924929196e-05 0.00019997812928 +TGGGCG 0.00012632406333 0.000157082518468 +TGACTG 0.0 0.000162519990261 +AAGCCG 0.000655013661714 0.000179436569173 +AAGCCC 0.000720515027885 0.000183665713901 +AAGCCA 0.000538046936408 0.00041868532807 +CAAAAT 0.000380531746329 0.000849453926791 +CTATGG 6.86204788462e-05 0.000112374417058 +TGGGCT 8.10969295455e-05 0.000135936794828 +CAAAAC 0.000374293520979 0.000721975421419 +CAAAAA 0.000316589936495 0.00131284735627 +AAGCCT 0.000207420992876 0.000173394933847 +CAAAAG 0.000726753253235 0.000605371859633 +TCAATG 0.000177789422465 0.000293623476828 +CCCTTA 4.83462464598e-05 0.000108749435862 +TCAATC 9.35733802448e-05 0.000275498570851 +CGTCGT 0.000212099661888 0.000128686832437 +CAGTTC 0.00077198038702 0.000230790469441 +CAGTTA 0.000102930718269 0.000227165488246 +CAGTTG 0.000678407006775 0.000302685929817 +GACCCC 0.000155955633741 8.27704039619e-05 +CTCTGT 9.35733802448e-05 0.000193332330422 +TCAATT 0.000131002732343 0.000587246953656 +TGGCGT 
0.000109168943619 0.000148624229012 +GTTTGG 0.000143479183042 0.000204811437541 +CTCTGC 0.000297875260446 0.000206019764606 +GACCCT 5.61440281469e-05 8.76037122224e-05 +CTCTGA 7.79778168707e-06 0.000151645046675 +CTCTGG 0.000135681401355 0.000148624229012 +CAGTTT 0.000364936182955 0.000407810384484 +GCGAAG 0.000276041471722 0.000187290695096 +CTACTC 0.000151276964729 0.000125666014774 +TGCTGG 0.000159074746416 0.000210248909334 +GCGAAC 0.000221456999913 0.000189103185694 +GAGTCT 0.000188706316827 0.000123249360644 +GCGAAA 0.000146598295717 0.000391497969104 +TAGGGA 0.0 0.000102103637004 +GCTGGT 0.000354019288593 0.000194540657487 +GTGATT 0.00040392509139 0.000267040281395 +CTACTA 7.48587041958e-05 0.000215082217594 +TGCTGT 0.000118526281643 0.000248311211886 +GCGAAT 0.000249529013986 0.000248311211886 +TAATGG 0.0 0.000251936193081 +GTGATG 0.000647215880026 0.000153457537272 +CTACTG 0.000254207682998 0.000166144971456 +GTGATC 0.000611346084266 0.000163728317326 +GCTGGC 0.000586393182867 0.000205415601073 +GTGATA 0.000191825429502 0.000190915676292 +GCTGGA 0.000463188232212 0.000193332330422 +TGATAA 0.0 0.000412039529212 +TGTAGT 3.43102394231e-05 0.000213269726997 +TGATAC 0.0 0.000164332480859 +TGATAG 0.0 0.000166749134989 +CTTGTT 9.8252049257e-05 0.000302081766284 +GACGGT 0.000204301880201 8.09579133642e-05 +GCTAGG 5.14653591346e-05 7.49162780385e-05 +GCTAGA 6.86204788462e-05 0.0001317076501 +ATGAAT 0.000366495739292 0.000414456183342 +CTATGT 2.80720140734e-05 0.00019514482102 +AAGGGT 0.000449152225175 0.000146207574882 +TGATAT 0.0 0.000434997743449 +GTAGTG 0.000157515190079 0.000141978430154 +GCTAGT 0.000146598295717 0.000138957612491 +ATGAAC 0.000661251887063 0.000252540356614 +GACGGG 8.73351548951e-05 5.3166390866e-05 +ATGAAA 0.000338423725219 0.000741308654461 +GACGGA 0.00027760102806 0.000147415901947 +ATGAAG 0.000860875098252 0.000248915375418 +GACGGC 0.000364936182955 0.000119624379449 +CTTGTC 0.000107609387282 0.00013654095836 +CTAACT 9.51329365822e-05 
0.000202998946943 +CCATAG 4.67866901224e-06 0.000137749285426 +GGTGAA 0.000286958366084 0.000185478204498 +GGTGAC 0.000286958366084 0.000107541108797 +GCGTCT 5.61440281469e-05 0.000119020215916 +TGTAAA 6.55013661714e-05 0.000657329923434 +GGTGAG 0.000318149492832 0.000103916127602 +GGAATG 0.000330625943532 0.000184269877433 +CGTCCT 0.00013256228868 9.30411840155e-05 +GGAATC 0.00030723259847 0.000217498871725 +GGAATA 0.000143479183042 0.000227769651778 +GCGTCA 5.61440281469e-05 0.000128082668904 +GCGTCC 0.000202742323864 8.45828945596e-05 +GGTGAT 0.000380531746329 0.000114791071188 +GCGTCG 9.98116055944e-05 0.000189707349226 +CGTCCG 0.000215218774563 0.000158895009065 +GGAATT 0.0002167783309 0.000278519388514 +CGTCCC 0.000237052563287 7.12912968431e-05 +CGTCCA 0.000145038739379 0.000175207424445 +TTTCGA 0.000106049830944 0.000410227038614 +CCGTAT 0.000190265873164 0.000104520291134 +GAAGTC 0.000247969457649 0.000216894708192 +CTACTT 8.57755985577e-05 0.000221728016453 +CGTGGT 0.000157515190079 0.000105124454667 +GCCGTA 0.000166872528103 0.000107541108797 +CCGTAC 0.000226135668925 9.72703287435e-05 +CCGTAA 4.67866901224e-06 0.000144395084284 +CCGTAG 1.55955633741e-06 6.34371709197e-05 +CTAACA 0.000106049830944 0.000199373965748 +GCCGTT 0.000308792154808 0.000143790920751 +TTGGTG 0.000449152225175 0.000230186305909 +GAAGTT 0.000274481915385 0.000253144520146 +GGCGCG 0.000121645394318 0.000122041033579 +ATGCCC 0.000577035844843 0.00015043671961 +TTAGTT 6.70609225088e-05 0.000584830299526 +ATGCCA 0.000321268605507 0.000306310911012 +GGCGCC 0.000637858542002 0.000114186907655 +ATGCCG 0.000382091302666 0.000120832706514 +GGCGCA 0.000219897443575 0.000146207574882 +TATTAC 0.000131002732343 0.000292415149763 +ACTATG 0.000115407168969 0.000173394933847 +CCATAT 0.00013880051403 0.000337727414706 +TTTAAG 0.000427318436451 0.000533476399258 +ACTATC 0.000106049830944 0.000177019915043 +TTTAAA 0.000154396077404 0.00132674311752 +ACTATA 7.1739591521e-05 0.000362498119541 +TTTAAC 
0.000240171675962 0.000465205920078 +TATTAT 0.000123204950656 0.000886307902278 +TTAGTG 6.08226971591e-05 0.0002634153002 +GGCGCT 0.000268243690035 0.000123249360644 +TTAGTC 5.77035844843e-05 0.000166144971456 +ATGCCT 0.000127883619668 0.000153457537272 +TTAGTA 5.14653591346e-05 0.000314165036936 +TTTAAT 0.000260445908348 0.00106091116319 +ACTATT 0.000135681401355 0.000326852471119 +GGGCAT 8.88947112325e-05 0.000154061700805 +TCGATA 0.00013256228868 0.000249519538951 +TTCTAG 1.40360070367e-05 0.000194540657487 +TTTCCA 0.000173110753453 0.000496622423771 +TTTCCC 0.000297875260446 0.000306310911012 +GGGACT 3.11911267483e-05 8.51870580921e-05 +TATCGA 0.000109168943619 0.000228373815311 +GTTTGT 8.26564858829e-05 0.000421706145733 +TTTCCT 0.000101371161932 0.000363706446606 +GGGCAG 0.000163753415428 0.000157082518468 +GTCTGT 6.55013661714e-05 0.000170978279717 +GGGCAA 9.8252049257e-05 0.000212665563464 +GGGCAC 0.000110728499956 0.000112374417058 +GTACGG 4.99058027972e-05 9.78744922761e-05 +GTTTGA 4.67866901224e-06 0.000260394482537 +GTACGC 9.51329365822e-05 7.18954603756e-05 +GTACGA 5.61440281469e-05 0.000129895159502 +CCGCTC 0.000249529013986 0.000140770103088 +GCATGT 4.21080211102e-05 0.000179436569173 +CCGCTA 0.000141919626705 9.2437020483e-05 +CCGCTG 0.000782897281381 0.000146207574882 +TGAAAA 0.0 0.0009739116145 +GCGCGG 9.51329365822e-05 0.000123249360644 +GCGCGC 0.000194944542177 9.60620016784e-05 +TGTTTT 6.86204788462e-05 0.000965453325044 +GCGCGA 0.000109168943619 0.000103916127602 +GCATGG 5.14653591346e-05 0.000148020065479 +TGGATC 0.000198063654851 0.000169165789119 +GCATGC 8.26564858829e-05 0.000146207574882 +CCGCTT 0.000135681401355 0.000204207274008 +GCATGA 6.23822534965e-06 0.000158290845533 +TTTCAG 0.000291637035096 0.00030570674748 +GTCGAC 0.000288517922421 0.000102707800537 +GCGCGT 0.000116966725306 0.000119624379449 +GTCGAG 0.000505296253322 0.000166749134989 +CGTGGC 0.000305673042133 0.000182457386836 +CTTCCG 0.000109168943619 0.000101499473471 
+TGCGCT 7.64182605332e-05 0.000144395084284 +GAGCTC 0.000480343351923 0.000141374266621 +ATTGAT 0.000421080211102 0.000434997743449 +GAGCTG 0.00203522102032 0.000213269726997 +AAACGA 0.000193384985839 0.000586038626591 +AAACGC 0.000291637035096 0.000311144219273 +GTAACA 7.64182605332e-05 0.000183061550368 +GTAGTA 3.74293520979e-05 0.000149832556077 +AAACGG 0.000196504098514 0.000287581841503 +ATTGAA 0.000425758880114 0.000598121897243 +TGCGCG 5.77035844843e-05 0.000106936945265 +ATTGAC 0.000347781063243 0.000202998946943 +GAGCTT 0.000315030380157 0.000166749134989 +TGCGCC 0.000327506830857 0.000133520140698 +ATTGAG 0.000592631408217 0.00024710288482 +CGCTGA 1.55955633741e-06 0.000210248909334 +CACGCA 0.000101371161932 0.000209644745801 +AAACGT 0.000162193859091 0.000377602207855 +ATTTCG 0.000276041471722 0.000404789566821 +ATTTCA 0.000135681401355 0.00052924725453 +ATTTCC 0.000360257513942 0.00041626867394 +ATCAGT 0.000302553929458 0.000233811287104 +ATCGGG 0.000116966725306 0.000117207725318 +CATAGT 6.70609225088e-05 0.000158895009065 +ATCGGC 0.000407044204065 0.000166144971456 +ATCGGA 0.000316589936495 0.000213873890529 +ATTTCT 0.000104490274607 0.000544955506377 +TACGCC 0.000485022020935 9.72703287435e-05 +TGGGTC 9.35733802448e-05 0.000106332781732 +TTTCGG 0.000101371161932 0.000277311061449 +TGGGTA 4.05484647727e-05 0.000135936794828 +CATAGG 3.43102394231e-05 9.2437020483e-05 +ATCGGT 0.000240171675962 0.000201186456345 +ATCAGG 0.000146598295717 0.00017399909738 +CATAGC 0.000102930718269 0.000157082518468 +ATCAGC 0.000562999837806 0.000282144369709 +ACGGGT 0.000163753415428 6.82704791802e-05 +GTCTGA 1.09168943619e-05 0.000124457687709 +GATTCT 0.000163753415428 0.000222936343518 +CTTTTG 0.000288517922421 0.000387872987909 +GCGTTC 0.000155955633741 0.000142582593686 +CTTTTC 0.000204301880201 0.000374581390192 +CTTTTA 8.42160422203e-05 0.000491184951978 +GATTCC 0.000450711781512 0.000262206973135 +ACGGGG 7.32991478584e-05 6.58538250499e-05 +ACGGGA 0.000291637035096 
0.000102707800537 +ACGGGC 0.000435116218138 7.61246051036e-05 +TACTAT 0.000282279697072 0.000294227640361 +CTTTTT 0.000109168943619 0.000768496013427 +CGTAGA 4.67866901224e-05 0.000113582744123 +TCACGA 6.08226971591e-05 0.000103916127602 +TCACGC 0.000116966725306 0.000107541108797 +TCACGG 4.36675774476e-05 9.42495110807e-05 +TCTCAT 5.92631408217e-05 0.00022112385292 +TCACGT 7.01800351836e-05 0.000114791071188 +AGTGGA 0.000372733964642 0.000273081916721 +TTCCAC 0.000360257513942 0.000232602960039 +TTCCAA 0.000251088570324 0.000409018711549 +TTCCAG 0.000762623048995 0.000257373664874 +CGAGGC 0.000109168943619 0.000111770253525 +ACCCTT 0.000131002732343 0.00015527002787 +TGAAGT 0.0 0.000264019463732 +ATGCTT 0.000202742323864 0.000256165337809 +TTCCAT 0.000235493006949 0.000358268974813 +ACCCTA 0.000134121845018 0.000129895159502 +ACCCTC 0.000182468091477 0.000105728618199 +TGAAGG 0.0 0.000173394933847 +ACCCTG 0.000617584309616 0.000109353599395 +CTGACT 0.0002167783309 0.000175207424445 +CCCCTT 9.66924929196e-05 0.000132915977165 +AGTTAT 9.66924929196e-05 0.000328060798185 +CTGACG 0.000382091302666 8.88120392875e-05 +CTGACC 0.000793814175743 0.000137749285426 +CTGACA 0.000194944542177 0.000166749134989 +AGTTAA 7.79778168707e-06 0.000421706145733 +CCCCTG 0.000347781063243 0.000104520291134 +AGTTAC 0.000104490274607 0.000209644745801 +CCCCTC 0.000112288056294 0.000111166089993 +AGTTAG 9.35733802448e-06 0.000210853072866 +CCCCTA 6.08226971591e-05 7.73329321687e-05 +ACACAG 0.000290077478759 0.000280936042644 +ACACAA 0.000141919626705 0.000561267921756 +ACACAC 0.000112288056294 0.000709892150768 +AATGTT 0.000300994373121 0.000607788513764 +CTGCAG 0.00183091914012 0.000244082067158 +CATGGG 6.55013661714e-05 9.90828193412e-05 +CATGGA 0.000230814337937 0.000181853223303 +CATGGC 0.000315030380157 0.000141374266621 +AATGTC 0.000377412633654 0.000254352847211 +ACACAT 0.000120085837981 0.00037156057253 +AATGTA 0.0002292547816 0.000486351643718 +AATGTG 0.000853077316565 
0.000366727264269 +CATGGT 0.000190265873164 0.000123853524177 +CTGCAT 0.000433556661801 0.00022354050705 +GCACAG 0.000335304612544 0.000201790619878 +ACATTT 0.000154396077404 0.000748558616852 +CGAGAT 0.000210540105551 0.000160103336131 +ATTACT 8.10969295455e-05 0.000300269275686 +ACATTG 0.000213659218226 0.00026824860846 +GTACCA 0.000112288056294 0.00013412430423 +ACATTA 6.55013661714e-05 0.000386664660844 +ACATTC 0.000235493006949 0.000264623627265 +ATTACA 0.000120085837981 0.000364914773671 +ATGCTC 0.000360257513942 0.000166749134989 +ATTACC 0.000230814337937 0.000248915375418 +CGAGAC 9.51329365822e-05 0.000112374417058 +ATTACG 0.000169991640778 0.000179436569173 +AAGTCT 0.000177789422465 0.000198165638682 +ATGCTA 0.000219897443575 0.000194540657487 +GGACAT 0.000201182767526 0.000182457386836 +GCTATA 0.000106049830944 0.000187290695096 +GCTATC 0.000157515190079 0.000137145121893 +AAGTCG 0.00062070342229 0.000212665563464 +AAGTCA 0.000285398809747 0.000354643993618 +GCTATG 0.00018714676049 0.000117811888851 +AAGTCC 0.000628501203977 0.000196353148085 +GGACAG 0.000428877992789 0.000155874191403 +GGACAC 0.00027760102806 0.000178228242108 +GGACAA 0.000213659218226 0.000217498871725 +GCACCT 0.000145038739379 0.000129895159502 +AGGGTT 7.1739591521e-05 0.000146207574882 +GAGGCG 0.000854636872902 0.000127478505372 +TGTTGG 3.89889084353e-05 0.000283352696775 +GAGGCA 0.000547404274432 0.000189103185694 +AGCGCT 0.000165312971766 0.000172790770315 +GAGGCC 0.00108701076718 0.000113582744123 +TGTTGC 9.35733802448e-05 0.00031295670987 +TTCTCA 0.000145038739379 0.000234415450637 +ACTCGA 7.79778168707e-05 0.000189103185694 +TAAAAT 0.0 0.00119261881329 +ACTCGC 0.000137240957692 0.000138353448958 +ACTCGG 6.70609225088e-05 0.000106936945265 +AGCGCA 0.000141919626705 0.000202998946943 +GAGGCT 0.000519332260359 0.000103916127602 +AGCGCC 0.000505296253322 0.000135936794828 +AGGGTG 0.000121645394318 0.000106332781732 +AGGGTA 3.58697957605e-05 9.30411840155e-05 +AGCGCG 0.000101371161932 
0.000130499323035 +AGGGTC 6.08226971591e-05 9.18328569504e-05 +TAAAAG 0.0 0.000533476399258 +ACTCGT 9.51329365822e-05 0.000121436870046 +TAAAAC 0.0 0.000754600252178 +TAAAAA 0.0 0.00151222132202 +ACGTTC 0.000198063654851 0.000148020065479 +AGCTGC 0.000343102394231 0.000296040130958 +ACGTTA 5.3024915472e-05 0.000146811738414 +AGCTGA 4.67866901224e-06 0.000300873439219 +ACGTTG 0.000173110753453 0.000166749134989 +AGCTGG 0.000201182767526 0.000248311211886 +TAGGGG 0.0 7.43121145059e-05 +TAAATG 0.0 0.000581809481863 +AGTGCG 0.000212099661888 0.000220519689387 +AGTGCA 0.000257326795673 0.000333498269978 +AGAGTT 5.77035844843e-05 0.000276102734384 +AGTGCC 0.000647215880026 0.00019756147515 +CCAAAA 0.000224576112587 0.000728621220277 +TTCGTG 0.000477224239248 0.000163728317326 +TAGGGT 0.0 0.000107541108797 +GCATAG 9.35733802448e-06 0.000146207574882 +AGCTGT 0.000152836521066 0.000262206973135 +ACGTTT 0.000141919626705 0.000281540206177 +AGTGCT 0.000247969457649 0.000251332029548 +AGAGTC 2.6512457736e-05 0.000132311813632 +AGAGTA 2.80720140734e-05 0.000193332330422 +AGAGTG 9.045426757e-05 0.000177019915043 +TTCGTA 0.000127883619668 0.000226561324713 +GTTAAA 0.000194944542177 0.000523809782737 +CCTGGC 0.000247969457649 0.000165540807924 +GTTAAC 0.000266684133698 0.000227769651778 +CCTGGA 0.000230814337937 0.000163124153793 +CCTGGG 4.05484647727e-05 0.000124457687709 +GTTAAG 0.000385210415341 0.000229582142376 +TTCGTC 0.000269803246372 0.000141374266621 +GCACCC 0.000338423725219 0.000129895159502 +GTCTGG 0.000129443176005 0.000123249360644 +GTCTGC 0.000280720140734 0.000156478354935 +GTTAAT 0.000201182767526 0.000390893805572 +CCTGGT 0.000168432084441 0.000121436870046 +TCTAAA 7.48587041958e-05 0.000387268824376 +GGAAGG 9.66924929196e-05 0.000161915826728 +CCACAT 0.000191825429502 0.000238040431832 +GGAAGC 0.00026512457736 0.000175207424445 +GCACCG 0.000300994373121 0.000111166089993 +GGAAGA 0.000120085837981 0.000233811287104 +GCGTTA 5.77035844843e-05 0.000120832706514 
+CAATGA 7.79778168707e-06 0.00026583195433 +CCCGCA 0.000140360070367 0.000104520291134 +CAATGC 0.000127883619668 0.000231394632974 +GCGTTG 0.00017467030979 0.000170978279717 +CAATGG 0.00013880051403 0.000235019614169 +CCACAG 0.000522451373033 0.000205415601073 +GGAAGT 0.000227695225262 0.000203603110476 +CCACAC 0.000160634302754 0.000288186005035 +CCACAA 0.000288517922421 0.000442851869373 +ATTTGT 0.000109168943619 0.000731037874408 +TCCTCG 0.000587952739205 0.000198769802215 +TGTATG 7.48587041958e-05 0.000360081465411 +CAATGT 8.57755985577e-05 0.000295435967426 +GCGTTT 0.000116966725306 0.000248915375418 +TCCTCC 0.000561440281469 0.000224748834115 +TGTATC 5.61440281469e-05 0.000205415601073 +CGTTAG 7.79778168707e-06 9.90828193412e-05 +TAACCG 0.0 0.000143186757219 +TCAAAC 0.000162193859091 0.000324435816989 +TCAGGG 4.05484647727e-05 9.2437020483e-05 +CGTTAC 0.000123204950656 0.000111770253525 +TAACCC 0.0 0.000140770103088 +CGTTAA 6.23822534965e-06 0.000235623777702 +TAACCA 0.0 0.000305102583947 +ACCGCT 0.000208980549213 0.000182457386836 +TCGCGA 0.000177789422465 0.000119624379449 +TAGTGT 0.0 0.000227165488246 +TCAGGA 0.000141919626705 0.000181249059771 +TTCGTT 0.000199623211189 0.0003951229503 +TACCCT 2.02742323864e-05 0.000128082668904 +TAACCT 0.0 0.000137749285426 +CGTTAT 0.000120085837981 0.000145603411349 +GTGAAA 0.000249529013986 0.000491184951978 +GTGAAC 0.000506855809659 0.000198769802215 +TGCATA 0.000148157852054 0.000304498420414 +ACCGCG 0.000115407168969 0.000134728467763 +ACCGCA 0.000176229866128 0.000169165789119 +CCCTAT 0.000246409901311 0.000169769952652 +ACCGCC 0.0006752878941 0.000166144971456 +AGATCC 0.000155955633741 0.00017399909738 +AGGCTT 4.36675774476e-05 0.000167957462054 +CCTTCT 6.70609225088e-05 0.000154061700805 +GGGACG 4.83462464598e-05 6.28330073871e-05 +TGTCGC 0.000154396077404 0.000140165939556 +GTTGCC 0.000332185499869 0.00024468623069 +GTGCCG 0.000483462464598 0.000121436870046 +GTGCCA 0.000396127309703 0.00022354050705 +GTTGCG 
9.35733802448e-05 0.000162519990261 +GTGCCC 0.00081408840813 0.000140165939556 +CCGCCG 0.000846839091215 0.000161311663196 +AGGCTA 5.77035844843e-05 0.000108749435862 +CCTTCC 0.000127883619668 0.000125061851242 +AGGCTC 5.92631408217e-05 9.78744922761e-05 +CCGCCC 0.000622262978628 0.000155874191403 +TGTCGA 5.3024915472e-05 0.000179436569173 +CCGCCA 0.000684645232124 0.000192124003357 +AGGCTG 0.000134121845018 0.000103311964069 +GTGCCT 0.000198063654851 0.000134728467763 +TCCAAA 0.000329066387194 0.000462185102415 +GTTGCT 0.000182468091477 0.000261602809602 +GGATAT 0.00022301655625 0.00023985292243 +TCGGGT 0.000208980549213 7.67287686362e-05 +ATTTGA 1.09168943619e-05 0.000514143166216 +GCAGTT 0.000208980549213 0.000285165187372 +GAAGCT 0.000330625943532 0.000182457386836 +GGATAA 9.35733802448e-06 0.000207832255204 +GGATAC 0.000297875260446 0.000153457537272 +TCGGGG 7.64182605332e-05 9.2437020483e-05 +GGATAG 4.67866901224e-06 0.000120832706514 +GAAGCG 0.000297875260446 0.000199373965748 +CCGCCT 0.000279160584397 0.000123853524177 +GCAGTG 0.000388329528016 0.000326248307587 +GAAGCC 0.00053024915472 0.000222332179985 +GCAGTA 9.51329365822e-05 0.00017399909738 +GAAGCA 0.000266684133698 0.000301477602752 +GCAGTC 0.000131002732343 0.000198769802215 +CGCCCG 0.000110728499956 9.54578381458e-05 +GAGAGA 0.000152836521066 0.00028697767797 +CGCCCC 0.00012632406333 0.000142582593686 +CGCCCA 0.000107609387282 0.000208436418736 +AGTTTG 0.00027136280271 0.000314165036936 +ACGCAG 0.000411722873077 0.000229582142376 +ACGCAC 0.000241731232299 0.000185478204498 +ACGCAA 0.000201182767526 0.000238040431832 +CTCGCA 5.14653591346e-05 0.000155874191403 +ACGTAC 0.000168432084441 0.000121436870046 +CCCTAG 1.24764506993e-05 8.21662404293e-05 +ACGCAT 0.000143479183042 0.000193332330422 +AGTTTC 0.000198063654851 0.000328060798185 +GCCGGG 0.000106049830944 6.94788062454e-05 +TACCCG 0.000131002732343 9.66661652109e-05 +GCCGGC 0.000729872365909 0.000122041033579 +CATGTT 9.51329365822e-05 
0.000233207123571 +GCCGGA 0.000558321168794 0.000141978430154 +TTGGGG 7.64182605332e-05 0.00013412430423 +AGATCT 7.1739591521e-05 0.000198769802215 +ACGTAG 9.35733802448e-06 0.000108749435862 +GCCACT 0.000506855809659 0.000212061399931 +CATGTA 6.70609225088e-05 0.000187290695096 +GTTGCA 0.000149717408392 0.000299060948621 +CATGTC 0.000159074746416 0.000137749285426 +GCCGGT 0.000357138401268 9.96869828738e-05 +CATGTG 0.000455390450525 0.000183665713901 +GTGCGA 0.000297875260446 0.000193936493954 +TACCCC 9.98116055944e-05 0.000103311964069 +GATTGG 0.000257326795673 0.000203603110476 +GGCCTG 0.000628501203977 0.000100291146406 +GGCCTC 0.000240171675962 0.000160103336131 +GGCCTA 0.000131002732343 8.3978731027e-05 +TTTGTT 0.0002292547816 0.000925578531895 +AATCGG 0.00018090853514 0.000221728016453 +AATCGC 0.000531808711058 0.000289998495633 +AATCGA 0.000227695225262 0.000459768448285 +TTCCCA 0.000129443176005 0.000317790018131 +GGCCTT 0.000110728499956 0.000135936794828 +TCACTA 8.10969295455e-05 0.000133520140698 +TCACTC 7.79778168707e-05 0.000154061700805 +AATCGT 0.000212099661888 0.000276102734384 +AGCCGA 9.66924929196e-05 0.000198165638682 +CCTTCA 0.000110728499956 0.00015527002787 +AGCCGC 0.000212099661888 0.000162519990261 +TAATTG 0.0 0.00050810153089 +AAAAAT 0.000349340619581 0.00171642859603 +AGCCGG 0.000137240957692 0.000130499323035 +GTTTTT 0.00017467030979 0.000869995486898 +TGGTGA 6.23822534965e-06 0.000172186606782 +GATGGA 0.000519332260359 0.000187290695096 +TGGTGC 0.000120085837981 0.000184269877433 +CCAAGT 0.000166872528103 0.000296040130958 +GGGCTG 0.000149717408392 0.000105728618199 +TGGTGG 0.000176229866128 0.000202998946943 +GATGGG 0.00017467030979 0.000166144971456 +AAAAAA 0.000366495739292 0.00304558836768 +AAAAAC 0.000338423725219 0.00125726431127 +GAGCGA 0.000520891816696 0.000214478054062 +AGCCGT 7.32991478584e-05 0.000133520140698 +AAAAAG 0.000823445746154 0.00095034923673 +TGACAG 0.0 0.000149228392544 +CCAAGG 9.8252049257e-05 0.00019514482102 
+CTACAT 6.08226971591e-05 0.000201790619878 +TCTCAG 0.000155955633741 0.000142582593686 +CCAAGC 0.000146598295717 0.000275498570851 +TGGTGT 4.21080211102e-05 0.000190915676292 +CCAAGA 7.1739591521e-05 0.000278519388514 +AGAACT 6.86204788462e-05 0.000264623627265 +GCATTA 8.26564858829e-05 0.000254352847211 +CCTTCG 0.000194944542177 0.000165540807924 +TACCAT 0.00018090853514 0.000227769651778 +AGATAG 9.35733802448e-06 0.000168561625587 +CACTGT 8.57755985577e-05 0.000151645046675 +ACCCGA 0.000115407168969 0.00015527002787 +ATGTCT 0.000152836521066 0.000173394933847 +ACCCGC 0.000241731232299 0.000103311964069 +AGATAA 4.67866901224e-06 0.00035524815715 +TACCAC 0.000293196591434 0.000219311362322 +AGAACG 7.1739591521e-05 0.000207228091671 +TACCAA 0.000194944542177 0.000313560873403 +AGAACA 7.95373732081e-05 0.000384852170246 +TACCAG 0.000566118950481 0.000158895009065 +AGAACC 0.000115407168969 0.000172790770315 +ATGTCA 0.000199623211189 0.000207228091671 +CACTGG 0.000165312971766 0.00017641575151 +AGATAT 7.79778168707e-05 0.000354039830085 +TGACAA 0.0 0.000298456785089 +ACCAGG 0.000191825429502 0.00013412430423 +CACTGC 0.000279160584397 0.000196353148085 +ATGTCG 0.000389889084353 0.000149832556077 +CACTGA 6.23822534965e-06 0.000206623928138 +TAGGTT 0.0 0.000163728317326 +GTTTAC 0.000266684133698 0.000254352847211 +AATTAT 0.000137240957692 0.000914703588308 +CTGTTG 0.000424199323776 0.000253748683679 +TGCTAT 0.000148157852054 0.000194540657487 +CTGTTC 0.000566118950481 0.000208436418736 +CGGGTT 6.86204788462e-05 0.000115999398253 +CTGTTA 8.88947112325e-05 0.000167353298521 +AATTAC 0.000193384985839 0.000401768749158 +AATTAA 1.71551197115e-05 0.00127176423606 +AATTAG 9.35733802448e-06 0.000384852170246 +TAGGTC 0.0 7.79370957013e-05 +CGGGTC 8.42160422203e-05 5.13539002683e-05 +CTGTTT 0.000227695225262 0.000379414698453 +CGGGTA 4.05484647727e-05 6.46454979848e-05 +CGGGTG 0.000233933450612 8.69995486898e-05 +CCAAAC 0.000232373894275 0.000392102132637 +CTTCGG 6.55013661714e-05 
0.000151040883142 +CACACT 0.000134121845018 0.000282144369709 +TGGATG 0.000260445908348 0.000187894858629 +AGCAAA 0.000436675774476 0.000697808880116 +ATATAT 0.000102930718269 0.0017605325339 +AGCAAC 0.000846839091215 0.000478497517794 +TTAGGC 5.77035844843e-05 0.000137145121893 +AAAGCT 0.00018714676049 0.00035283150302 +AGCAAG 0.000879589774301 0.000265227790798 +AAAGCA 0.000208980549213 0.000636788363327 +ATATAG 3.11911267483e-06 0.000415664510407 +TTAGGT 3.43102394231e-05 0.000143790920751 +ATATAA 9.35733802448e-06 0.000974515778033 +AGCAAT 0.000673728337762 0.000428351944591 +ATATAC 0.000163753415428 0.000638600853925 +ATAGCA 0.000121645394318 0.000264019463732 +ATAGCC 0.00036181707028 0.000175811587977 +TACTTG 0.000282279697072 0.00023985292243 +ATAGCG 0.00013880051403 0.0001317076501 +TCTCAC 7.64182605332e-05 0.000122041033579 +TGCTAA 6.23822534965e-06 0.000297248458024 +ATAGCT 0.00013880051403 0.000247707048353 +TACTTT 0.000304113485796 0.000436206070514 +GTTTTC 0.000221456999913 0.000514747329748 +CGAGTT 9.045426757e-05 0.000209644745801 +CTCGCC 0.000233933450612 0.000138957612491 +CGTACT 4.05484647727e-05 0.000103916127602 +TCCCGT 0.00013880051403 7.67287686362e-05 +ACATAC 0.000127883619668 0.000389081314974 +CTTAAG 0.000363376626617 0.000281540206177 +ACATAA 9.35733802448e-06 0.000540122198116 +CTTAAA 0.000188706316827 0.000613830149089 +AGGGGT 7.48587041958e-05 7.18954603756e-05 +CTTAAC 0.000237052563287 0.000276706897916 +CGTACA 3.89889084353e-05 0.000130499323035 +CGAGTG 0.000237052563287 0.000215082217594 +TTGTTA 4.99058027972e-05 0.000514143166216 +CGAGTA 5.61440281469e-05 0.000154665864337 +CGAGTC 9.66924929196e-05 0.00015285337374 +AGGGGA 7.95373732081e-05 9.66661652109e-05 +CTTAAT 0.000191825429502 0.00037397722666 +AGGGGC 0.000129443176005 9.78744922761e-05 +TATGCA 0.000162193859091 0.000315977527533 +AGGGGG 2.02742323864e-05 7.79370957013e-05 +TCATTA 5.77035844843e-05 0.000338935741771 +CGACTT 8.88947112325e-05 0.000204207274008 +TCGGGC 
0.00041016331674 0.00019997812928 +TTGTAA 2.18337887238e-05 0.000604163532568 +TTGTAC 0.000226135668925 0.000291810986231 +GTACCT 5.61440281469e-05 0.000105728618199 +TCATTG 0.000121645394318 0.000233811287104 +TGAATC 0.0 0.000240457085962 +CGACTA 7.48587041958e-05 0.000132915977165 +TGAATA 0.0 0.000512330675618 +CGACTC 0.000121645394318 0.000120832706514 +TGAATG 0.0 0.000386664660844 +AAGTAT 0.000352459732255 0.000411435365679 +CGACTG 0.000417961098427 0.000148624229012 +TTGCCA 0.000335304612544 0.000370352245464 +TTGTAT 0.00013256228868 0.000544351342844 +ATCGTG 0.000495938915297 0.00017641575151 +GCTTCG 0.000188706316827 0.000145603411349 +ATCGTC 0.000293196591434 0.000160103336131 +ATCGTA 0.000129443176005 0.000180644896238 +TGGGAG 0.00036181707028 0.000143790920751 +TGGGAA 0.000176229866128 0.000315977527533 +TGGGAC 0.000221456999913 0.000111770253525 +GCTTCT 0.000137240957692 0.000177019915043 +ATCGTT 0.000212099661888 0.000256769501342 +GATTGA 1.09168943619e-05 0.000257373664874 +TGGGAT 0.000227695225262 0.000155874191403 +GGGCTA 5.45844718095e-05 7.79370957013e-05 +CTAGCC 0.000184027647815 0.000113582744123 +CTAGCA 7.01800351836e-05 0.000175811587977 +CTAGCG 9.20138239074e-05 7.5520441571e-05 +CGGTAG 1.55955633741e-06 9.2437020483e-05 +ACTTAG 4.67866901224e-06 0.000230186305909 +ACTTAA 6.23822534965e-06 0.000557642940561 +CGGTAC 0.000113847612631 7.12912968431e-05 +ACTTAC 0.000116966725306 0.000222936343518 +CGGTAA 1.55955633741e-06 0.000151645046675 +GACCAG 0.000559880725131 0.000152249210207 +GACCAC 0.000224576112587 0.000164332480859 +TTGCCC 0.000389889084353 0.000183665713901 +CTAGCT 0.000101371161932 0.000158895009065 +TTCGAG 0.000965365372859 0.000289998495633 +ACTTAT 7.48587041958e-05 0.000326852471119 +CGGTAT 7.79778168707e-05 0.000119020215916 +TACACA 0.000198063654851 0.000405393730353 +TACACG 0.000310351711145 0.000137749285426 +GCGAGA 5.77035844843e-05 0.000194540657487 +GAGAGC 0.000712717246198 0.000230790469441 +GCGAGC 0.000157515190079 
0.000147415901947 +GGCCGT 0.000116966725306 8.69995486898e-05 +GCGAGG 6.55013661714e-05 0.000111166089993 +TTAAGA 5.45844718095e-05 0.000356456484215 +CAGCCG 0.000778218612369 0.000196957311617 +CAGCCA 0.000625382091303 0.000364310610139 +CAGCCC 0.000491260246285 0.000151040883142 +TCCGGG 0.000124764506993 9.54578381458e-05 +GCGAGT 0.000109168943619 0.000161311663196 +CAGCCT 0.000207420992876 0.000138957612491 +GAACTC 0.000255767239336 0.000184269877433 +GAACTA 0.000263565021023 0.000236832104767 +GGACTT 0.000182468091477 0.000163728317326 +CACCTA 8.73351548951e-05 0.000154665864337 +AACTAT 0.000411722873077 0.000425935290461 +CACCTC 0.000140360070367 0.000193936493954 +CACCTG 0.000581714513855 0.000168561625587 +TGATCC 0.0 0.000160707499663 +AACACT 0.000257326795673 0.000288790168568 +TGATCA 0.0 0.000201790619878 +TGATCG 0.0 0.000138957612491 +TGCATG 0.000282279697072 0.000209040582269 +AACTAG 1.24764506993e-05 0.000226561324713 +GATTTA 0.000168432084441 0.00035524815715 +CACCTT 9.20138239074e-05 0.000158290845533 +AACTAC 0.000739229703934 0.000341352395901 +AACTAA 2.33933450612e-05 0.000611413494959 +AACACG 0.00040392509139 0.000194540657487 +AACACA 0.000293196591434 0.000509309857955 +AACACC 0.000616024753278 0.000362498119541 +TGATCT 0.0 0.000184269877433 +CCAGCA 0.000276041471722 0.000351019012422 +CTATAC 0.000106049830944 0.00017399909738 +CAACAG 0.00135525445721 0.000381831352583 +GACAGT 0.000321268605507 0.000164332480859 +CAACAA 0.000636298985665 0.00095034923673 +CCAGCC 0.000324387718182 0.000218103035257 +CAACAC 0.00027760102806 0.000317790018131 +GGCCGG 0.000102930718269 6.82704791802e-05 +CTATAA 1.24764506993e-05 0.000294227640361 +GACAGG 0.000176229866128 0.000101499473471 +CAACAT 0.00026512457736 0.000409622875081 +GACAGC 0.000611346084266 0.000192124003357 +GACAGA 0.000116966725306 0.000224144670583 +GTATTA 5.14653591346e-05 0.000347394031227 +GTATTC 0.000179348978803 0.000220519689387 +GTAAGT 4.99058027972e-05 0.000227165488246 +CCAGCG 
0.000343102394231 0.000170978279717 +AGAGGT 5.14653591346e-05 0.000161311663196 +GGTGGC 0.000948210253147 0.000175811587977 +TCCCCT 6.70609225088e-05 0.000118416052383 +GGTGGA 0.000577035844843 0.000193936493954 +CGTCAT 0.00012632406333 0.000135936794828 +GGTGGG 0.000123204950656 0.000114186907655 +AGAGGC 6.70609225088e-05 0.000143790920751 +GTAAGA 2.49529013986e-05 0.000191519839824 +AGAGGA 0.000104490274607 0.000257977828407 +CGCCCT 4.5227133785e-05 0.000102103637004 +AGAGGG 1.71551197115e-05 0.000153457537272 +GTAAGG 5.3024915472e-05 0.000102103637004 +CGTCAA 0.000184027647815 0.000145603411349 +CGTCAC 0.000185587204152 0.000106936945265 +TCCCCC 0.000127883619668 0.000105728618199 +GGTGGT 0.000587952739205 0.000180644896238 +CGTCAG 0.000378972189991 8.88120392875e-05 +GTCCGG 6.39418098339e-05 9.18328569504e-05 +GTCGCA 7.64182605332e-05 0.000153457537272 +GTCGCC 0.000279160584397 0.000132311813632 +CCGTCT 9.20138239074e-05 9.00203663527e-05 +TCGGTA 0.000106049830944 0.000111166089993 +CTCGCG 3.89889084353e-05 6.76663156477e-05 +TCGGTG 0.0006752878941 0.000156478354935 +GTCGCT 9.51329365822e-05 0.000154665864337 +TACCTA 0.000127883619668 0.000169165789119 +GTAACT 0.000102930718269 0.00019997812928 +CCGTCG 0.000255767239336 0.000120228542981 +CCGTCA 9.98116055944e-05 9.2437020483e-05 +CCGTCC 0.00013880051403 8.8207875755e-05 +GGCTGT 0.00017467030979 0.000126874341839 +CGCGTG 0.000282279697072 0.00017883240564 +CTATAT 0.000101371161932 0.000354643993618 +CGCGTC 0.00027760102806 0.000125061851242 +CGCGTA 7.1739591521e-05 9.90828193412e-05 +GTTTTA 0.000107609387282 0.000610205167894 +ATGCAG 0.000888947112325 0.000225352997648 +CCACTA 9.66924929196e-05 0.000146207574882 +CCACTC 0.000151276964729 0.000166749134989 +CGCGTT 0.000159074746416 0.000124457687709 +GGCTGG 0.000279160584397 0.000131103486567 +GGCTGA 1.55955633741e-06 0.00013412430423 +GGCTGC 0.000377412633654 0.000185478204498 +ATGCAT 0.00018714676049 0.000316581691066 +TTTAGA 6.23822534965e-05 
0.000343164886499 +TTTCAT 9.35733802448e-05 0.000489976624913 +TTGACC 0.00027136280271 0.000160707499663 +ATGACG 0.000296315704108 0.000135332631295 +GGAGTT 0.000252648126661 0.000172790770315 +ATGACA 0.000205861436539 0.000218103035257 +ATGACC 0.000489700689948 0.000134728467763 +CGGGCC 0.000254207682998 7.43121145059e-05 +GGGCCT 2.02742323864e-05 0.000104520291134 +TAAGAA 0.0 0.000431372762254 +ATGACT 0.000171551197115 0.000187290695096 +GGAGTA 0.000116966725306 0.000124457687709 +GGAGTC 0.000205861436539 0.000141978430154 +GGAGTG 0.000460069119537 0.000186686531564 +GGGCCG 8.42160422203e-05 6.34371709197e-05 +GGGCCC 9.045426757e-05 6.1624680322e-05 +GGGCCA 0.000101371161932 0.000131103486567 +GGCAAG 0.00140983892902 0.000208436418736 +ACGAGT 8.10969295455e-05 0.000181249059771 +AGGCCA 9.35733802448e-05 0.000199373965748 +AGGCCC 0.000110728499956 9.54578381458e-05 +AGGCCG 6.70609225088e-05 6.64579885825e-05 +ACGAGG 7.01800351836e-05 0.000147415901947 +ACGAGC 0.000193384985839 0.000151040883142 +GACCCG 0.000104490274607 8.03537498316e-05 +ACGAGA 6.39418098339e-05 0.00019514482102 +GGCAAC 0.000949769809485 0.00028456102384 +AGGCCT 4.05484647727e-05 9.48536746132e-05 +AGACGG 2.49529013986e-05 0.000123249360644 +GGGTTG 4.21080211102e-05 0.000105124454667 +TTTGGG 9.66924929196e-05 0.000247707048353 +GGGTTA 2.18337887238e-05 9.18328569504e-05 +GGGTTC 6.70609225088e-05 0.000198165638682 +TTTGAT 0.000382091302666 0.000532268072193 +TGAACT 0.0 0.000320810835794 +CTACGA 8.26564858829e-05 0.000169769952652 +ATTGCT 0.000269803246372 0.000293623476828 +GGGTTT 5.45844718095e-05 0.000160707499663 +TACAAG 0.000943531584135 0.000220519689387 +TGCGAG 0.000603548302579 0.000192728166889 +AACGGC 0.000559880725131 0.000207832255204 +ATCCAT 0.000213659218226 0.000274894407319 +ATTGCA 0.000221456999913 0.000425331126928 +TTTGGC 0.000583274070193 0.000406602057418 +ATTGCG 0.000244850344974 0.000167957462054 +TTTGAC 0.000258886352011 0.000275498570851 +TTTGAA 0.000288517922421 
0.000664579885825 +CGGGCT 0.000124764506993 9.54578381458e-05 +CTCGTA 4.36675774476e-05 0.000103916127602 +CTCGTC 0.000201182767526 0.000257977828407 +CCCTGG 0.000179348978803 0.000122041033579 +CCCTGA 1.55955633741e-06 9.66661652109e-05 +AAGCTT 0.00027136280271 0.000274894407319 +CCCTGC 0.000255767239336 0.00012929099597 +GTGTAA 6.23822534965e-06 0.000309331728675 +CCCTGT 9.045426757e-05 9.48536746132e-05 +AAGCTC 0.000444473556163 0.000191519839824 +GATATA 0.000305673042133 0.000453726812959 +AAGCTA 0.000371174408304 0.000291810986231 +GATATG 0.000393008197028 0.000213269726997 +AAGCTG 0.00160166435852 0.000292415149763 +TGACGC 0.0 0.000114791071188 +TACTCT 0.000115407168969 0.000179436569173 +CATCGT 0.000152836521066 0.000163124153793 +GTAGTC 0.000102930718269 0.00011056192646 +CAGGGT 0.000324387718182 8.94162028201e-05 +CCAGTC 0.000148157852054 0.000143790920751 +TACGAT 0.000664370999738 0.000201790619878 +CCAGTG 0.000442913999825 0.000224748834115 +TACTCG 0.000282279697072 0.000111166089993 +CATCGA 0.000137240957692 0.000235019614169 +CATCGC 0.00036181707028 0.000161915826728 +TACTCC 0.000419520654764 0.000135936794828 +TACTCA 0.000137240957692 0.000192728166889 +CATCGG 0.000155955633741 0.000210853072866 +CAGGGG 7.79778168707e-05 8.33745674944e-05 +CCAGTT 0.000218337887238 0.00024226957656 +CAGGGC 0.000642537211014 8.69995486898e-05 +AATCTG 0.00089830445035 0.000248915375418 +CGGACT 5.3024915472e-05 0.000106936945265 +AATCTC 0.000363376626617 0.000216894708192 +AATCTA 0.000257326795673 0.000347394031227 +TGAACG 0.0 0.000172790770315 +TCACAC 8.26564858829e-05 0.000234415450637 +TCACAA 0.000135681401355 0.000303290093349 +TAAATT 0.0 0.00112434833411 +GCCATA 0.000310351711145 0.000189103185694 +CGGACG 7.79778168707e-05 0.00011297858059 +ACATCG 0.000311911267483 0.00022354050705 +CGGACA 7.01800351836e-05 0.000163728317326 +CGGACC 7.32991478584e-05 9.12286934178e-05 +GCCATG 0.000949769809485 0.000163124153793 +TTTTAA 2.02742323864e-05 0.00124034773236 +TCACAT 
0.000116966725306 0.000260394482537 +ATTAAC 0.000263565021023 0.000395727113832 +ATTAAA 0.000257326795673 0.00114791071188 +ATTAAG 0.000506855809659 0.000415664510407 +TTTTAC 0.00018714676049 0.000455539303557 +TCACTT 8.73351548951e-05 0.000248915375418 +ATTCGT 0.000169991640778 0.000213269726997 +GTGCAG 0.000871791992614 0.000234415450637 +TTACTA 5.3024915472e-05 0.000247707048353 +ATTAAT 0.00027760102806 0.000810183297174 +TACCTG 0.00054272560542 0.000126874341839 +GGTATG 0.000110728499956 0.00011056192646 +CTGAAT 0.000552082943444 0.000352227339487 +GGTATC 0.00013256228868 8.8207875755e-05 +CGAGAG 0.000241731232299 0.000198769802215 +CTGAAA 0.000422639767439 0.000391497969104 +CTGAAC 0.000795373732081 0.000195748984552 +TCGACC 0.000194944542177 0.000114186907655 +GGAACC 0.000324387718182 0.000135936794828 +CTGAAG 0.00135525445721 0.000253144520146 +CATTGC 0.000101371161932 0.000245290394223 +TATGAG 0.00030723259847 0.000206623928138 +CATTGA 6.23822534965e-06 0.000276102734384 +CTAGTT 6.08226971591e-05 0.000202998946943 +CATTGG 7.64182605332e-05 0.000204811437541 +TCAACT 0.000109168943619 0.000300269275686 +ATGTTA 6.08226971591e-05 0.000363102283074 +ACACCG 0.00031970904917 0.00020239478341 +CTGCCA 0.000509974922334 0.00026583195433 +CTGCCC 0.00120241793615 0.000188499022161 +ACACCC 0.000262005464685 0.00019514482102 +ACACCA 0.000276041471722 0.000283352696775 +CTGCCG 0.000709598133523 0.00017399909738 +CTAGTG 0.000151276964729 0.000146207574882 +TCAACC 0.000127883619668 0.000201186456345 +ATGTTC 0.000456950006862 0.000250123702483 +CTAGTC 0.000112288056294 0.000107541108797 +TCAACG 0.00018090853514 0.000221728016453 +CTAGTA 3.89889084353e-05 0.00010814527233 +CATTGT 7.79778168707e-05 0.000287581841503 +TATGAC 0.000237052563287 0.000122041033579 +GTAATG 0.000182468091477 0.000198165638682 +ACACCT 0.000118526281643 0.000221728016453 +CTGCCT 0.000219897443575 0.000138353448958 +CCTCGT 7.64182605332e-05 0.000180040732705 +TTGTGT 6.55013661714e-05 0.000444060196438 
+AGACTG 0.000131002732343 0.000120228542981 +AGACTC 4.83462464598e-05 0.000117207725318 +AGACTA 3.74293520979e-05 0.000140165939556 +CCTCGG 4.67866901224e-05 0.000121436870046 +CCTCGC 0.000118526281643 0.000118416052383 +CCCGAC 0.000371174408304 9.12286934178e-05 +CCTCGA 6.86204788462e-05 0.000134728467763 +CGAGGA 0.000160634302754 0.000215686381127 +AGACTT 4.21080211102e-05 0.000222332179985 +CGAGGG 2.33933450612e-05 8.88120392875e-05 +CACCGT 7.1739591521e-05 0.000112374417058 +ACTTTA 7.64182605332e-05 0.000409622875081 +ACTTTG 0.000276041471722 0.00035041484889 +TAGCTA 0.0 0.000213269726997 +GACCTG 0.000587952739205 0.000100291146406 +GTCGCG 4.05484647727e-05 0.000109957762927 +GACCTA 0.000131002732343 0.000100291146406 +GACCTC 0.000202742323864 0.000132311813632 +CACCGG 8.26564858829e-05 8.88120392875e-05 +CTATTG 0.000151276964729 0.000199373965748 +CACCGA 9.51329365822e-05 0.00024710288482 +ACTTTT 0.000120085837981 0.000682704791802 +GACCTT 0.000131002732343 0.000122645197111 +GGACGC 0.000354019288593 8.51870580921e-05 +GGACGG 0.000109168943619 8.45828945596e-05 +CTATTA 3.11911267483e-05 0.000254957010744 +CACAGC 0.000411722873077 0.000261602809602 +CTTATC 0.000166872528103 0.000207832255204 +CACAGA 7.48587041958e-05 0.000288186005035 +CTTATA 0.000113847612631 0.000297248458024 +CACAGG 8.57755985577e-05 0.000132915977165 +CTTATG 0.000157515190079 0.000164936644391 +ACTCAT 0.000109168943619 0.000206019764606 +CGTATA 2.80720140734e-05 0.00017641575151 +GACGTA 0.000113847612631 8.45828945596e-05 +CGTATC 0.000115407168969 9.2437020483e-05 +CTATTC 0.000168432084441 0.000178228242108 +GAGGAC 0.00144882783746 0.000138353448958 +CGTATG 8.88947112325e-05 0.000163728317326 +AGCGAT 0.000631620316652 0.000216894708192 +TTAACA 7.32991478584e-05 0.000450705995296 +ACTCAC 9.66924929196e-05 0.000184269877433 +TTAACC 0.000131002732343 0.000244082067158 +ACTCAA 0.000146598295717 0.000338331578238 +ACTCAG 0.000255767239336 0.00015527002787 +CTTATT 0.000140360070367 
0.000346789867694 +CACAGT 0.0002292547816 0.000195748984552 +AGCGAC 0.000517772704021 0.000167957462054 +TAAACA 0.0 0.000671829848216 +AGCGAA 0.000502177140647 0.000384248006713 +GAGGAT 0.00166560616836 0.000177019915043 +AGCGAG 0.000731431922247 0.000208436418736 +CGTATT 8.88947112325e-05 0.000209644745801 +TAAACG 0.0 0.000308727565142 +AGTTCC 0.000325947274519 0.000222332179985 +GTATGT 2.33933450612e-05 0.000377602207855 +AGTTCA 0.000146598295717 0.000262206973135 +TACTTA 5.14653591346e-05 0.000415664510407 +AGTTCG 0.000308792154808 0.00022354050705 +TAGCCC 0.0 0.000111770253525 +CTGTGA 9.35733802448e-06 0.000185478204498 +CGGGGT 7.1739591521e-05 7.12912968431e-05 +TAGGAC 0.0 0.000102103637004 +GCAAAG 0.000280720140734 0.000361893956008 +TAGGAA 0.0 0.000236832104767 +GCAAAA 0.000185587204152 0.000648267470446 +TAGGAG 0.0 0.000121436870046 +GCAAAC 0.000188706316827 0.000436206070514 +CGGGGC 0.000127883619668 8.15620768967e-05 +CGGGGA 9.51329365822e-05 0.000106332781732 +CTGTGT 0.000118526281643 0.000200582292813 +CGGGGG 2.18337887238e-05 4.8937246138e-05 +GTATGC 6.86204788462e-05 0.000146207574882 +AGTTCT 0.000123204950656 0.000248915375418 +GCAAAT 0.000219897443575 0.000518372310944 +TAGGAT 0.0 0.000134728467763 +TAATTA 0.0 0.00079991251712 +CTATTT 0.00012632406333 0.000436810234047 +TGCTTT 0.00018090853514 0.000376998044323 +GTCTAT 0.000198063654851 0.000153457537272 +GTAATT 0.000121645394318 0.000479101681327 +GTTACC 0.00022301655625 0.000114791071188 +GTTACA 0.000113847612631 0.000214478054062 +TAATTC 0.0 0.000383039679648 +GTTACG 0.000120085837981 0.000111770253525 +TATCGC 0.000338423725219 0.000172186606782 +TATAGT 4.83462464598e-05 0.00032926912525 +GTCTAA 9.35733802448e-06 0.000194540657487 +AGGACG 9.045426757e-05 0.000120228542981 +GTCTAC 0.000491260246285 0.000116603561786 +TGCTTC 0.000341542837893 0.000188499022161 +TTCAAA 0.000290077478759 0.000845828945596 +GGCGTT 0.000329066387194 0.000144395084284 +GGGCTC 6.70609225088e-05 8.69995486898e-05 +TATAGC 
0.000107609387282 0.000204207274008 +TATCGT 0.000107609387282 0.000163124153793 +TATAGA 5.3024915472e-05 0.000367331427802 +AGTGGG 0.000135681401355 0.000184269877433 +TATAGG 6.86204788462e-05 0.000192124003357 +GTGGCG 0.000486581577273 0.00017883240564 +GTTACT 0.000120085837981 0.000157686682 +GCTCTC 0.00018090853514 0.000202998946943 +AACCTT 0.000165312971766 0.000201790619878 +GATGGC 0.00098875871792 0.000179436569173 +GCTCTG 0.000608226971591 0.00017641575151 +CCACGT 0.000176229866128 0.000121436870046 +CAATAT 0.0002292547816 0.000462789265947 +CAGGAG 0.00169367818243 0.000210248909334 +GAATGG 0.000204301880201 0.00021870719879 +GAATGA 6.23822534965e-06 0.000264623627265 +ACTACT 0.000112288056294 0.000261602809602 +GAATGC 0.000218337887238 0.000245894557755 +CCACGA 0.000109168943619 0.000126874341839 +CAATAC 0.000169991640778 0.00024468623069 +CCACGC 0.000182468091477 0.000237436268299 +CAATAA 7.79778168707e-06 0.000653704942239 +GTTCCG 0.000198063654851 0.000119624379449 +CAATAG 1.40360070367e-05 0.000230186305909 +CCACGG 8.10969295455e-05 0.000141374266621 +AACCTA 0.000160634302754 0.000213873890529 +GAATGT 8.73351548951e-05 0.000280936042644 +ACTACC 0.000154396077404 0.000146811738414 +GGCTTC 0.00063317987299 0.000151040883142 +ACTACA 0.000134121845018 0.000262811136667 +ACTACG 9.8252049257e-05 0.000130499323035 +GGCTTG 0.000311911267483 0.000169165789119 +CGTTGA 3.11911267483e-06 0.000186082368031 +AGTCAG 0.000302553929458 0.000162519990261 +CGTTGC 0.000124764506993 0.000177019915043 +TAACAC 0.0 0.000201186456345 +TCCTAC 0.00039768686604 0.000108749435862 +AGTCAC 0.000149717408392 0.000160103336131 +CGTTGG 9.35733802448e-05 0.000145603411349 +AGTCAA 0.000171551197115 0.000315977527533 +TCATCG 0.000344661950568 0.000182457386836 +TGAGGG 0.0 0.000122041033579 +TCATCC 0.000258886352011 0.00017399909738 +GAGCGG 0.000499058027972 0.000166144971456 +CTTTCT 7.32991478584e-05 0.000267644444928 +AGTCAT 0.000131002732343 0.000206623928138 +TCCTAT 0.000232373894275 
0.000166749134989 +TAACAT 0.0 0.000335914924108 +CGTTGT 5.61440281469e-05 0.000175207424445 +CTTTCC 0.00022301655625 0.000210853072866 +CTTTCA 0.000110728499956 0.000303894256882 +CTTTCG 0.000191825429502 0.000238040431832 +GAGCGC 0.000912340457387 0.000160707499663 +GTTGAT 0.000213659218226 0.000266436117863 +CGACCA 0.000152836521066 0.000229582142376 +CGACCC 0.000235493006949 9.66661652109e-05 +GCATAC 0.000102930718269 0.000203603110476 +CGACCG 0.000149717408392 8.15620768967e-05 +CCTTAT 6.39418098339e-05 0.000140165939556 +GTTGAC 0.00018090853514 0.000175811587977 +GTTGAA 0.0002167783309 0.000334706597043 +ACTGGG 9.66924929196e-05 0.000109957762927 +GTTGAG 0.000329066387194 0.000209040582269 +TTAGGG 1.71551197115e-05 0.000118416052383 +CCTTAC 7.1739591521e-05 9.48536746132e-05 +TTCGAT 0.000776659056032 0.000386060497311 +CCTTAA 4.67866901224e-06 0.000228977978843 +CCTTAG 6.23822534965e-06 9.66661652109e-05 +CGACCT 6.86204788462e-05 0.000131103486567 +CAACTC 0.000179348978803 0.000231998796506 +CAACTA 0.000171551197115 0.000311144219273 +CAACTG 0.000709598133523 0.000323831653457 +AATAGG 8.26564858829e-05 0.000218103035257 +GATGGT 0.000461628675874 0.000135936794828 +GGGGTT 4.21080211102e-05 8.94162028201e-05 +GGATGT 0.000102930718269 0.00015043671961 +AATAGT 0.00018090853514 0.000335914924108 +GGGCTT 4.83462464598e-05 0.0001317076501 +CAACTT 0.000149717408392 0.000435601906982 +GGATGC 0.000198063654851 0.000132915977165 +GGATGA 4.67866901224e-06 0.000183061550368 +GGATGG 0.000148157852054 0.000142582593686 +GGGGTA 3.74293520979e-05 7.43121145059e-05 +TCTCCG 0.000157515190079 0.000140770103088 +GAGTCC 0.000589512295542 0.000146207574882 +AAACTG 0.000706479020848 0.000477289190729 +TCTCCC 0.000127883619668 0.000125666014774 +AAACTA 0.000221456999913 0.000680288137672 +TCTCCA 0.000149717408392 0.000201790619878 +AAACTC 0.000212099661888 0.000326248307587 +ACGCGA 0.000148157852054 0.000125061851242 +AGTACG 7.79778168707e-05 0.000105728618199 +ACGCGC 
0.000226135668925 9.30411840155e-05 +AGTACC 0.000129443176005 0.000116603561786 +ACGCGG 0.000149717408392 9.84786558086e-05 +AGTACA 8.10969295455e-05 0.000220519689387 +AAACTT 0.000182468091477 0.000566101230017 +CCCTTC 0.00045227133785 0.000128686832437 +TCTCCT 7.64182605332e-05 0.000168561625587 +CCCTTG 0.000212099661888 9.66661652109e-05 +TTCTAT 0.00031970904917 0.000352227339487 +AGTACT 8.26564858829e-05 0.000227165488246 +ACGCGT 0.000123204950656 0.000107541108797 +GCCGAA 0.000616024753278 0.000216894708192 +GTGGAG 0.00167496350638 0.00019514482102 +GCCGAC 0.000544285161757 0.000175207424445 +GTGGAC 0.00108233209816 0.000138353448958 +GCCGAG 0.00122581128121 0.000169165789119 +GTGGAA 0.000639418098339 0.000301477602752 +TATCAT 0.000134121845018 0.000242873740092 +GTGGAT 0.000862434654589 0.000175811587977 +GCCATC 0.00100747339397 0.000232602960039 +GCCGAT 0.000754825267308 0.000147415901947 +CCTATT 0.000109168943619 0.000204811437541 +ACCCGG 0.000101371161932 0.000100895309939 +ACCGAG 0.000670609225088 0.000145603411349 +AAATCG 0.000380531746329 0.000555830449963 +CACGCG 0.000106049830944 0.000111166089993 +TTATCG 9.35733802448e-05 0.000210853072866 +ACCGAC 0.000383650859004 0.000128082668904 +AAATCC 0.000396127309703 0.000430164435189 +ACCGAA 0.000455390450525 0.000363706446606 +AAATCA 0.000226135668925 0.000822266567826 +GGCTTT 0.000304113485796 0.000210248909334 +CTTTGA 9.35733802448e-06 0.000321414999326 +AGATAC 6.86204788462e-05 0.000277311061449 +TCCCGA 0.000104490274607 0.000153457537272 +CACGCT 8.88947112325e-05 0.000175811587977 +AAATCT 0.00018714676049 0.00050568487676 +ACCGAT 0.000524010929371 0.000192124003357 +GTAGTT 8.57755985577e-05 0.000226561324713 +ACCAGT 0.000393008197028 0.000173394933847 +GATGAT 0.000966924929196 0.000287581841503 +GGAGCA 0.000477224239248 0.000253748683679 +GGAGCC 0.000665930556075 0.000138353448958 +CCGGCT 0.000268243690035 0.000123853524177 +CCAAAT 0.000268243690035 0.000522601455672 +GGAGCG 0.000327506830857 
0.000174603260912 +AAAACT 0.000213659218226 0.000869391323366 +GATGAC 0.000918578682736 0.000207832255204 +TGTCCC 0.000215218774563 0.000137749285426 +GATGAA 0.000751706154633 0.000294831803893 +GATGAG 0.00144726828112 0.000206623928138 +AAAACC 0.000374293520979 0.000625913419741 +AAAACA 0.00022301655625 0.00125182683948 +AAAACG 0.000300994373121 0.000647059143381 +GGAGCT 0.0004460331125 0.000173394933847 +CCAAAG 0.000472545570236 0.000372768899595 +TCGCGG 0.000129443176005 9.2437020483e-05 +CTAGAG 0.000279160584397 0.000120832706514 +TCGCGC 0.000304113485796 0.000108749435862 +CTACCG 9.20138239074e-05 9.72703287435e-05 +GCCACA 0.000525570485708 0.000267644444928 +AGATCA 7.32991478584e-05 0.000231998796506 +GCCACC 0.00136617135157 0.000203603110476 +AGAAAT 0.000188706316827 0.000686329772998 +TCCCGG 0.000141919626705 0.000151645046675 +GCGCTT 9.8252049257e-05 0.000158895009065 +GCCACG 0.000566118950481 0.000160707499663 +AGATCG 0.000106049830944 0.000217498871725 +ACCAGC 0.000695562126486 0.000249519538951 +TCGCGT 0.000160634302754 0.000158290845533 +GTGCAA 0.000360257513942 0.000291206822698 +TCCCAT 0.000213659218226 0.000266436117863 +ACCCGT 0.000118526281643 9.90828193412e-05 +AGAAAG 0.000207420992876 0.000378206371388 +CTAGAC 0.000151276964729 9.36453475481e-05 +AGAAAC 0.000194944542177 0.000466414247143 +ATGTCC 0.000559880725131 0.000145603411349 +AGAAAA 0.000118526281643 0.00101076558999 +CTACCC 0.000179348978803 9.00203663527e-05 +ATGTAC 0.000464747788549 0.00028456102384 +GACCGA 0.000102930718269 0.000139561776023 +ATGTAA 7.79778168707e-06 0.00046581008361 +TATGTG 0.000538046936408 0.000367331427802 +ATGTAG 1.40360070367e-05 0.000212061399931 +CCTATA 5.77035844843e-05 0.000161311663196 +CGAAGT 9.8252049257e-05 0.000192728166889 +CCGGCA 0.000272922359047 0.000154665864337 +GACGAC 0.000917019126399 0.000186082368031 +CCTATC 5.45844718095e-05 0.000123249360644 +ATGTAT 0.00018090853514 0.00064222583512 +ATTAGA 5.14653591346e-05 0.000269456935525 +GGCGTC 
0.000413282429414 9.42495110807e-05 +GACCGG 0.000101371161932 7.12912968431e-05 +AGCACT 0.000285398809747 0.000256165337809 +TTAGAA 7.32991478584e-05 0.000328664961717 +CGGCTG 0.000316589936495 0.000139561776023 +TTAGAC 7.64182605332e-05 0.000123249360644 +CGGCTC 0.000135681401355 0.000135936794828 +TTAGAG 8.57755985577e-05 0.000189707349226 +CGGCTA 7.64182605332e-05 0.000125061851242 +AGCACC 0.000690883457474 0.000186686531564 +AGCACA 0.00030723259847 0.000278519388514 +AGCACG 0.000268243690035 0.000133520140698 +GGTCGG 7.79778168707e-05 8.57912216247e-05 +CGGCTT 4.5227133785e-05 0.000144395084284 +TTAGAT 0.000140360070367 0.000361289792476 +AAAGAC 0.000210540105551 0.000299665112154 +CGAAGC 8.73351548951e-05 0.000192124003357 +AAAGAA 0.000299434816783 0.000970286633305 +AAAGAG 0.000435116218138 0.000412643692744 +ATATCA 0.000120085837981 0.000372164736062 +GCGCTG 0.000545844718095 0.000160707499663 +TCGCTA 0.000157515190079 0.000126270178307 +TGTGAG 0.000159074746416 0.000194540657487 +TGTGAA 0.000123204950656 0.000389081314974 +CTACCT 5.14653591346e-05 0.000122041033579 +TGTGAC 0.000129443176005 0.000164936644391 +ATATCT 8.73351548951e-05 0.000380018861985 +CCAGCT 0.000375853077317 0.000301477602752 +AAAGAT 0.000318149492832 0.000447081014101 +TGTGAT 0.000140360070367 0.000298456785089 +TTATTG 7.48587041958e-05 0.000503872386162 +TTATTA 5.14653591346e-05 0.000810183297174 +GAAAGC 0.000335304612544 0.000347394031227 +TTATTC 7.01800351836e-05 0.000359477301878 +ACATCT 0.000148157852054 0.000218103035257 +TTTTAT 0.000116966725306 0.00127538921725 +CTTACT 7.79778168707e-05 0.00015285337374 +AGACGC 9.045426757e-05 0.000109353599395 +TACAAA 0.000338423725219 0.000678475647074 +AGACGA 6.86204788462e-05 0.000216894708192 +AAGTGA 2.96315704108e-05 0.000433789416384 +CCTCTG 0.000263565021023 0.000166144971456 +AGGTGG 2.80720140734e-05 0.000151040883142 +CCTCTC 5.45844718095e-05 0.000164332480859 +AGGTGC 5.92631408217e-05 0.000121436870046 +CCTCTA 7.64182605332e-05 
0.000115999398253 +AGGTGA 4.67866901224e-06 0.000157686682 +CTTACG 9.20138239074e-05 0.000106936945265 +TCCACA 0.000349340619581 0.000338935741771 +TTTTAG 7.79778168707e-06 0.000562476248821 +AGACGT 7.79778168707e-05 0.000111770253525 +CTTACC 0.000185587204152 0.000105728618199 +ACATCA 0.000204301880201 0.000313560873403 +CTTACA 7.1739591521e-05 0.000238040431832 +ACATCC 0.000268243690035 0.000188499022161 +AGGTGT 2.18337887238e-05 0.000129895159502 +CCTCTT 7.01800351836e-05 0.000185478204498 +GCGTGA 6.23822534965e-06 0.0001317076501 +CCCGGC 0.00045227133785 0.000120228542981 +CCCGGA 0.00031970904917 0.000114186907655 +CCCGGG 8.57755985577e-05 0.000126270178307 +TATGAA 0.000205861436539 0.000457955957687 +TGGAAT 0.000177789422465 0.000303290093349 +TAGGTG 0.0 0.000112374417058 +TTGTGC 0.000160634302754 0.000296040130958 +TATGAT 0.000226135668925 0.000375789717258 +TTGTGG 0.000141919626705 0.000251332029548 +CCCGGT 0.000198063654851 8.88120392875e-05 +GGCGTG 0.000541166049082 0.000141374266621 +TAGGTA 0.0 0.000212665563464 +GGTTTT 0.000116966725306 0.000378206371388 +TATCAC 0.000182468091477 0.000166144971456 +TGGAAA 0.000141919626705 0.000497226587304 +TGAGGA 0.0 0.000162519990261 +AAGAGG 0.000238612119624 0.000209040582269 +TGAGGC 0.0 0.000113582744123 +AAGAGA 0.000176229866128 0.000355852320683 +AAGAGC 0.000728312809572 0.000268852771993 +AAGCGT 0.000388329528016 0.000199373965748 +GCTTAG 6.23822534965e-06 0.000160103336131 +GCTTAC 0.000127883619668 0.000138353448958 +GCTTAA 7.79778168707e-06 0.000325039980522 +TGATTC 0.0 0.000250727866016 +AAGCGC 0.000792254619406 0.000227769651778 +AAGAGT 0.000343102394231 0.000297852621556 +AAGCGA 0.000508415365997 0.00035283150302 +AAGCGG 0.000475664682911 0.00019997812928 +GCTTAT 0.00013256228868 0.000248311211886 +GTTCGG 6.39418098339e-05 0.000145603411349 +CTGCTA 0.000279160584397 0.000165540807924 +GCGTGT 4.5227133785e-05 0.000249519538951 +CTGCTC 0.00098875871792 0.000219915525855 +ACTTCT 0.000104490274607 
0.000234415450637 +GACCGT 0.000121645394318 7.73329321687e-05 +CTGCTG 0.00183559780914 0.000269456935525 +CGGTCA 4.36675774476e-05 9.66661652109e-05 +GGTCGT 0.000201182767526 9.18328569504e-05 +CGGTCC 8.88947112325e-05 9.30411840155e-05 +CTAGAA 0.000149717408392 0.000227165488246 +CGGTCG 6.70609225088e-05 9.06245298853e-05 +ACTTCG 0.000185587204152 0.000207832255204 +GACCGC 0.000247969457649 0.000163124153793 +TTTTGG 0.000115407168969 0.000574559519472 +ACTTCC 0.000224576112587 0.000172186606782 +CTGCTT 0.000335304612544 0.000177019915043 +ACTTCA 0.000159074746416 0.000282144369709 +CTAGAT 0.000179348978803 0.000131103486567 +CACAAA 0.00030723259847 0.000608392677296 +CGGTCT 3.11911267483e-05 8.3978731027e-05 +GGTCGC 0.000430437549126 0.000105728618199 +GGTCGA 0.000124764506993 9.78744922761e-05 +CGTCGC 0.000378972189991 0.000138353448958 +CAGCAT 0.000589512295542 0.000258581991939 +TGAAGC 0.0 0.000235623777702 +CGTCGG 0.000109168943619 0.000187290695096 +TGGTTG 0.000129443176005 0.000299665112154 +GCGTGC 7.32991478584e-05 0.000132915977165 +TGCTCA 9.35733802448e-05 0.000181249059771 +TTCATC 0.00068152611945 0.000235623777702 +TGCTCC 0.00036181707028 0.000181249059771 +TTCATA 0.000198063654851 0.000391497969104 +TTCATG 0.00048814113361 0.000184269877433 +TGCTCG 0.000185587204152 0.000184874040966 +GTACAA 9.045426757e-05 0.000251332029548 +CAGCAG 0.00409383538571 0.000655517432837 +TACACT 0.000120085837981 0.000226561324713 +GCTTTG 0.000360257513942 0.000231394632974 +CAGCAC 0.000795373732081 0.00024468623069 +CAGCAA 0.0015174483163 0.000569122047679 +GGGAGT 2.96315704108e-05 0.00010814527233 +GTACAC 8.42160422203e-05 0.000151645046675 +TGATTT 0.0 0.000700225534247 +TTCATT 0.000352459732255 0.000499643241434 +GTAGCC 0.000215218774563 0.00012929099597 +GCTGCC 0.000870232436277 0.000244082067158 +GCTGCA 0.000399246422378 0.000289998495633 +AACTCT 0.000179348978803 0.000270061099058 +GCTGCG 0.00027136280271 0.00017641575151 +TGGCCA 0.000131002732343 0.000360685628943 
+GTAGCA 8.57755985577e-05 0.000160103336131 +TCATCT 0.000110728499956 0.000201790619878 +TGCTCT 9.8252049257e-05 0.000235019614169 +AACTCA 0.000243290788636 0.000308727565142 +TGGCCT 3.74293520979e-05 0.000204207274008 +AACTCC 0.000564559394144 0.000179436569173 +GCTGCT 0.000619143865953 0.000298456785089 +AACTCG 0.000573916732168 0.000189103185694 +CTACAA 0.000101371161932 0.000285165187372 +GGATCC 0.000550523387107 0.000168561625587 +TGAAGA 0.0 0.000349810685357 +AATATT 0.00030723259847 0.00117026476258 +GTTCGA 9.51329365822e-05 0.000326248307587 +CAACCT 9.045426757e-05 0.000143790920751 +TGGCCG 0.000101371161932 0.000178228242108 +GGCACG 0.000276041471722 0.000102707800537 +GGAGGA 0.000683085675787 0.000280331879112 +GGCACA 0.000300994373121 0.000182457386836 +GACAAT 0.000609786527929 0.000245894557755 +GGCACC 0.000790695063068 0.000126270178307 +CAACCG 0.000285398809747 0.000165540807924 +ACGACC 0.000191825429502 0.00017883240564 +AATATG 0.000383650859004 0.00043983105171 +CAACCC 0.0002292547816 0.000167353298521 +TTAGGA 6.23822534965e-05 0.000192124003357 +AATATC 0.000354019288593 0.000433789416384 +GACAAA 0.000466307344887 0.000388477151441 +GGCACT 0.000350900175918 0.000149228392544 +GACAAC 0.0009045426757 0.000228373815311 +ACGACA 0.000212099661888 0.000221728016453 +GACAAG 0.0014254344924 0.000192728166889 +TGGCCC 9.35733802448e-05 0.000159499172598 +GCAATC 0.000121645394318 0.000256769501342 +GCAATA 8.42160422203e-05 0.000366123100736 +GAATTT 0.000244850344974 0.000586642790124 +TTGAAC 0.000336864168881 0.000334706597043 +CGATGG 6.55013661714e-05 0.000132311813632 +GTTCCC 0.000255767239336 0.00015285337374 +CGATGC 7.64182605332e-05 0.000162519990261 +GTAAAT 0.00017467030979 0.000548580487572 +CGATGA 3.11911267483e-06 0.000193332330422 +GAATTA 0.000109168943619 0.000363102283074 +GAATTC 0.000382091302666 0.000275498570851 +GAATTG 0.000407044204065 0.000331081615847 +GTAAAC 0.000193384985839 0.000264019463732 +CGATGT 5.3024915472e-05 0.000175811587977 
+GTAAAA 0.000140360070367 0.000631350891534 +GTAAAG 0.000293196591434 0.000259186155472 +TCCCAG 0.000469426457561 0.000163728317326 +GGATTT 0.000291637035096 0.000280936042644 +GTGCGT 0.000266684133698 0.000216894708192 +ACGCCG 0.000414841985752 0.000133520140698 +GGATTC 0.000514653591346 0.000187290695096 +GGATTA 0.000106049830944 0.000184269877433 +GGATTG 0.000360257513942 0.000165540807924 +AGCGGC 0.000653454105376 0.000190311512759 +ACGCCA 0.000354019288593 0.000148020065479 +AAAGCC 0.000272922359047 0.000346789867694 +TCTGTG 0.0002292547816 0.000254957010744 +CTATCC 0.000219897443575 0.000131103486567 +GTCCCG 5.14653591346e-05 9.12286934178e-05 +TACAAT 0.000511534478672 0.000443456032905 +AACCGG 0.000149717408392 0.000113582744123 +CTCCTT 0.000123204950656 0.000138353448958 +GGCGGA 0.000782897281381 0.000169165789119 +AACCGC 0.000333745056206 0.000168561625587 +AACCGA 0.000173110753453 0.000297248458024 +GTGGGA 0.000372733964642 0.000131103486567 +TACCCA 0.000106049830944 0.000192124003357 +TCTGTT 0.00012632406333 0.000282748533242 +AAAGCG 0.00018714676049 0.000384248006713 +CTCCTG 0.000433556661801 0.000166144971456 +AACCGT 0.000145038739379 0.000140770103088 +CTCCTA 8.42160422203e-05 9.00203663527e-05 +CTCCTC 0.000199623211189 0.000228977978843 +GGCGGT 0.000665930556075 0.000123249360644 +AACAAG 0.00124452595726 0.000425331126928 +CCCACG 0.000477224239248 0.000149228392544 +AACAAC 0.00147845940787 0.000833141511412 +AACAAA 0.000483462464598 0.00135151382236 +TACTTC 0.000575476288505 0.000199373965748 +AGCCTT 0.000116966725306 0.000156478354935 +CCGATT 0.000179348978803 0.000207832255204 +CCCACT 0.000389889084353 0.000187894858629 +AACAAT 0.000892066225 0.00070807966017 +GACGCT 0.000148157852054 8.15620768967e-05 +AGCGGT 0.000288517922421 0.000149228392544 +CCGATA 6.70609225088e-05 0.000194540657487 +AGCCTG 0.000531808711058 0.000123249360644 +AGCCTA 0.000134121845018 0.000127478505372 +AGCCTC 0.000148157852054 0.000115999398253 +GTACCC 0.000140360070367 
9.18328569504e-05 +CTCCCG 5.77035844843e-05 7.18954603756e-05 +TAGGCT 0.0 0.000105124454667 +ACGAAT 0.000237052563287 0.000297248458024 +CGCCTT 0.000145038739379 0.00015285337374 +AGGCAT 6.86204788462e-05 0.000178228242108 +CAGAGT 0.000339983281556 0.000171582443249 +GCATCT 0.000185587204152 0.000192124003357 +CTCCCA 5.3024915472e-05 0.000144395084284 +ACGAAA 0.00018090853514 0.000587246953656 +CGCCTC 0.000208980549213 0.000126874341839 +AGGCAA 0.000102930718269 0.000282748533242 +CGCCTA 0.000113847612631 9.78744922761e-05 +AGGCAG 0.000221456999913 0.000270061099058 +CGCCTG 0.000656573218051 0.000132311813632 +ACGAAG 0.000268243690035 0.000203603110476 +CAGAGG 0.000212099661888 0.000208436418736 +GCATCC 0.000383650859004 0.000174603260912 +GCATCA 0.000198063654851 0.000198165638682 +CAGAGC 0.000667490112413 0.000190311512759 +CGGCCA 0.000112288056294 0.00022112385292 +CAGAGA 0.000166872528103 0.000236227941234 +CGGCAC 0.00018714676049 0.000121436870046 +CCTAGA 4.67866901224e-05 0.000111166089993 +CCTAGC 7.64182605332e-05 9.72703287435e-05 +TTTGCT 0.000219897443575 0.000436810234047 +TAGCTC 0.0 0.000189103185694 +TGCGGT 0.000154396077404 0.000125666014774 +CCTAGG 2.49529013986e-05 5.07497367357e-05 +TAAGCA 0.0 0.000369748081932 +GTCTAG 1.8714676049e-05 0.000146811738414 +TGCGGC 0.000419520654764 0.000170374116184 +TGCGGA 0.000244850344974 0.000149832556077 +CCTAGT 5.92631408217e-05 8.8207875755e-05 +TGCGGG 6.23822534965e-05 9.66661652109e-05 +TAAGCG 0.0 0.000171582443249 +TGAGTT 0.0 0.0002634153002 +TAGCTG 0.0 0.000215686381127 +ATCGCT 0.000179348978803 0.000194540657487 +TCTCGT 7.95373732081e-05 0.000186082368031 +ATCGCC 0.000625382091303 0.000193936493954 +TGAGTG 0.0 0.000205415601073 +ATCGCA 0.000185587204152 0.000248915375418 +TGAGTA 0.0 0.000241665413027 +ATCGCG 0.000120085837981 0.000144999247816 +TGAGTC 0.0 0.000106332781732 +AAGGTG 0.00111508278125 0.000188499022161 +ACGGCT 0.000237052563287 0.00011297858059 +CATCAT 0.000366495739292 0.000288790168568 
+AAGGTA 0.000205861436539 0.000149832556077 +TTTATT 0.000246409901311 0.00128143085258 +ACGGCA 0.000266684133698 0.000222332179985 +CATCAC 0.000363376626617 0.000204811437541 +ACGGCC 0.000561440281469 0.000105124454667 +CATCAA 0.000208980549213 0.000365518937204 +CATCAG 0.000556761612456 0.000231394632974 +ACGGCG 0.00040392509139 0.000117811888851 +TCACCT 9.98116055944e-05 0.000144999247816 +TCTAAG 0.000166872528103 0.000230186305909 +TCGTTT 0.000123204950656 0.000380018861985 +CGGAAT 0.000207420992876 0.000193936493954 +CTCATG 0.000269803246372 0.00010814527233 +TCACCG 0.000238612119624 0.000188499022161 +TCACCA 0.000199623211189 0.000224144670583 +TCACCC 0.0002167783309 0.000143790920751 +CGGAAG 0.000241731232299 0.000193936493954 +AATATA 0.000272922359047 0.00101982804298 +CGGCCT 4.67866901224e-05 8.15620768967e-05 +CGGAAC 0.000165312971766 0.000132915977165 +CGGAAA 0.000163753415428 0.000286373514437 +TAGCTT 0.0 0.000241061249495 +ATGGCG 0.000335304612544 0.000128082668904 +TGTGTA 3.89889084353e-05 0.000405393730353 +ATGGCC 0.000893625781338 0.000205415601073 +ATGGCA 0.000280720140734 0.000288790168568 +TCCAAT 0.000586393182867 0.000276102734384 +ATGGCT 0.000316589936495 0.000190311512759 +TGTGTT 8.10969295455e-05 0.000417477001005 +TTGTCG 0.000199623211189 0.000181249059771 +GAACAT 0.000143479183042 0.000281540206177 +AGATTA 4.21080211102e-05 0.000256165337809 +AGATTC 0.000116966725306 0.000250727866016 +GAAACA 0.000296315704108 0.000503872386162 +AGATTG 9.66924929196e-05 0.000245894557755 +CTGAGT 0.000318149492832 0.000141978430154 +GAAACT 0.000296315704108 0.0003951229503 +AGAATG 9.20138239074e-05 0.000228977978843 +AGAATA 5.61440281469e-05 0.000390289642039 +AGAATC 9.20138239074e-05 0.000256165337809 +CTGAGC 0.000550523387107 0.000172186606782 +CTGAGA 0.000159074746416 0.000171582443249 +CTGAGG 0.000241731232299 9.78744922761e-05 +CATACC 7.79778168707e-05 0.000216290544659 +CATACA 7.48587041958e-05 0.000412643692744 +CATACG 0.00012632406333 
0.000166749134989 +TCAAAT 0.000237052563287 0.00063980918099 +GTCCAT 0.000148157852054 0.000151645046675 +ACATAG 1.40360070367e-05 0.000189707349226 +CATACT 5.45844718095e-05 0.000187290695096 +GTCCAG 0.000522451373033 0.000128686832437 +TCAAAG 0.00030723259847 0.000383643843181 +CACGAG 0.000603548302579 0.000108749435862 +GTCCAC 0.000219897443575 0.000198165638682 +GCAATG 0.000251088570324 0.000237436268299 +CGGCGG 0.000127883619668 0.000175811587977 +TTGATG 0.000247969457649 0.000311144219273 +CTCATT 0.000219897443575 0.000238644595364 +CGGCGC 0.000199623211189 8.76037122224e-05 +TTGATC 0.000243290788636 0.00020239478341 +CGGCGA 0.000151276964729 0.000148020065479 +TCCAAG 0.00104958141508 0.0002634153002 +CTATCT 8.88947112325e-05 0.000165540807924 +TTGATT 0.0002167783309 0.000567913720614 +CGGCGT 9.98116055944e-05 9.72703287435e-05 +TATAAG 0.000177789422465 0.000358873138346 +TGACTT 0.0 0.000253144520146 +TCACAG 0.000297875260446 0.000190311512759 +CGGTTA 3.74293520979e-05 0.000180040732705 +GGTTTA 5.92631408217e-05 0.000202998946943 +CGGTTC 0.000104490274607 0.000180644896238 +GCTGTT 0.000230814337937 0.000303290093349 +TTTACA 9.8252049257e-05 0.000502059895564 +CGGTTG 0.000106049830944 0.000137749285426 +GGTTTG 0.000149717408392 0.000162519990261 +TATAAA 8.10969295455e-05 0.000984786558086 +CGTACG 6.86204788462e-05 6.82704791802e-05 +GCTGTG 0.000491260246285 0.000180040732705 +GCTGTC 0.000221456999913 0.000144395084284 +CGGTTT 6.23822534965e-05 0.000182457386836 +GCTGTA 0.000131002732343 0.000173394933847 +GGGAGG 1.71551197115e-05 9.36453475481e-05 +TCATAG 3.11911267483e-06 0.000142582593686 +TATGCG 0.00026512457736 0.00015043671961 +TCTCAA 9.35733802448e-05 0.000260394482537 +AGGTTG 5.61440281469e-05 0.000135332631295 +GCATTT 0.000198063654851 0.00053166390866 +AGGTTC 7.48587041958e-05 0.000128686832437 +AGGTTA 4.21080211102e-05 0.000138957612491 +ACTCCT 0.000168432084441 0.00012929099597 +TTTTTC 0.000157515190079 0.000792058391197 +GAGGGC 0.00101839028833 
0.000107541108797 +TTTTTA 7.48587041958e-05 0.00128505583377 +TCCTCT 0.000148157852054 0.000196353148085 +TTTTGT 7.64182605332e-05 0.000987807375749 +GAGGGG 0.000124764506993 8.8207875755e-05 +TTAAAT 0.000159074746416 0.00122101449932 +AGGTTT 5.3024915472e-05 0.000230790469441 +ACATAT 0.000134121845018 0.000688142263595 +TTAAAC 0.000124764506993 0.000540122198116 +TTAAAA 0.000143479183042 0.00132009731866 +ACTCCG 0.00030723259847 0.00010814527233 +TTAAAG 0.000190265873164 0.0005268306004 +ACTCCA 0.000241731232299 0.000231394632974 +TTTTTT 9.51329365822e-05 0.00233690454397 +ACTCCC 0.000238612119624 0.000111770253525 +CTGTAA 1.40360070367e-05 0.000274290243786 +GTCACG 0.000169991640778 8.57912216247e-05 +CTGTAC 0.000555202056119 0.000149832556077 +GCAACT 0.000184027647815 0.00028697767797 +GCAATT 0.000146598295717 0.000500851568499 +CTGTAG 1.24764506993e-05 0.000127478505372 +GTCACA 0.000177789422465 0.000216290544659 +GCAACG 0.00022301655625 0.000243477903625 +GTCACT 0.000188706316827 0.000132915977165 +TAGGCG 0.0 7.91454227665e-05 +GCAACC 0.00017467030979 0.000191519839824 +CTGTAT 0.0002167783309 0.000189707349226 +GCAACA 0.000394567753366 0.000538913871051 +TAGGCC 0.0 6.70621521151e-05 +TGGTTC 0.000310351711145 0.000216290544659 +ATTCAT 0.000159074746416 0.000352227339487 +TCCCAC 0.000246409901311 0.000164332480859 +GCGATT 0.000193384985839 0.000190915676292 +TCGTGT 6.08226971591e-05 0.000181853223303 +TGAATT 0.0 0.000491789115511 +GTCTCT 0.000101371161932 0.000146811738414 +TCCGTG 0.000511534478672 0.000146811738414 +ATTCAG 0.000441354443488 0.000257977828407 +ATTCAA 0.000255767239336 0.000648267470446 +ATTCAC 0.000232373894275 0.000241061249495 +GTCTCC 0.000299434816783 0.000106936945265 +GTCTCA 0.000115407168969 0.000106332781732 +GCGATG 0.000251088570324 0.000138353448958 +GTCTCG 0.000193384985839 7.61246051036e-05 +GCGATA 7.79778168707e-05 0.000154061700805 +TCGTGC 0.000120085837981 0.000109957762927 +GCGATC 0.000202742323864 0.000156478354935 +CGATTG 
0.000232373894275 0.000227165488246 +TCTACA 9.51329365822e-05 0.000195748984552 +GAAGGA 0.000262005464685 0.000276706897916 +CGATTC 0.000262005464685 0.000229582142376 +CGATTA 4.36675774476e-05 0.000235623777702 +CAATCT 0.000148157852054 0.000210248909334 +GGAACA 0.000191825429502 0.000277311061449 +GGAACG 0.000233933450612 0.000143186757219 +TGACGA 0.0 0.000146811738414 +TACTGC 0.000299434816783 0.000148020065479 +CGATTT 0.000131002732343 0.000356456484215 +GAAGGT 0.000212099661888 0.000154665864337 +CTCGCT 8.57755985577e-05 0.000201186456345 +CAATCG 0.000332185499869 0.00024710288482 +CAATCA 0.000179348978803 0.000358873138346 +CAATCC 0.000252648126661 0.000208436418736 +TCCTGA 6.23822534965e-06 0.000169769952652 +AGTCCA 0.000224576112587 0.000184874040966 +TCCTGC 0.000290077478759 0.000160103336131 +AGTCCC 0.00041016331674 0.000131103486567 +ACTAAA 0.000124764506993 0.000507497367357 +TCCTGG 0.000227695225262 0.000199373965748 +ACTAAC 8.42160422203e-05 0.000217498871725 +TAACGC 0.0 0.000146207574882 +CAGCTG 0.00158139012614 0.000383039679648 +CAGCTA 0.000315030380157 0.000225352997648 +CAGCTC 0.000389889084353 0.000216894708192 +ACTAAT 0.000120085837981 0.00031295670987 +AGTCCT 0.000188706316827 0.000183665713901 +TCCTGT 0.000118526281643 0.000184874040966 +TAATGT 0.0 0.000373373063127 +CAGCTT 0.000266684133698 0.00022112385292 +GGTTAG 1.24764506993e-05 0.000200582292813 +TAACGT 0.0 0.000154665864337 +ACGGAC 0.00045227133785 0.000122041033579 +GTACTT 6.08226971591e-05 0.000285769350905 +CGACAT 0.000115407168969 0.000153457537272 +TTGTTC 0.000191825429502 0.000347394031227 +CCGAGG 8.57755985577e-05 0.000120228542981 +CCGAGA 6.39418098339e-05 0.000154665864337 +ACGGAA 0.000408603760402 0.000253748683679 +CCGAGC 0.000166872528103 0.000144395084284 +CCGCGT 0.000185587204152 0.000141978430154 +CGACAC 0.00013880051403 0.000107541108797 +CGACAA 0.000123204950656 0.000231998796506 +CGACAG 0.000327506830857 0.000186686531564 +GTACTA 4.99058027972e-05 0.00013654095836 
+CCGCGC 0.000280720140734 8.51870580921e-05 +ACGGAG 0.000757944379983 0.000131103486567 +CCGCGA 0.000137240957692 7.79370957013e-05 +TACGCA 0.000106049830944 0.000181249059771 +CCGCGG 7.79778168707e-05 7.5520441571e-05 +TTGTTT 0.00013256228868 0.000929807676623 +GCATAT 0.000101371161932 0.00035283150302 +TCCGTT 0.000213659218226 0.000160707499663 +AGGAAC 0.000230814337937 0.000218103035257 +AATAAC 0.0004460331125 0.000474872536599 +AATAAA 0.000224576112587 0.00174422011852 +AATAAG 0.00031970904917 0.000391497969104 +TACTAA 2.02742323864e-05 0.000253144520146 +TGGAAG 0.000396127309703 0.000218103035257 +AATAAT 0.000433556661801 0.00123430609704 +ACGGTG 0.000514653591346 0.000114186907655 +CCAACC 0.000182468091477 0.000219915525855 +CCAACA 0.000227695225262 0.000328060798185 +TTATGC 4.21080211102e-05 0.000226561324713 +CCAACG 0.000268243690035 0.000246498721288 +GGGATT 7.32991478584e-05 0.00013654095836 +CGCCGC 0.000332185499869 0.000180040732705 +AGTAAA 0.000131002732343 0.000520788965074 +CGCCGA 0.000102930718269 0.000143186757219 +AGTAAC 0.000127883619668 0.000215082217594 +CGCCGG 0.000160634302754 0.000116603561786 +GAGTAA 1.55955633741e-05 0.000248311211886 +AGTAAG 9.8252049257e-05 0.000175811587977 +CTCAGG 0.000102930718269 9.30411840155e-05 +CTCAGC 0.000430437549126 0.000178228242108 +CCAACT 0.000237052563287 0.00031054005574 +GAGTAT 0.000368055295629 0.000170374116184 +GCTTCA 0.000143479183042 0.000224748834115 +GTGTAG 6.23822534965e-06 0.000151645046675 +TCGAGC 0.000240171675962 0.000184874040966 +AGTAAT 0.000168432084441 0.000303290093349 +GTGTAC 0.00030723259847 0.000175811587977 +CGCCGT 0.000152836521066 0.000126270178307 +GTGGCA 0.000430437549126 0.000250727866016 +CATATT 0.000107609387282 0.00059510107958 +CTCGGA 0.000135681401355 0.000121436870046 +CTCGGC 0.000255767239336 0.000129895159502 +GCCGCC 0.0013505757882 0.00019514482102 +GCCGCA 0.000377412633654 0.000170978279717 +GCCGCG 0.000196504098514 0.000108749435862 +TATTGA 9.35733802448e-06 
0.000356456484215 +ACACTT 9.20138239074e-05 0.000356456484215 +CATATG 0.00013880051403 0.000407206220951 +TCGCTC 0.000227695225262 0.000184269877433 +CATATC 0.00013256228868 0.000254352847211 +CATATA 8.42160422203e-05 0.000627725910339 +GTCAGC 0.000417961098427 0.000138957612491 +TACTAC 0.000545844718095 0.000180040732705 +ACACTC 0.000104490274607 0.000186082368031 +ACACTA 9.35733802448e-05 0.000222936343518 +ACACTG 0.000335304612544 0.000193936493954 +GCCGCT 0.000503736696984 0.000146811738414 +ACCGGA 0.000252648126661 0.000111770253525 +AAATAA 3.11911267483e-05 0.00160949165076 +ACCGGC 0.000411722873077 8.63953851573e-05 +AAATAC 0.000352459732255 0.000668204867021 +CTTTAA 9.35733802448e-06 0.00051051818502 +ACCGGG 6.86204788462e-05 6.04163532568e-05 +AAATAG 2.02742323864e-05 0.000463997593012 +AACTTA 8.42160422203e-05 0.000425331126928 +CAGGTA 0.000137240957692 0.000103311964069 +AACTTC 0.000650334992701 0.000314769200468 +CAGGTC 0.000346221506906 9.90828193412e-05 +CACGAC 0.000399246422378 0.000102103637004 +CACGAA 0.000299434816783 0.000229582142376 +CAGGTG 0.00103866452072 0.000126874341839 +CTTTAT 0.000102930718269 0.000341956559434 +AATCCC 0.000731431922247 0.000237436268299 +AATCCA 0.000357138401268 0.000365518937204 +AAATAT 0.000263565021023 0.00137205538246 +AATCCG 0.000587952739205 0.000235019614169 +CACGAT 0.000449152225175 0.000119624379449 +CAGGTT 0.00022301655625 0.000148624229012 +AACTTT 0.000555202056119 0.00059751773371 +TCTGTC 7.64182605332e-05 0.000180644896238 +GGGATA 2.96315704108e-05 0.000112374417058 +GGGATC 9.98116055944e-05 0.000128686832437 +GGGTGT 2.33933450612e-05 0.000122041033579 +GGAGAT 0.000336864168881 0.00017883240564 +GATGCT 0.00041016331674 0.000146207574882 +CGCAGT 0.000335304612544 0.000172790770315 +GAGTCG 0.000495938915297 0.000121436870046 +GAGCCT 0.00018714676049 9.96869828738e-05 +GGAGAC 0.000215218774563 0.000144999247816 +GGCGGC 0.0011337974573 0.000206019764606 +GGAGAA 0.000286958366084 0.000261602809602 +GGGTGC 
3.11911267483e-05 0.000103916127602 +GGAGAG 0.000371174408304 0.000166749134989 +TGGAAC 0.000276041471722 0.000205415601073 +GGGTGG 2.02742323864e-05 0.000124457687709 +GAGCCA 0.000469426457561 0.000201186456345 +CGCAGG 0.000154396077404 0.000105728618199 +GATGCG 0.000475664682911 0.000135332631295 +CGCAGA 0.000110728499956 0.00020239478341 +GATGCA 0.000413282429414 0.000199373965748 +CGCAGC 0.000553642499782 0.000256769501342 +GATGCC 0.00115095257701 0.000177019915043 +CGGATT 0.000109168943619 0.000175811587977 +GGTAGT 0.000135681401355 0.000111770253525 +ACCCCT 4.05484647727e-05 0.000100895309939 +GCCAAT 0.000974722710883 0.000276102734384 +GGCGGG 0.00017467030979 8.57912216247e-05 +AGAAGT 9.98116055944e-05 0.000280331879112 +ACCCCA 0.000115407168969 0.000193936493954 +GGTAGA 3.27506830857e-05 0.000143186757219 +ACCCCC 0.000143479183042 0.000106936945265 +CGGATG 0.00018090853514 0.000146207574882 +GCCAAG 0.00196816009782 0.000278519388514 +CGGATA 6.55013661714e-05 0.000157686682 +ACCCCG 0.000131002732343 7.43121145059e-05 +CGGATC 0.000155955633741 0.000181853223303 +AGAAGA 8.73351548951e-05 0.000411435365679 +AGAAGC 9.66924929196e-05 0.000303290093349 +GCAAGA 6.23822534965e-05 0.000234415450637 +AGAAGG 6.39418098339e-05 0.000279123552047 +CACTCC 0.000238612119624 0.00015527002787 +CACTCA 0.000129443176005 0.00024710288482 +CACTCG 0.000363376626617 0.000146207574882 +TGCGTT 0.000121645394318 0.000214478054062 +TCAGCT 0.000208980549213 0.000218103035257 +AGCTTT 0.000310351711145 0.000285769350905 +CCCCGG 5.61440281469e-05 9.78744922761e-05 +TGCGTA 6.23822534965e-05 0.000155874191403 +CCCCGC 0.000143479183042 0.000102103637004 +CACTCT 0.000109168943619 0.000148624229012 +AGCTTC 0.000485022020935 0.000177624078575 +AGCTTA 8.57755985577e-05 0.000256165337809 +AGCTTG 0.000263565021023 0.000181853223303 +TTAGCT 8.88947112325e-05 0.000271873589656 +TTAGCC 0.000110728499956 0.000161311663196 +GCACTG 0.000430437549126 0.000182457386836 +TTAGCA 6.55013661714e-05 
0.000288186005035 +GACCAT 0.000149717408392 0.00019514482102 +TTAGCG 5.3024915472e-05 0.000132915977165 +TACCGG 9.98116055944e-05 6.70621521151e-05 +TCATAT 7.95373732081e-05 0.000507497367357 +ATAGGT 6.70609225088e-05 0.00022354050705 +ATAAGG 6.86204788462e-05 0.000187894858629 +TGTGCT 0.000112288056294 0.000236227941234 +AAGGGC 0.000851517760228 0.000145603411349 +ATAAGC 0.000135681401355 0.000254352847211 +AAGGGA 0.000511534478672 0.000216290544659 +ATAAGA 6.86204788462e-05 0.000348602358292 +CCAGAC 0.000157515190079 0.000140770103088 +TCTGTA 8.88947112325e-05 0.000219311362322 +CCAGAA 0.000302553929458 0.000328060798185 +CCAGAG 0.000458509563199 0.000169165789119 +GCAAGT 0.000162193859091 0.000264019463732 +TGTGCG 8.57755985577e-05 0.000241665413027 +ATAAGT 0.000101371161932 0.000394518786767 +ATAGGG 2.6512457736e-05 9.60620016784e-05 +TGTGCC 0.000304113485796 0.000198769802215 +ATAGGA 7.01800351836e-05 0.000182457386836 +TGTGCA 8.26564858829e-05 0.000297248458024 +ATAGGC 9.8252049257e-05 9.42495110807e-05 +GCTCGC 0.000255767239336 0.000168561625587 +TAAGTT 0.0 0.000432581089319 +TTTGCG 0.000227695225262 0.000258581991939 +CCAGAT 0.000310351711145 0.000238040431832 +AGACAA 7.48587041958e-05 0.000309935892208 +GACGCA 0.000168432084441 0.000146207574882 +AGACAC 7.1739591521e-05 0.000207228091671 +TCCATT 0.000470986013899 0.000309331728675 +AGACAG 7.48587041958e-05 0.000189707349226 +GCTTTT 0.000165312971766 0.000373373063127 +TTTTCT 7.1739591521e-05 0.00072922538381 +CGCTGT 0.000107609387282 0.000163124153793 +GACGCC 0.000467866901224 0.000111770253525 +TCAAGA 5.61440281469e-05 0.000282748533242 +CGTGGG 4.36675774476e-05 9.30411840155e-05 +CTATAG 4.67866901224e-06 0.00017399909738 +AGACAT 4.36675774476e-05 0.000204207274008 +CGTGGA 0.000185587204152 0.000128082668904 +TTTTCG 0.000201182767526 0.000506893203825 +CCCACA 0.000435116218138 0.000272477753188 +CGCTGC 0.000346221506906 0.00020239478341 +TTTTCC 0.000145038739379 0.000538913871051 +TTTTCA 7.79778168707e-05 
0.000694183898921 +CGCTGG 0.000252648126661 0.000132915977165 +TATGGA 0.000269803246372 0.000193332330422 +TATGGC 0.000358697957605 0.000196353148085 +CGTAGG 3.89889084353e-05 6.28330073871e-05 +CCCACC 0.000683085675787 0.000179436569173 +TATGGG 8.10969295455e-05 0.000143790920751 +CGTAGC 8.26564858829e-05 9.30411840155e-05 +TACCGT 9.98116055944e-05 8.8207875755e-05 +TGGACT 8.42160422203e-05 0.000179436569173 +CGTAGT 6.08226971591e-05 0.000127478505372 +GCTCGA 0.000118526281643 0.000152249210207 +TATGGT 0.000140360070367 0.000224144670583 +TGGACA 0.000115407168969 0.000212665563464 +TGGACC 0.000249529013986 0.000112374417058 +TGGACG 0.000152836521066 0.000152249210207 +CCGGTT 0.000176229866128 0.000114186907655 +GACCAA 0.0002292547816 0.000238644595364 +GCTTGA 4.67866901224e-06 0.000170978279717 +TGAGAC 0.0 0.000161311663196 +GCTTGC 9.98116055944e-05 0.00015285337374 +TGAGAA 0.0 0.000303894256882 +TGAGAG 0.0 0.000181249059771 +GCTTGG 0.000107609387282 0.000225957161181 +CCTGTT 0.000145038739379 0.00015285337374 +CCGGTG 0.000491260246285 0.000109957762927 +CCGGTC 0.000116966725306 5.86038626591e-05 +CCGGTA 9.35733802448e-05 7.31037874408e-05 +CCTGTC 8.26564858829e-05 0.00013654095836 +CCTGTA 7.79778168707e-05 0.000130499323035 +CCTGTG 0.000210540105551 0.000119624379449 +GCTTGT 7.1739591521e-05 0.000195748984552 +TGAGAT 0.0 0.000199373965748 +TCCGAA 0.000322828161845 0.000277915224981 +GGTTGT 8.57755985577e-05 0.000180644896238 +TCCTCA 0.000243290788636 0.000156478354935 +TATCAG 0.000371174408304 0.000214478054062 +TATTGC 0.000102930718269 0.000287581841503 +TAGCGT 0.0 0.000103916127602 +GGTTGG 0.000123204950656 0.000154061700805 +GGTTGA 3.11911267483e-06 0.000122645197111 +TTCACG 0.000272922359047 0.000137145121893 +GGTTGC 0.000127883619668 0.00021870719879 +GGAAAC 0.000400805978715 0.000305102583947 +TCCCGC 0.000297875260446 9.84786558086e-05 +GTAACG 0.000104490274607 0.000141978430154 +CAGCGT 0.000352459732255 0.000208436418736 +GTAACC 0.000137240957692 
0.000114186907655 +CGTTTT 0.000106049830944 0.000410831202146 +ATTCTC 0.000237052563287 0.000279727715579 +ATTCTA 0.000137240957692 0.000327456634652 +ATTCTG 0.000609786527929 0.000282144369709 +TCATTT 0.000102930718269 0.000547372160507 +CAGCGA 0.000458509563199 0.000265227790798 +CAGCGC 0.000695562126486 0.00015285337374 +TGCTAG 1.40360070367e-05 0.000126874341839 +CAGCGG 0.000461628675874 0.000205415601073 +CCTTGT 3.58697957605e-05 0.00013654095836 +TCATTC 0.000120085837981 0.000203603110476 +CGTTTG 0.000254207682998 0.000256769501342 +CGTTTA 4.5227133785e-05 0.000225957161181 +ATTCTT 0.000141919626705 0.000405393730353 +CGTTTC 0.000244850344974 0.000219311362322 +TGGCAA 0.000127883619668 0.000454935140024 +CCGATC 0.000165312971766 0.00021870719879 +TGGCAG 0.000316589936495 0.000261602809602 +GCTGAG 0.000425758880114 0.000180040732705 +GCTGAA 0.000316589936495 0.000353435666552 +GCTGAC 0.000240171675962 0.000151040883142 +TGGCAT 0.000120085837981 0.000252540356614 +GCTGAT 0.000357138401268 0.000221728016453 +CCGATG 0.000282279697072 0.000130499323035 +GGCCGC 0.000199623211189 9.48536746132e-05 +GAACTT 0.000233933450612 0.000311748382805 +GGCCGA 0.000104490274607 0.000143790920751 +GGCAAT 0.000742348816609 0.000306310911012 +CTGATT 0.000396127309703 0.000264623627265 +CCATCG 0.000393008197028 0.000171582443249 +CTAAAT 0.000260445908348 0.000386060497311 +CCATCC 0.000241731232299 0.000202998946943 +CCATCA 0.000166872528103 0.000270061099058 +CTGATC 0.000810969295455 0.000157686682 +CTGATA 0.000210540105551 0.000241061249495 +GAGTTT 0.000467866901224 0.000303894256882 +CTGATG 0.000809409739117 0.000171582443249 +GGCAAA 0.000555202056119 0.000407810384484 +CTAAAG 0.000564559394144 0.000255561174276 +CCATCT 0.000148157852054 0.000211457236399 +CTAAAC 0.000262005464685 0.000262206973135 +CTAAAA 0.000204301880201 0.000578184500668 +GACACC 0.000636298985665 0.000143186757219 +GACACA 0.000260445908348 0.000215082217594 +GACACG 0.00040392509139 0.000120228542981 +GCGCCT 
0.000140360070367 0.000114186907655 +TTCACC 0.000616024753278 0.000187290695096 +TAGCGA 0.0 0.000145603411349 +TACGAG 0.000929495577098 0.000137145121893 +TACGAC 0.000605107858916 0.000165540807924 +GACACT 0.000241731232299 0.000160707499663 +GGCGAT 0.000832803084179 0.000130499323035 +TGTTAC 4.36675774476e-05 0.000177624078575 +ACGCTG 0.00075950393632 0.000186686531564 +ACGCTA 0.000112288056294 8.88120392875e-05 +ACGCTC 0.000274481915385 9.72703287435e-05 +TCGATG 0.00035557884493 0.000204207274008 +TCGAGG 9.98116055944e-05 0.000121436870046 +TCGATC 0.000199623211189 0.000205415601073 +ACGCTT 0.000148157852054 0.000132311813632 +TCTGGG 8.42160422203e-05 0.000141374266621 +GCATTG 0.0002292547816 0.000272477753188 +GAACTG 0.00100123516862 0.000257373664874 +TCGATT 0.000230814337937 0.00037639388079 +TCGGCA 0.000282279697072 0.000166749134989 +AGGCAC 9.20138239074e-05 0.000122645197111 +TTTTGA 7.79778168707e-06 0.000670017357618 +AACCAA 0.000375853077317 0.000689954754193 +TGCCGG 0.000129443176005 0.000117207725318 +AACCAC 0.000371174408304 0.000291206822698 +ACGAAC 0.000243290788636 0.000219915525855 +AACCAG 0.000701800351836 0.0002893943321 +TGCCGA 8.73351548951e-05 0.00021870719879 +TTGGAC 0.000527130042046 0.000184874040966 +TCTGGA 0.000252648126661 0.000199373965748 +GAGTTC 0.000990318274257 0.000231394632974 +TGCCGT 7.79778168707e-05 0.000143186757219 +AACCAT 0.000177789422465 0.000294831803893 +AACAGA 0.00013256228868 0.000433789416384 +ACGTCG 0.000196504098514 0.000112374417058 +AACAGC 0.000818767077142 0.000439226888177 +ACGTCC 0.000171551197115 9.18328569504e-05 +AACAGG 0.000127883619668 0.000185478204498 +ACGTCA 9.20138239074e-05 0.000111166089993 +CCCAAG 0.00114939302067 0.000219311362322 +GTCCCT 2.96315704108e-05 0.000122645197111 +GACGAA 0.00059107185188 0.000215686381127 +CCCAAC 0.000706479020848 0.000204811437541 +CCCAAA 0.000439794887151 0.000429560271656 +ACGTCT 5.45844718095e-05 0.000100895309939 +GGGAGA 1.09168943619e-05 0.000131103486567 +AACAGT 
0.000341542837893 0.000253144520146 +GGGAGC 8.42160422203e-05 0.000119624379449 +CCCAAT 0.000625382091303 0.000247707048353 +GACGAT 0.000881149330638 0.000159499172598 +GGGCGC 9.045426757e-05 0.000130499323035 +GGGCGA 4.83462464598e-05 8.94162028201e-05 +GGGCGG 6.08226971591e-05 0.000125061851242 +TCTCTT 7.32991478584e-05 0.000294831803893 +TGGGGG 2.96315704108e-05 0.00011056192646 +GTACAG 0.000157515190079 0.000125666014774 +TCATAC 7.32991478584e-05 0.000163124153793 +CAGAAT 0.000608226971591 0.00024468623069 +ACGACT 9.66924929196e-05 0.000142582593686 +TCTCTC 4.99058027972e-05 0.0002634153002 +TCTCTA 5.77035844843e-05 0.000217498871725 +TCTCTG 0.000238612119624 0.000192124003357 +GGGCGT 5.77035844843e-05 0.000112374417058 +CAGAAA 0.000460069119537 0.000584830299526 +CAGAAC 0.00076574216167 0.000242873740092 +ACGACG 0.000268243690035 0.000155874191403 +CAGAAG 0.00126480018964 0.000302685929817 +CACTTT 0.000280720140734 0.000399352095028 +CCTAAT 9.51329365822e-05 0.000196957311617 +GCATTC 0.000233933450612 0.000266436117863 +GTCATT 0.000244850344974 0.000193332330422 +TGCCCC 0.000194944542177 0.000135332631295 +CACTTG 0.000325947274519 0.000241665413027 +CCTAAC 6.08226971591e-05 0.000109957762927 +CCTAAA 7.48587041958e-05 0.000269456935525 +CACTTC 0.00045227133785 0.000226561324713 +CCTAAG 0.000102930718269 0.000143790920751 +CACTTA 6.08226971591e-05 0.000306915074545 +GTCCCA 5.14653591346e-05 0.000131103486567 +GTCCCC 6.08226971591e-05 9.2437020483e-05 +AAGATT 0.000548963830769 0.000405997893886 +ATCGAT 0.000831243527841 0.000326852471119 +GACTCG 0.000436675774476 0.000108749435862 +CTCATC 0.000509974922334 0.000173394933847 +GACTCC 0.000562999837806 0.000109353599395 +CTCATA 0.000120085837981 0.000222332179985 +GACTCA 0.000171551197115 0.00013412430423 +AAGATA 0.000286958366084 0.000338935741771 +AAGATC 0.00102306895734 0.000258581991939 +AAGATG 0.000793814175743 0.000268852771993 +GACTCT 0.00017467030979 0.00013654095836 +ATCGAG 0.00115251213335 
0.000190915676292 +TTGAGA 9.8252049257e-05 0.000267040281395 +ATCGAA 0.000575476288505 0.000450101831763 +ATCGAC 0.000587952739205 0.000161915826728 +ATATTT 0.000198063654851 0.00130620155741 +ACGGTA 9.98116055944e-05 0.000146207574882 +GGACTC 0.000238612119624 0.000128686832437 +ATAGTT 0.000106049830944 0.000352227339487 +CTACCA 9.51329365822e-05 0.000180040732705 +TACTGT 0.000106049830944 0.000206623928138 +ATATTC 0.000354019288593 0.000482726662522 +ATATTA 8.42160422203e-05 0.000758829396906 +ATATTG 0.000184027647815 0.000532872235725 +ATAGTA 6.70609225088e-05 0.000265227790798 +ATAGTC 0.000109168943619 0.000166749134989 +TACTGG 0.000230814337937 0.0001317076501 +ATAGTG 0.00022301655625 0.000209644745801 +GATCGA 0.000255767239336 0.000205415601073 +CAGGCA 0.000455390450525 0.000181853223303 +AGGAAG 0.00027136280271 0.000230186305909 +CATCCG 0.00049437935896 0.000144999247816 +AGGAAA 0.000154396077404 0.000458560121219 +CATCCA 0.0002292547816 0.00026099864607 +CAGGCG 0.000839041309528 0.000137145121893 +CATCCC 0.00031970904917 0.000144999247816 +TTATGA 1.55955633741e-06 0.000332289942913 +ATAGAA 0.000162193859091 0.000410831202146 +CGGAGT 7.32991478584e-05 0.000127478505372 +TTATGG 4.5227133785e-05 0.000227769651778 +CATCCT 0.000141919626705 0.000189103185694 +AGGAAT 0.000184027647815 0.000228977978843 +CAGGCT 0.000414841985752 9.66661652109e-05 +ACGGAT 0.000533368267395 0.000186686531564 +CGGAGA 5.77035844843e-05 0.00015527002787 +CGGAGC 0.00012632406333 0.000163124153793 +TTATGT 3.11911267483e-05 0.000490580788445 +CAGGCC 0.000878030217964 9.66661652109e-05 +CGGAGG 6.86204788462e-05 0.000135332631295 +ATGGAA 0.000520891816696 0.000347394031227 +ATGGAC 0.00077198038702 0.000160103336131 +ATGGAG 0.00114315479532 0.000175811587977 +CGCTTG 0.000233933450612 0.000196957311617 +CGCTTA 5.14653591346e-05 0.000177019915043 +CGCTTC 0.000508415365997 0.000175811587977 +TGAACA 0.0 0.00031295670987 +ATGGAT 0.000623822534965 0.000277311061449 +TTTGCA 0.000163753415428 
0.000459768448285 +CGCTTT 0.000215218774563 0.000222936343518 +GCCGTG 0.000567678506818 0.000123853524177 +GATCGT 0.000354019288593 0.000171582443249 +TTTGCC 0.00062694164764 0.000366727264269 +TTGTGA 1.24764506993e-05 0.000332289942913 +GAAAAG 0.00103242629537 0.000602955205503 +GAAAAC 0.000586393182867 0.000670621521151 +ATAGAT 0.000196504098514 0.000299665112154 +GAAAAA 0.000427318436451 0.00112616082471 +GTTCCT 0.000157515190079 0.00017641575151 +TAAGTA 0.0 0.000381227189051 +TAGGGC 0.0 6.58538250499e-05 +ACGGTT 0.000155955633741 0.000117811888851 +GAAAAT 0.000485022020935 0.000932224330753 +ACAAGG 6.70609225088e-05 0.000161915826728 +ACAAGA 5.92631408217e-05 0.000354039830085 +ACAAGC 0.000143479183042 0.000289998495633 +ACACGT 7.1739591521e-05 0.00015043671961 +CTGCGT 0.000428877992789 0.00013654095836 +GACGTC 0.000293196591434 8.57912216247e-05 +GCGGAG 0.000876470661626 0.000169165789119 +CATAAG 0.000148157852054 0.000195748984552 +GCGGAA 0.00040392509139 0.000200582292813 +CATAAA 0.000109168943619 0.000579392827733 +GCCAAC 0.00117902459108 0.000322019162859 +CATAAC 9.8252049257e-05 0.000236227941234 +ACACGC 0.000131002732343 0.000203603110476 +ACAAGT 0.000134121845018 0.000306310911012 +ACACGA 7.01800351836e-05 0.000207832255204 +ACACGG 6.23822534965e-05 0.00011297858059 +CATAAT 0.000118526281643 0.000410831202146 +GCGGAT 0.000552082943444 0.000157082518468 +GCACAT 0.000131002732343 0.000207832255204 +CTGCGG 0.000358697957605 0.000135936794828 +CTGCGA 0.000439794887151 0.000172790770315 +CTGCGC 0.00106361742212 0.000144395084284 +CGGCAA 0.000155955633741 0.000255561174276 +ACCGGT 0.000201182767526 8.15620768967e-05 +CGGCAG 0.000364936182955 0.00020239478341 +GACGCG 0.000176229866128 8.57912216247e-05 +CCTCCT 0.000155955633741 0.000171582443249 +TGTTGA 0.0 0.000347394031227 +CGGCAT 0.000109168943619 0.000192124003357 +CCTCCC 0.000177789422465 0.000102103637004 +CCTCCA 0.000285398809747 0.000160707499663 +CACGCC 0.000396127309703 0.000170978279717 +CCTCCG 
0.000233933450612 0.000124457687709 diff --git a/bin/cpat_model/fly_cutoff.txt b/bin/cpat_model/fly_cutoff.txt new file mode 100755 index 0000000..97cf2bc --- /dev/null +++ b/bin/cpat_model/fly_cutoff.txt @@ -0,0 +1,2 @@ +Coding Probability Cutoff: 0.39 +Achieved Sensitivity and Specificity: 0.963 diff --git a/bin/cpat_model/zebrafish_Hexamer.tsv b/bin/cpat_model/zebrafish_Hexamer.tsv new file mode 100755 index 0000000..5d9e176 --- /dev/null +++ b/bin/cpat_model/zebrafish_Hexamer.tsv @@ -0,0 +1,4097 @@ +hexamer coding noncoding +GAACGT 9.59993417188e-05 0.000127679058873 +CTTCTT 0.000278855230707 0.000339890460173 +CACCCT 0.000187427286213 0.000159378687283 +GAACGG 0.000114284930618 9.42183399961e-05 +GAACGC 0.000132570519516 0.000104784882799 +GAACGA 0.000173713094539 0.000123276332705 +CACCCA 0.000169141697314 0.000222777944103 +CTTCTA 0.000137141916741 0.000157617596816 +CACCCC 0.000189712984825 0.000155856506349 +CTTCTC 0.000308569312668 0.000315235193632 +CACCCG 0.000146284711191 8.71739781273e-05 +CTTCTG 0.000644567008683 0.000428825528767 +CGTGTG 0.000354283284915 0.000186675589525 +TAAGGT 2.28569861235e-06 0.000188436679992 +CGTGTC 0.000107427834781 0.000103904337566 +CGTGTA 6.39995611459e-05 6.95630734551e-05 +GGAAAT 0.000338283394628 0.000398886990825 +TAAGGG 2.28569861235e-06 0.00014881214448 +CGTGTT 0.000114284930618 0.000160259232517 +TAAGGC 0.0 0.000156737051582 +TAAGGA 0.0 0.000163781413451 +TCACTG 0.000313140709892 0.000378634450452 +GTCAAA 0.000669709693419 0.000362784636247 +CCCGCT 0.000150856108415 0.000110068154201 +GTCAAG 0.000557710461414 0.000193719951394 +CTGTCC 0.000637709912846 0.00024391102971 +TCAGAG 0.000703995172605 0.000390081538489 +CTGTCA 0.000438854133572 0.000392723174189 +CTGTCG 0.000189712984825 0.000121515242238 +GTATCT 9.14279444941e-05 0.000150573234947 +TCAGAA 0.000429711339122 0.000526566049698 +GTCAAT 0.000338283394628 0.000252716482046 +GTATCA 8.68565472694e-05 0.00014881214448 +GTATCC 8.68565472694e-05 8.98156138281e-05 
+GTATCG 2.74283833482e-05 4.31467164468e-05 +CTGTCT 0.00061256722811 0.000387439902788 +GGTGTC 0.000253712545971 0.000185795044291 +GGTGTA 0.000182855888988 0.000174347956255 +GGTGTG 0.000591995940599 0.000332846098304 +TATCCT 0.000237712655685 0.000146170508779 +CCGGGG 8.68565472694e-05 4.4907806914e-05 +TTCTGT 0.000319997805729 0.000612859482592 +ATTCCT 0.000411425750223 0.000260641389148 +CCGGGC 0.000123427725067 7.13241639223e-05 +CCGGGA 0.00015542750564 7.22047091559e-05 +TATCCA 0.000239998354297 0.000201644858496 +TATCCC 0.000159998902865 0.000113590335136 +GTTCTG 0.000596567337824 0.000307310286529 +TACACC 0.000527996379453 0.000193719951394 +TATCCG 8.91422458817e-05 6.33992568198e-05 +GGTGTT 0.000356568983527 0.000283535565222 +ATTCCG 6.85709583706e-05 5.54743497173e-05 +TTCTGG 0.000386283065488 0.000372470633816 +CCGGGT 8.68565472694e-05 5.37132592501e-05 +ATTCCC 0.000267426737645 0.000250955391578 +TTCTGC 0.000399997257162 0.000467569519046 +ATTCCA 0.000269712436258 0.000251835936812 +GTTCTA 9.82850403312e-05 0.000122395787472 +TGCACT 0.000265141039033 0.000343412641107 +TATCTT 0.000141713313966 0.00024655266541 +CGCGGT 5.02853694718e-05 5.37132592501e-05 +CCCGCG 6.39995611459e-05 4.4907806914e-05 +CGCGGG 7.31423555953e-05 6.16381663526e-05 +CGCGGC 9.14279444941e-05 5.45938044837e-05 +CGCGGA 5.71424653088e-05 6.95630734551e-05 +ACCTGT 0.000322283504342 0.000274730112886 +TCATGT 0.000114284930618 0.000440272616804 +CCCGCC 0.000118856327842 7.30852543895e-05 +TTACAG 0.00027199813487 0.00043675043587 +CTCCGC 0.000180570190376 0.000133842875509 +TTACAA 9.82850403312e-05 0.00043675043587 +TTACAC 0.000105142136168 0.000327562826902 +CTCCGG 0.000102856437556 0.000124156877939 +TCATGC 9.82850403312e-05 0.000281774474755 +ACCTGG 0.000235426957072 0.00014881214448 +GGCGAA 0.000134856218129 7.83685257912e-05 +GTAAGC 7.08566569829e-05 0.000114470880369 +TCATGG 0.000146284711191 0.000228061215505 +ACCTGC 0.000429711339122 0.000276491203353 +ACCTGA 1.59998902865e-05 
0.000324040645968 +GTATTT 9.14279444941e-05 0.000487822059419 +CTCGAT 0.000118856327842 8.01296162584e-05 +CTCCGT 9.37136431064e-05 0.000117993061304 +GCCTGA 1.82855888988e-05 0.000225419579804 +GCCTGC 0.000340569093241 0.000221016853636 +GCCTGG 0.00031085501128 0.000165542503918 +TCTTGC 7.54280542076e-05 0.000213091946533 +TAAAGG 0.0 0.000378634450452 +CAAATT 0.000150856108415 0.000467569519046 +TAAAGA 2.28569861235e-06 0.000511596780727 +GCATAA 1.14284930618e-05 0.000288818836624 +AAGGTC 0.0003885687641 0.000178750682423 +TATTAG 9.14279444941e-06 0.000326682281669 +GCCTGT 0.000267426737645 0.000242149939242 +TATCTA 0.000102856437556 0.000241269394009 +TAAAGT 0.0 0.000518641142595 +CAAATA 0.00015542750564 0.000560026768575 +CAAATC 0.000239998354297 0.000355740274378 +CGTGCT 0.000178284491763 9.42183399961e-05 +CAAATG 0.000262855340421 0.000461405702411 +AATACT 0.000118856327842 0.000280013384288 +TATCTC 0.000159998902865 0.000160259232517 +CCGGCG 0.000111999232005 5.28327140165e-05 +GGCATG 0.000569138954476 0.00016113977775 +GGCATA 0.00011657062923 0.000134723420742 +GGCGAC 0.000180570190376 9.24572495289e-05 +GGCATC 0.000635424214234 0.000137365056443 +AATACG 5.02853694718e-05 0.000108307063734 +AATACA 0.000141713313966 0.000482538788018 +AATACC 0.000109713533393 0.000132962330275 +GGCATT 0.000431997037735 0.000206047584664 +TATCTG 0.000500567996105 0.000255358117747 +TGCACA 0.00027199813487 0.000456122431009 +GAGCAC 0.000493710900268 0.000233344486906 +ACTGAT 0.000498282297493 0.000394484264657 +GAGCAG 0.00158627483697 0.000442914252505 +AGTAGT 6.62852597582e-05 0.000164661958685 +TCTCGC 0.000189712984825 0.000113590335136 +TCTCGA 0.000123427725067 8.71739781273e-05 +TCTCGG 7.99994514323e-05 0.000101262701865 +ACTGAG 0.000591995940599 0.000366306817181 +AGTAGA 6.62852597582e-05 0.000185795044291 +TTTGGT 0.000356568983527 0.000367187362415 +AGTAGG 4.11425750223e-05 0.000142648327845 +ACTGAA 0.00065142410452 0.00060757621119 +AAGGTT 0.000367997476589 
0.000249194301111 +GGGTCA 0.000146284711191 0.000184033953824 +GTGTCG 0.000175998793151 9.6859975697e-05 +TCGAAC 3.65711777976e-05 8.98156138281e-05 +TCGAAA 7.99994514323e-05 9.86210661642e-05 +GTGTCC 0.000452568325246 0.000219255763169 +CGCACG 9.37136431064e-05 8.71739781273e-05 +GTGTCA 0.000331426298791 0.000254477572513 +GTTGTA 0.000105142136168 0.000221016853636 +CTTGCT 0.000281140929319 0.000250074846345 +TCGGTC 8.4570848657e-05 6.69214377543e-05 +GTGTCT 0.000527996379453 0.00027561065812 +TTGAAG 0.000422854243285 0.000419139531198 +TCGAAT 3.19997805729e-05 9.77405209306e-05 +CTTGCG 6.17138625335e-05 6.60408925206e-05 +CTTGCA 0.000255998244583 0.000216614127468 +CTTGCC 0.000164570300089 0.00012679851364 +CGAAAG 0.000141713313966 9.42183399961e-05 +CGAAAA 0.000139427615353 0.000154095415882 +CGAAAC 0.000159998902865 9.95016113978e-05 +AATCAT 0.000143999012578 0.000462286247645 +AAATGC 0.000324569202954 0.000486060968952 +CACGGC 0.000196570080662 0.000102143247099 +AAATGA 4.34282736347e-05 0.000826831974359 +ACAGTC 0.000274283833482 0.000264163570083 +CACGGG 0.000105142136168 5.6354894951e-05 +ACAGTA 0.000203427176499 0.000367187362415 +TAGAGC 0.0 0.000165542503918 +CGAAAT 9.82850403312e-05 0.000117993061304 +ACAGTT 0.000281140929319 0.000391842628956 +AATCAG 0.000479996708594 0.00040505080746 +CACGGT 0.000109713533393 0.000117993061304 +AAATGT 0.000262855340421 0.000969480302203 +AATCAC 0.000196570080662 0.000310832467464 +TTTGTA 0.000205712875112 0.000574115492313 +GCTCAA 0.000319997805729 0.000266805205783 +TCTAGT 0.000107427834781 0.000162900868218 +TTGCCG 7.54280542076e-05 7.39657996231e-05 +ATACGT 3.42854791853e-05 9.50988852297e-05 +GGGTAT 9.82850403312e-05 7.30852543895e-05 +ATACGA 6.17138625335e-05 8.71739781273e-05 +TCTAGG 4.11425750223e-05 8.71739781273e-05 +ATACGC 7.99994514323e-05 2.99385379427e-05 +TCTAGA 8.68565472694e-05 0.000165542503918 +GGGTAG 0.0 7.22047091559e-05 +TCTAGC 8.91422458817e-05 8.62934328936e-05 +ATACGG 5.02853694718e-05 
6.95630734551e-05 +TGACTA 2.28569861235e-06 0.000205167039431 +TTTGTG 0.000822851500447 0.00057587658278 +TGGGCC 0.000178284491763 0.000151453780181 +AGATGT 0.00011657062923 0.000379514995685 +TCGCCC 0.000111999232005 5.98770758854e-05 +TCGCCA 0.000107427834781 9.24572495289e-05 +GGGTCG 5.02853694718e-05 5.72354401846e-05 +TCGCCG 7.31423555953e-05 7.13241639223e-05 +GGTAAT 0.000130284820904 0.000142648327845 +ACCCAT 0.000127999122292 0.000176989591955 +GTCAAC 0.000633138515622 0.000225419579804 +ACCCAC 0.000166855998702 0.000169945230086 +GGTAAC 0.000118856327842 0.000140006692144 +ACCCAA 0.000141713313966 0.000228941760738 +AGATGG 0.000107427834781 0.000325801736435 +ACCCAG 0.000351997586302 0.000210450310832 +AGATGA 1.37141916741e-05 0.000475494426149 +TCGCCT 0.000118856327842 8.18907067256e-05 +AGATGC 0.000111999232005 0.000330204462603 +TTTGAG 0.000975993307474 0.000501030237923 +CCCCAT 0.000127999122292 0.000151453780181 +TGGTCT 0.000178284491763 0.000216614127468 +TTGGGT 0.000242284052909 0.000215733582234 +CACTAG 6.85709583706e-06 0.000155856506349 +CACTAA 1.37141916741e-05 0.000268566296251 +CACTAC 0.000319997805729 0.000150573234947 +CCCCAA 0.000100570738944 0.00013912614691 +CCCCAC 8.68565472694e-05 0.000127679058873 +CCCCAG 0.000228569861235 0.000120634697004 +CACTAT 0.000178284491763 0.000207808675132 +CTATCG 2.05712875112e-05 3.25801736435e-05 +TCGGTT 0.000100570738944 0.000105665428033 +TTGGGA 0.000265141039033 0.000237747213074 +TTGGGC 0.000242284052909 0.000147931599246 +CTCGAG 0.000180570190376 9.86210661642e-05 +TAGTGG 4.5713972247e-06 0.000123276332705 +TTTCTG 0.000603424433661 0.000721166546325 +TGCAGC 0.000335997696016 0.000366306817181 +TGCCTC 0.000150856108415 0.000200764313263 +TGCCTA 6.39995611459e-05 0.000103904337566 +TGCAGG 0.000217141368173 0.000360143000546 +TTTCCG 6.62852597582e-05 0.000108307063734 +GTGGCC 0.000683423885093 0.000143528873078 +TCAGAC 0.000546281968352 0.000335487734005 +TTTCTT 0.000230855559848 0.000789849074547 
+TGCCTT 0.000150856108415 0.000289699381857 +TGCAGT 0.000265141039033 0.000452600250075 +TAGTGC 0.0 0.000141767782611 +GTGAGC 0.000299426518218 0.000209569765599 +AAGGAA 0.0007771375282 0.000410334078862 +AAGGAC 0.00084570848657 0.00024391102971 +AAGGAG 0.0015359894675 0.000302907560361 +ATATGC 8.68565472694e-05 0.000287057746156 +GTTCCA 0.000278855230707 0.000184914499058 +ATATGA 1.37141916741e-05 0.000376873359985 +TACGAA 0.000143999012578 8.45323424264e-05 +ATATGG 0.000107427834781 0.000195481041861 +AAGGAT 0.000642281310071 0.000184914499058 +CTATCA 8.68565472694e-05 0.000162900868218 +TGGTTT 0.000269712436258 0.000460525157177 +ATATGT 7.08566569829e-05 0.000375112269517 +GGTCCC 0.000196570080662 8.71739781273e-05 +GCCCAG 0.000477711009982 0.000143528873078 +CCACTG 0.000354283284915 0.000344293186341 +GCCCAA 0.000171427395926 0.000110068154201 +GGTCCG 0.000100570738944 4.4907806914e-05 +AGGTCT 0.000132570519516 0.000199003222796 +TGTGTC 0.000223998464011 0.000335487734005 +AGACCC 0.000230855559848 0.000152334325414 +AGACCA 0.000217141368173 0.000257999753447 +AGACCG 0.000102856437556 9.42183399961e-05 +CTTCGC 0.000125713423679 7.92490710248e-05 +ATGCAA 0.000228569861235 0.000457883521476 +AGGTCC 7.31423555953e-05 9.6859975697e-05 +AGGTCA 0.000105142136168 0.000269446841484 +AGGTCG 4.5713972247e-05 6.5160347287e-05 +GGTCCT 0.0003885687641 0.000120634697004 +TCCGGA 0.000100570738944 6.86825282215e-05 +CTTAGC 6.39995611459e-05 8.62934328936e-05 +ATTGTG 0.000564567557251 0.000438511526337 +CTTAGA 6.39995611459e-05 0.000117993061304 +ATGCAC 0.000287998025156 0.000300265924661 +CTTAGG 4.79996708594e-05 7.74879805576e-05 +ATTGTC 0.000242284052909 0.000240388848775 +ATTGTA 0.000150856108415 0.000379514995685 +CGCTAC 0.00031085501128 6.69214377543e-05 +ATTTTG 0.000324569202954 0.00072468872726 +CGCTAA 4.5713972247e-06 7.39657996231e-05 +ATTTTA 0.000171427395926 0.00120810806051 +CGCTAG 0.0 4.75494426149e-05 +ATTTTC 0.000287998025156 0.000643678565768 +GAGGTC 
0.000580567447538 0.00016113977775 +TAGCAG 4.5713972247e-06 0.000206928129898 +GATACT 0.000150856108415 0.000146170508779 +TAGCAC 0.0 0.000133842875509 +TAGCAA 0.0 0.000176989591955 +GAGGTT 0.000525710680841 0.000172586865787 +ATTTTT 0.000207998573724 0.00135956184069 +GCCTTG 0.000226284162623 0.000162020322984 +GTGGGG 0.000207998573724 0.000143528873078 +GCCTTA 9.82850403312e-05 0.000133842875509 +TTGACA 0.000173713094539 0.000407692443161 +CGCTAT 0.000146284711191 7.13241639223e-05 +GATACG 7.771375282e-05 5.54743497173e-05 +TAGCAT 0.0 0.00020252540373 +GATACC 9.37136431064e-05 6.5160347287e-05 +GATACA 0.00019428438205 0.000188436679992 +TGAGCT 2.28569861235e-06 0.000375112269517 +GTCACC 0.000463996818308 0.00017082577532 +GATCAT 0.000290283723769 0.000277371748587 +TGGCAC 0.000157713204252 0.000178750682423 +AAGACA 0.000383997366875 0.000481658242784 +AAGACC 0.000598853036436 0.000219255763169 +ACAATG 0.000239998354297 0.000381276086153 +AAGACG 0.000292569422381 0.000156737051582 +CCACTT 0.000164570300089 0.000328443372136 +TGAGCG 2.28569861235e-06 0.000157617596816 +GATCAC 0.000322283504342 0.000196361587095 +GATCAA 0.00027199813487 0.000263283024849 +AAGACT 0.00046628251692 0.000280893929521 +GAAGGG 0.000290283723769 0.000206047584664 +CTTCCA 0.000340569093241 0.000226300125037 +GCGGTA 4.11425750223e-05 5.54743497173e-05 +GCGGTG 0.00034971188769 0.00012679851364 +CGGTGT 6.39995611459e-05 0.000132962330275 +TCGTTA 2.74283833482e-05 8.1010161492e-05 +TCGTTC 9.14279444941e-05 8.62934328936e-05 +CTTCCC 0.000246855450134 0.000153214870648 +ACTTGT 0.000111999232005 0.000316115738866 +TTGGAG 0.000555424762802 0.000346054276808 +TGTGTG 0.000626281419785 0.0013886198334 +GCGGTT 0.000150856108415 8.71739781273e-05 +ACTTGC 0.000100570738944 0.000154975961115 +ATGTGA 3.42854791853e-05 0.000453480795308 +ACTTGA 2.28569861235e-06 0.000294982653259 +CGGTGG 5.25710680841e-05 0.000109187608967 +ACTTGG 6.62852597582e-05 0.000176989591955 +CGGTGA 6.85709583706e-06 0.0001074265185 
+CGGTGC 2.05712875112e-05 9.15767042953e-05 +TGCCGC 0.000130284820904 0.000103904337566 +TTCCTA 0.000127999122292 0.000155856506349 +TAGACT 0.0 0.000175228501488 +TTCCTC 0.000555424762802 0.000380395540919 +ATGTGC 0.000173713094539 0.000302027015128 +TGGGTT 0.000141713313966 0.000273849567652 +TTCCTG 0.000891422458817 0.000388320448021 +TCCTTT 0.000207998573724 0.000365426271948 +GACCCA 0.000399997257162 0.000152334325414 +TTCCGT 0.000123427725067 8.27712519592e-05 +TAGACG 2.28569861235e-06 0.000110948699435 +TTCCTT 0.000242284052909 0.000324040645968 +TAGACC 2.28569861235e-06 9.50988852297e-05 +TAGACA 0.0 0.000198122677562 +TCCTTG 0.000166855998702 0.000175228501488 +TCCTTA 8.4570848657e-05 0.000162900868218 +TCCTTC 0.000411425750223 0.00025623866298 +ATCTCT 0.000406854352999 0.000313474103165 +TCCGCA 9.82850403312e-05 9.24572495289e-05 +ACCGTT 0.000107427834781 9.06961590617e-05 +TGGCGC 6.85709583706e-05 9.95016113978e-05 +TTTACG 6.39995611459e-05 0.00012679851364 +TGGCGA 3.42854791853e-05 0.000105665428033 +GGCCAT 0.000159998902865 0.000171706320554 +ATCTCG 0.000125713423679 8.62934328936e-05 +ATCTCA 0.000278855230707 0.000302907560361 +ATCTCC 0.000445711229409 0.000250074846345 +GGCCAA 0.000189712984825 0.000154095415882 +GGCCAC 0.000226284162623 0.000174347956255 +ACCGTG 0.000207998573724 8.27712519592e-05 +ACCGTA 7.31423555953e-05 5.45938044837e-05 +GGCCAG 0.00045713972247 0.000214853037001 +ACCGTC 0.000262855340421 9.42183399961e-05 +AGAATT 0.000127999122292 0.000369828998116 +GGCAGT 0.000441139832184 0.000165542503918 +CCATAA 0.0 0.000214853037001 +CAAAGG 0.000157713204252 0.00027561065812 +CCATAC 0.000219427066786 0.0001074265185 +CAAAGA 0.000228569861235 0.000454361340542 +GAAACG 0.000180570190376 0.0002122114013 +CAAAGC 0.00027199813487 0.000311713012697 +CAACGT 4.34282736347e-05 8.71739781273e-05 +GGCAGA 0.000319997805729 0.000300265924661 +GGCAGC 0.000587424543375 0.00020252540373 +CGTGAG 0.000269712436258 0.000113590335136 +GGCAGG 0.000217141368173 
0.000184914499058 +CAACGC 5.02853694718e-05 0.000105665428033 +CAAAGT 0.000187427286213 0.000304668650829 +CAACGA 4.5713972247e-05 8.541288766e-05 +CTAACC 0.000102856437556 0.000100382156631 +CAACGG 5.48567666965e-05 5.72354401846e-05 +CGTGAA 0.00019428438205 0.000108307063734 +GGGGGT 0.00015542750564 0.000101262701865 +TACGCT 0.000157713204252 7.39657996231e-05 +GAAACC 0.000404568654386 0.000264163570083 +CGATCT 9.14279444941e-05 6.25187115862e-05 +CTCTAG 1.59998902865e-05 0.000140887237377 +TGAGCC 0.0 0.000179631227656 +CTCTAA 1.59998902865e-05 0.000204286494197 +CTCTAC 0.000482282407206 0.000160259232517 +GGGGGG 3.19997805729e-05 0.000169064684853 +CGATCC 6.17138625335e-05 7.6607435324e-05 +CGATCA 5.71424653088e-05 0.000103023792332 +GGGGGC 0.000139427615353 7.92490710248e-05 +CGATCG 2.74283833482e-05 3.52218093443e-05 +GGGGGA 0.000166855998702 0.000213091946533 +TACGCG 3.65711777976e-05 2.55358117747e-05 +CTCTAT 0.000253712545971 0.000163781413451 +TTCTGA 2.51426847359e-05 0.000423542257366 +CAGACC 0.000516567886392 0.000233344486906 +CAGACA 0.000411425750223 0.000504552418858 +CAGACG 0.000361140380752 0.000191958860927 +TGCGTG 0.000143999012578 0.000147051054013 +TAGTTG 0.0 0.000197242132328 +GCTAAT 0.000221712765398 0.000242149939242 +TAGTTC 0.0 0.000184914499058 +TAGTTA 0.0 0.000230702851205 +CAGACT 0.000390854462712 0.000354859729144 +GCTAAG 0.000251426847359 0.000103904337566 +TAGTTT 2.28569861235e-06 0.000484299878485 +GCTAAC 0.000207998573724 0.000117993061304 +GCTAAA 0.000521139283616 0.000292341017558 +TAAGTG 0.0 0.000232463941673 +TGCTGC 0.000294855120993 0.000424422802599 +AACCCT 0.00041828284606 0.000241269394009 +TATCAA 0.000123427725067 0.000267685751017 +AACCCC 0.000415997147448 0.000153214870648 +AACCCA 0.000443425530796 0.000293221562792 +AACCCG 0.000189712984825 6.60408925206e-05 +TGCATC 0.000374854572426 0.000311713012697 +AGATTT 0.00015542750564 0.000557385132874 +ACGTAA 2.28569861235e-06 0.000103023792332 +AACTGG 0.000441139832184 
0.000237747213074 +AACTGA 3.19997805729e-05 0.000530968775866 +AACTGC 0.000399997257162 0.000328443372136 +CCCAGA 0.000326854901566 0.000200764313263 +CCCAGC 0.000459425421083 0.000222777944103 +CCCAGG 0.000150856108415 0.000142648327845 +AACTGT 0.000301712216831 0.000398006445591 +AGTTTT 0.000223998464011 0.000686825282215 +ACGTAT 0.000109713533393 8.62934328936e-05 +CCCAGT 0.00041828284606 0.000140006692144 +CTCCAA 0.000143999012578 0.000253597027279 +TTGACG 0.000127999122292 0.000191078315693 +TATAAT 0.000175998793151 0.000454361340542 +CGCATA 7.771375282e-05 5.98770758854e-05 +ATACTG 0.000212569970949 0.000261521934382 +CGCATC 0.000381711668263 8.80545233609e-05 +ATACTA 6.17138625335e-05 0.000167303594386 +ATACTC 8.91422458817e-05 0.000133842875509 +CGCATG 0.000338283394628 0.000131201239808 +GCGCAA 7.31423555953e-05 0.000108307063734 +ATACTT 9.59993417188e-05 0.00026592466055 +CGCATT 0.000201141477887 0.000118873606537 +GCGCAC 0.000125713423679 0.000127679058873 +GCTCCG 0.000166855998702 0.000110948699435 +TATTGT 7.54280542076e-05 0.000486941514186 +GCTCCC 0.000157713204252 0.000124156877939 +GCTCCA 0.000527996379453 0.000292341017558 +CCTACT 0.000164570300089 0.000103023792332 +TGTACT 6.85709583706e-05 0.000240388848775 +TTTAGT 0.000118856327842 0.000438511526337 +TATTGG 8.22851500447e-05 0.000221897398869 +GCTCCT 0.000468568215532 0.000238627758308 +GCGCAG 0.000299426518218 0.000134723420742 +GCTGGG 0.000274283833482 0.000190197770459 +TTTAGG 7.08566569829e-05 0.00030995192223 +TGTACG 4.79996708594e-05 9.24572495289e-05 +CCTACG 4.34282736347e-05 3.52218093443e-05 +TGTACA 8.4570848657e-05 0.000337248824472 +CCTACA 0.000187427286213 0.000199003222796 +TGTACC 6.39995611459e-05 0.00012944014934 +CCTACC 9.82850403312e-05 7.04436186887e-05 +TTCGCG 5.25710680841e-05 3.34607188771e-05 +TTCGCA 0.000114284930618 0.000102143247099 +TTCGCC 0.000322283504342 6.42798020534e-05 +AGTATC 0.000118856327842 0.000154095415882 +GACTAA 2.28569861235e-05 0.000186675589525 +AGTATA 
7.99994514323e-05 0.000206047584664 +GACTAC 0.000585138844762 0.000112709789902 +AGTATG 0.000173713094539 0.000177870137189 +TTTGGA 0.000557710461414 0.000478136061849 +GACTAG 9.14279444941e-06 0.000110948699435 +TTCGCT 0.000201141477887 0.000120634697004 +GTGTAT 0.000347426189078 0.000300265924661 +GACTAT 0.000367997476589 0.000117993061304 +AGTATT 0.000125713423679 0.000301146469894 +TTACTC 9.14279444941e-05 0.000236866667841 +GGTCTT 0.000246855450134 0.000169064684853 +TTACTG 0.000244569751522 0.000339890460173 +ACCTTG 0.000123427725067 0.000194600496628 +ACCTTC 0.000489139503043 0.000181392318123 +ACCTTA 7.99994514323e-05 0.000173467411021 +TTACTT 5.48567666965e-05 0.000407692443161 +ACCTTT 0.000290283723769 0.000353979183911 +ACAGGG 0.000212569970949 0.000162900868218 +CTGGGA 0.000587424543375 0.000217494672701 +CTGGGC 0.000674281090644 0.000172586865787 +ACAGGC 0.000299426518218 0.000230702851205 +ACAGGA 0.000493710900268 0.000375992814751 +CTGGGG 0.000262855340421 0.000154095415882 +CAGGAC 0.000916565143553 0.00025623866298 +GCAGCT 0.000614852926723 0.000390081538489 +CAGGAA 0.000694852378155 0.000398886990825 +TCGCTT 5.71424653088e-05 0.000112709789902 +AGGACC 0.000134856218129 0.000111829244668 +AGGACA 0.000207998573724 0.000382156631386 +GCGCAT 5.48567666965e-05 8.89350685945e-05 +TTATAC 5.48567666965e-05 0.000259760843915 +ACAGGT 0.000290283723769 0.000253597027279 +CTGGGT 0.000454854023858 0.000184033953824 +TTATAG 4.5713972247e-06 0.000217494672701 +CTCCGA 0.000109713533393 0.000103023792332 +TCGCTG 0.00023314125846 0.000168184139619 +GAGTTG 0.000415997147448 0.000182272863357 +GCAGCG 0.000292569422381 0.000206928129898 +AGGACT 0.00015542750564 0.000220136308402 +GCAGCA 0.000683423885093 0.000452600250075 +GCAGCC 0.000527996379453 0.00019283940616 +CAGGAT 0.000685709583706 0.000320518465034 +TGCTGA 2.74283833482e-05 0.000520402233063 +CCTCAG 0.000653709803133 0.000296743743726 +CCTCAA 0.000338283394628 0.000247433210644 +CCTCAC 0.000265141039033 
0.000209569765599 +TATGTT 0.000235426957072 0.000406811897927 +ATGGGC 0.00049599659888 0.000134723420742 +TCTTTG 0.000265141039033 0.000390081538489 +ATGGGA 0.000482282407206 0.000223658489337 +TCTTTA 0.000196570080662 0.000504552418858 +ATGGGG 0.000221712765398 0.000177870137189 +TCTTTC 0.000317712107117 0.000386559357554 +CCTCAT 0.000290283723769 0.000296743743726 +TCTTTT 0.000242284052909 0.000680661465579 +GCTAGC 0.000109713533393 7.13241639223e-05 +TATGTA 0.000109713533393 0.000334607188771 +ATGGGT 0.000367997476589 0.000174347956255 +TATGTC 0.000166855998702 0.000183153408591 +GAAAGA 0.000425139941898 0.000527446594932 +ATCAAG 0.000722280761503 0.000254477572513 +CATGAT 0.000365711777976 0.000330204462603 +ATCAAC 0.000751994843464 0.000252716482046 +GAAAGG 0.000228569861235 0.000261521934382 +ATCAAA 0.000742852049015 0.000584682035116 +GGAACT 0.000287998025156 0.000175228501488 +GTCAGG 0.000162284601477 0.000197242132328 +CATGAG 0.000509710790555 0.000221897398869 +ATCAAT 0.000436568434959 0.000342532095874 +GTCCAA 0.000150856108415 0.000183153408591 +CATGAC 0.000313140709892 0.000272088477185 +CATGAA 0.000361140380752 0.000442033707272 +TGACCT 4.5713972247e-06 0.000279132839054 +GTAGCG 8.4570848657e-05 5.10716235493e-05 +CAAGTT 0.000210284272336 0.000292341017558 +GCGGCT 0.000281140929319 0.000110948699435 +GCTTTA 0.000281140929319 0.000419139531198 +GCTTTC 0.000386283065488 0.000316115738866 +TTTCTC 0.000292569422381 0.000530968775866 +GCGGCG 0.000198855779275 9.15767042953e-05 +CAAGTG 0.000342854791853 0.000238627758308 +GTAGCT 0.000150856108415 0.000165542503918 +TTCTTT 0.000397711558549 0.000686825282215 +GCGGCC 0.000230855559848 9.24572495289e-05 +CAAGTC 0.000205712875112 0.000191078315693 +GCGGCA 0.000123427725067 8.45323424264e-05 +CAAGTA 7.771375282e-05 0.000171706320554 +AACGGT 0.000198855779275 0.000109187608967 +GGGGAA 0.000262855340421 0.000239508303542 +TAATCG 0.0 9.77405209306e-05 +CTTGTG 0.000303997915443 0.000301146469894 +TAATCA 
2.28569861235e-06 0.00036366518148 +TAATCC 2.28569861235e-06 0.000184914499058 +TGCGAA 8.22851500447e-05 8.45323424264e-05 +CGGCCC 0.000118856327842 7.39657996231e-05 +AAAATT 0.000297140819606 0.000678019829879 +TTGATA 8.22851500447e-05 0.000290579927091 +CGGCCG 9.59993417188e-05 7.04436186887e-05 +CTTGTA 0.000121142026455 0.000245672120177 +GGTCTG 0.000655995501745 0.0002122114013 +TAATCT 0.0 0.000321399010267 +AGTTTA 0.000175998793151 0.000444675342972 +TGCGAC 0.000166855998702 7.57268900903e-05 +GGTCTC 0.000249141148746 0.000130320694574 +GGAGGC 0.000541710571128 0.000204286494197 +GGTCTA 0.000107427834781 9.06961590617e-05 +AAAATG 0.000614852926723 0.000964197030801 +AAAATC 0.000591995940599 0.000572354401846 +TCAATA 9.59993417188e-05 0.000352218093443 +AAAATA 0.000290283723769 0.00114823098463 +TCGTCA 0.000114284930618 0.000110948699435 +ATCCCT 0.000315426408505 0.000208689220365 +TTCAGG 0.000306283614055 0.000317876829333 +ATATCC 0.000105142136168 0.00017082577532 +TCGTCG 4.34282736347e-05 5.45938044837e-05 +TTTAGC 0.000137141916741 0.000327562826902 +TGCCAA 0.000146284711191 0.000279132839054 +TCGGGA 7.99994514323e-05 6.78019829879e-05 +GAACAA 0.000342854791853 0.000375112269517 +ATCCCG 0.000169141697314 7.74879805576e-05 +GAACAC 0.000319997805729 0.000300265924661 +ATCCCC 0.000212569970949 0.000108307063734 +GAACAG 0.000642281310071 0.000312593557931 +ATCCCA 0.000372568873813 0.000205167039431 +AACGGA 0.000333711997403 0.000127679058873 +CACCAC 0.000217141368173 0.000267685751017 +CACCAA 0.000159998902865 0.000257119208214 +CACCAG 0.000397711558549 0.000323160100734 +AACATT 0.000509710790555 0.000726449817727 +AACGGG 0.000185141587601 7.48463448567e-05 +TCCGTC 0.000185141587601 0.000116231970836 +GAGTTA 0.000171427395926 0.000178750682423 +TCCGTA 5.94281639212e-05 6.33992568198e-05 +ACCAGA 0.000290283723769 0.000314354648398 +CACCAT 0.000153141807028 0.000247433210644 +AACATG 0.00080685161016 0.000499269147456 +AACATA 0.000235426957072 0.000405931352694 
+AACATC 0.00100342169082 0.000421781166899 +CAGTCG 0.000221712765398 0.000114470880369 +GCACGG 0.000132570519516 7.74879805576e-05 +GTCAGA 0.000324569202954 0.000347815367275 +GCACGA 0.000125713423679 9.59794304633e-05 +CAGTCC 0.000354283284915 0.000206928129898 +GCACGC 9.59993417188e-05 0.000105665428033 +CAGTCA 0.000278855230707 0.000367187362415 +GCACCA 0.00027199813487 0.000191958860927 +GCAAGC 0.000127999122292 0.000149692689713 +GCACGT 9.82850403312e-05 0.000110068154201 +CAGTCT 0.000518853585004 0.000309071376997 +TAAAGC 0.0 0.000366306817181 +GCAAGG 0.000107427834781 0.000114470880369 +GTCAGT 0.000297140819606 0.000282655019988 +GTATAC 6.85709583706e-05 0.000120634697004 +TTCCCG 0.000102856437556 0.000103023792332 +GTATAA 1.14284930618e-05 0.000235105577374 +GTATAG 1.14284930618e-05 0.000125917968406 +CCGGAA 0.000105142136168 5.81159854182e-05 +CCGGAC 0.000239998354297 8.01296162584e-05 +CCGGAG 0.000356568983527 0.000110948699435 +CGTCTT 0.000109713533393 0.000145289963545 +TATTTG 0.000201141477887 0.000569712766145 +TTCCGG 0.000114284930618 5.10716235493e-05 +TTCCGA 5.25710680841e-05 6.5160347287e-05 +TTCCGC 0.000239998354297 8.80545233609e-05 +GTATAT 8.22851500447e-05 0.000280013384288 +CGTCTC 0.000173713094539 0.000138245601677 +CGTCTA 5.48567666965e-05 9.50988852297e-05 +CGTCTG 0.000461711119695 0.000189317225226 +CCGGAT 0.000150856108415 8.01296162584e-05 +CTCTTC 0.000692566679543 0.000375112269517 +TGTTAT 0.00011657062923 0.000452600250075 +CTCTTA 9.82850403312e-05 0.00022453903457 +CTCTTG 0.000153141807028 0.000235105577374 +CCCGAT 0.000111999232005 4.4907806914e-05 +TACGTA 4.79996708594e-05 5.89965306518e-05 +TACGTC 0.00019428438205 6.5160347287e-05 +TCTGGT 0.000315426408505 0.000250074846345 +CGCGAT 3.65711777976e-05 4.22661712132e-05 +TACGTG 0.000223998464011 8.62934328936e-05 +TGTTAG 2.28569861235e-06 0.00020252540373 +GGTTTC 0.000319997805729 0.000226300125037 +CTCGTG 0.000123427725067 0.000100382156631 +CTCTTT 0.000322283504342 
0.000504552418858 +TGTTAA 9.14279444941e-06 0.000473733335681 +CGCGAA 4.34282736347e-05 4.66688973813e-05 +CGCGAC 6.85709583706e-05 4.0505080746e-05 +GATATT 0.000377140271038 0.000452600250075 +TCTGGC 0.000278855230707 0.000235986122607 +TACGTT 8.4570848657e-05 0.000132081785041 +CGCGAG 0.000166855998702 6.42798020534e-05 +AGTCGT 0.000130284820904 8.18907067256e-05 +TTACCT 0.000134856218129 0.000270327386718 +CTCCAT 0.000146284711191 0.000320518465034 +TATGCC 0.000283426627932 9.95016113978e-05 +TTACCG 6.85709583706e-05 8.62934328936e-05 +CTCCAC 0.000223998464011 0.000339890460173 +AGTCGG 0.000118856327842 8.36517971928e-05 +TTACCC 0.000105142136168 0.000145289963545 +AGTCGA 0.000125713423679 9.06961590617e-05 +TTACCA 0.000123427725067 0.000229822305972 +AGTCGC 0.000121142026455 7.92490710248e-05 +GTACCG 5.94281639212e-05 4.13856259796e-05 +CCGACT 9.59993417188e-05 6.16381663526e-05 +GCCTAC 0.000372568873813 9.59794304633e-05 +GCCTAA 2.28569861235e-05 0.000110068154201 +GCCTAG 2.28569861235e-06 7.48463448567e-05 +GTGCGG 0.000210284272336 6.25187115862e-05 +CCGACA 0.000100570738944 8.98156138281e-05 +GATATC 0.000299426518218 0.000147931599246 +CCGACC 0.000105142136168 6.16381663526e-05 +CCGACG 4.11425750223e-05 4.13856259796e-05 +GCCTAT 0.000185141587601 0.000103023792332 +TGACGG 0.0 0.000130320694574 +TGACGT 4.5713972247e-06 0.000213091946533 +CCCGAG 0.000255998244583 5.81159854182e-05 +CTCGTT 7.08566569829e-05 9.77405209306e-05 +CCCGAA 0.000157713204252 9.95016113978e-05 +CGCACA 0.000210284272336 0.000211330856066 +ATACAC 0.000121142026455 0.000376873359985 +CGCACC 0.000244569751522 6.86825282215e-05 +ATACAA 8.68565472694e-05 0.000376873359985 +ACTGCT 0.000436568434959 0.000381276086153 +ATACAG 0.000276569532095 0.000337248824472 +ATTGCC 0.000329140600179 0.000131201239808 +GGGTCC 9.59993417188e-05 8.62934328936e-05 +GAGCGT 0.000283426627932 0.000130320694574 +CTCACT 0.000409140051611 0.000285296655689 +GGGTCT 0.000125713423679 0.000115351425603 +ACTGCA 
0.000393140161325 0.000489583149886 +TTTGTC 0.000354283284915 0.00040505080746 +ACTGCC 0.000299426518218 0.000181392318123 +CGCACT 0.000164570300089 8.98156138281e-05 +ATACAT 7.54280542076e-05 0.000381276086153 +ACTGCG 0.000107427834781 0.000130320694574 +CTCACC 0.000553139064189 0.000220136308402 +CTCACA 0.000411425750223 0.000321399010267 +CTCACG 0.000159998902865 9.33377947625e-05 +GTGGGT 0.000340569093241 0.000147931599246 +GTGAGG 0.000226284162623 0.000229822305972 +GATTTT 0.00034971188769 0.000633992568198 +ACAATT 0.000146284711191 0.000409453533628 +CTTGAA 0.000306283614055 0.000316996284099 +TAGTGA 0.0 0.000164661958685 +GTGAGA 0.000235426957072 0.00030995192223 +TCGACG 3.42854791853e-05 6.95630734551e-05 +TCGACA 5.25710680841e-05 0.000111829244668 +TAGAGG 2.28569861235e-06 0.000182272863357 +ACAATA 0.000105142136168 0.000421781166899 +GTGAGT 0.000221712765398 0.000221016853636 +ACAATC 0.00015542750564 0.000266805205783 +GATTTC 0.000498282297493 0.000337248824472 +CCAGTA 0.000134856218129 0.000176109046722 +TCGACT 7.31423555953e-05 9.95016113978e-05 +CTGGTT 0.000493710900268 0.000272969022419 +TTGAGT 0.000164570300089 0.000389200993255 +TCAAAA 0.000262855340421 0.000655125653805 +CGAACG 3.65711777976e-05 6.25187115862e-05 +CGAACC 8.22851500447e-05 6.42798020534e-05 +CGAACA 9.37136431064e-05 0.000125917968406 +CTGGTG 0.00132570519516 0.000285296655689 +CTGGTA 0.000173713094539 0.000151453780181 +CTGGTC 0.000548567666965 0.000182272863357 +CGAACT 7.99994514323e-05 7.6607435324e-05 +TTGCAT 0.000102856437556 0.000402409171759 +GACGAG 0.000868565472694 0.000134723420742 +TTGCAG 0.000356568983527 0.000345173731575 +GAGGGA 0.000619424323947 0.000284416110456 +TTGCAC 0.000173713094539 0.000239508303542 +TTGCAA 0.000148570409803 0.000347815367275 +TCCATC 0.000653709803133 0.000308190831763 +TCGCAT 4.11425750223e-05 0.000109187608967 +GTCGTG 0.000146284711191 9.50988852297e-05 +GTCGTA 4.79996708594e-05 4.93105330821e-05 +GTCGTC 0.000132570519516 9.86210661642e-05 
+CCGTTT 0.000166855998702 0.000151453780181 +GAGCAA 0.000484568105819 0.000284416110456 +TTTTTG 0.000217141368173 0.00087350087174 +ATATCG 4.11425750223e-05 9.50988852297e-05 +GTCGTT 6.39995611459e-05 8.89350685945e-05 +TCAGAT 0.000511996489167 0.000434108800169 +TCGCAG 0.000223998464011 0.000110068154201 +GTGCTC 0.000434282736347 0.00023422503214 +TCGCAC 6.39995611459e-05 9.42183399961e-05 +CCGTTG 5.25710680841e-05 8.27712519592e-05 +CCGTTA 3.65711777976e-05 5.81159854182e-05 +CCGTTC 0.000191998683438 7.6607435324e-05 +GATCGG 0.000100570738944 5.45938044837e-05 +AATGCT 0.000438854133572 0.000390081538489 +GGTACG 4.5713972247e-05 4.31467164468e-05 +GGTACA 0.000114284930618 0.000133842875509 +TCCAGG 0.000214855669561 0.000248313755878 +GGTACC 5.71424653088e-05 4.66688973813e-05 +GCGGAC 0.000207998573724 6.95630734551e-05 +CAGGGA 0.000372568873813 0.000221016853636 +TTCAGC 0.000603424433661 0.000477255516616 +TATTTC 0.00031085501128 0.000489583149886 +TATTTA 0.000111999232005 0.000924572495289 +CCCCCT 0.000244569751522 0.000177870137189 +GGTACT 7.99994514323e-05 8.71739781273e-05 +AATGCG 0.000118856327842 0.00012679851364 +AATGCC 0.000393140161325 0.000183153408591 +AATGCA 0.000463996818308 0.000523924413997 +CCCCCC 7.99994514323e-05 0.000122395787472 +CCCCCA 0.000178284491763 0.000136484511209 +TATTTT 0.000207998573724 0.00146874944966 +CCCCCG 8.68565472694e-05 6.16381663526e-05 +CTTGAG 0.000411425750223 0.000206047584664 +ATGATG 0.000726852158728 0.000510716235493 +GAAGTA 0.000230855559848 0.000198122677562 +ATGATA 0.000130284820904 0.000280013384288 +ATGATC 0.000601138735049 0.000272088477185 +TGCAAC 0.000324569202954 0.000235986122607 +TGCAAA 0.000347426189078 0.000556504587641 +TGCAAG 0.000340569093241 0.000247433210644 +GAAGTG 0.000795423117099 0.000315235193632 +ATGATT 0.000365711777976 0.000494866421288 +CTTGAT 0.000301712216831 0.000258880298681 +GTGCTT 0.000306283614055 0.000283535565222 +TTGAGG 0.000207998573724 0.000254477572513 +GCCATT 0.000575996050313 
0.00029762428896 +TTGAGC 0.000191998683438 0.00025623866298 +TGCAAT 0.000196570080662 0.000346934822042 +GAGAAT 0.000715423665666 0.000273849567652 +TGTCTT 0.000143999012578 0.000387439902788 +TTCAGA 0.00045713972247 0.000529207685399 +AGTAGC 0.00011657062923 0.000105665428033 +AAGGCC 0.00054399626974 0.000159378687283 +ATAACC 9.37136431064e-05 0.000204286494197 +AAGGCA 0.000397711558549 0.000273849567652 +ATAACA 0.000159998902865 0.000452600250075 +AAGGCG 0.000196570080662 9.59794304633e-05 +ATAACG 5.25710680841e-05 0.000102143247099 +TATGCT 0.000301712216831 0.000264163570083 +GAGGGT 0.000498282297493 0.000165542503918 +TGTCTG 0.000527996379453 0.000526566049698 +GAGAAG 0.00225369883178 0.000361904091013 +GAGAAA 0.00165484579534 0.000587323670817 +TGTCTC 0.000148570409803 0.000263283024849 +GAGAAC 0.00107884974503 0.000236866667841 +TGTCTA 9.14279444941e-05 0.000219255763169 +TGTGGC 0.000237712655685 0.000216614127468 +TGTGGA 0.000450282626633 0.000469330609513 +TGTGGG 0.000201141477887 0.000241269394009 +ATAACT 9.14279444941e-05 0.000258880298681 +ACTGAC 0.000420568544673 0.000328443372136 +GGTATT 0.000191998683438 0.000168184139619 +AGGTAT 0.000125713423679 0.00015849814205 +ACCAAG 0.000658281200357 0.00017082577532 +GAGCAT 0.000329140600179 0.000235986122607 +ACCAAA 0.000671995392032 0.00035133754821 +ATCATC 0.00085485128102 0.000474613880915 +ACCAAC 0.000566853255863 0.0002122114013 +GGTCAC 0.000242284052909 0.000174347956255 +GATCGC 0.000162284601477 6.0757621119e-05 +GGTCAT 0.000276569532095 0.000203405948964 +ACCAAT 0.000365711777976 0.000206047584664 +AGGTAG 6.85709583706e-06 0.0001074265185 +AGGTAA 4.5713972247e-06 0.000221897398869 +AGGTAC 0.000100570738944 0.000124156877939 +TTTTGC 9.82850403312e-05 0.000473733335681 +GCCGTC 0.000281140929319 9.77405209306e-05 +AATTGG 6.17138625335e-05 0.000199883768029 +GTCGGA 0.000127999122292 8.98156138281e-05 +AATTGA 2.28569861235e-06 0.00036366518148 +ATCATG 0.00069028098093 0.000377753905218 +AATTGC 
3.885687641e-05 0.000198122677562 +TAGCCA 0.0 0.000137365056443 +TGGGGT 0.000146284711191 0.000172586865787 +CGCTCG 0.000102856437556 7.57268900903e-05 +GATAAT 0.000297140819606 0.000214853037001 +CGCTCA 0.000153141807028 0.000141767782611 +CGCTCC 0.000313140709892 0.00011711251607 +AATTGT 7.31423555953e-05 0.000440272616804 +AATCTT 0.000230855559848 0.000370709543349 +TCTTAT 0.000171427395926 0.000244791574943 +GTTATG 0.000159998902865 0.000247433210644 +CGCTCT 0.000267426737645 0.000173467411021 +GATAAA 0.000642281310071 0.000399767536058 +TCGAAG 0.000102856437556 8.541288766e-05 +GATAAC 0.000331426298791 0.000172586865787 +TAGCCT 0.0 0.000154975961115 +TGGGGC 0.000169141697314 0.000125917968406 +GATAAG 0.000482282407206 0.00012679851364 +GATCCT 0.000436568434959 0.000198122677562 +AAGAAT 0.000489139503043 0.000366306817181 +GGGGCT 0.000150856108415 0.000154095415882 +GTTATC 0.000162284601477 0.00016113977775 +AAGAAC 0.000989707499149 0.000309071376997 +GATCCA 0.000409140051611 0.000233344486906 +AAGAAA 0.00165941719257 0.0008541288766 +GATCCC 0.000258283943196 0.000108307063734 +AAGAAG 0.00198170069691 0.000475494426149 +GATCCG 0.000175998793151 0.000103023792332 +GCCCCA 0.000166855998702 9.77405209306e-05 +TATTAA 1.82855888988e-05 0.000708838913055 +CAAGGG 0.000132570519516 0.00012679851364 +CAAGGC 0.00027199813487 0.000146170508779 +GGTTCT 0.000212569970949 0.00015849814205 +CAAGGA 0.000351997586302 0.000177870137189 +GGTTCA 0.000180570190376 0.000179631227656 +GGTTCC 0.000127999122292 9.77405209306e-05 +AAGGCT 0.000619424323947 0.000228061215505 +TGCATT 0.000228569861235 0.00058556258035 +GGTTCG 6.17138625335e-05 4.75494426149e-05 +TAGGCA 0.0 9.6859975697e-05 +ATTATA 9.14279444941e-05 0.000514238416427 +ATTATC 0.000210284272336 0.000257999753447 +TAGAAT 0.0 0.000208689220365 +GATAGT 0.000130284820904 0.000132081785041 +ATTATG 0.000221712765398 0.000405931352694 +AGTCTT 0.000212569970949 0.000266805205783 +TCCCAA 0.000139427615353 0.000213091946533 +TAGAAA 0.0 
0.000353098638677 +TAGAAC 0.0 0.000112709789902 +ATTATT 0.000212569970949 0.000972121937904 +TAGAAG 0.0 0.000193719951394 +AGTCTA 0.000130284820904 0.000163781413451 +AGTCTC 0.000169141697314 0.000207808675132 +AGTCTG 0.000564567557251 0.000353979183911 +TAACTC 0.0 0.000174347956255 +TAACTA 2.28569861235e-06 0.000195481041861 +TAACTG 0.0 0.00023422503214 +ATCTAT 0.000265141039033 0.000272969022419 +GCCCCG 0.000123427725067 5.37132592501e-05 +TAACTT 0.0 0.000312593557931 +ATCTAG 9.14279444941e-06 0.000125037423172 +ATCTAC 0.00053485347529 0.000177870137189 +ATCTAA 1.14284930618e-05 0.000294982653259 +GGTCAG 0.000445711229409 0.000249194301111 +CCATGT 0.000162284601477 0.000250074846345 +GCCCCC 0.000159998902865 0.000103023792332 +TGGAGG 0.000283426627932 0.000404170262226 +TGGAGA 0.00027199813487 0.000554743497173 +GGTCAA 0.000221712765398 0.000198122677562 +CCATGC 0.000125713423679 0.00016113977775 +TCCAGA 0.00037942596965 0.000412975714562 +CCATGA 4.5713972247e-06 0.000240388848775 +CCATGG 0.000141713313966 0.000155856506349 +TGGAGT 0.000189712984825 0.000331085007837 +TCAGTA 0.000123427725067 0.000257119208214 +TCAGTC 0.000205712875112 0.000324040645968 +CTCTCT 0.00031085501128 0.000493985876054 +GGGGAT 0.000285712326544 0.000113590335136 +TCAGTG 0.000463996818308 0.000439392071571 +CGATAT 8.68565472694e-05 6.78019829879e-05 +CTCTCG 0.00011657062923 0.000131201239808 +GGGGAC 0.00023314125846 0.000135603965976 +CTCTCC 0.00042742564051 0.000332846098304 +TCAGTT 0.000166855998702 0.000448197523907 +CTCTCA 0.000237712655685 0.00028793829139 +CGATAG 2.28569861235e-06 4.40272616804e-05 +CGATAA 1.37141916741e-05 8.80545233609e-05 +ATGCTG 0.000966850513025 0.000434108800169 +CGATAC 0.000166855998702 5.72354401846e-05 +TTCACT 0.000461711119695 0.000448197523907 +GTGTTT 0.000829708596284 0.000772238169875 +GCTACT 0.000143999012578 0.000145289963545 +GTGGTT 0.000491425201656 0.000254477572513 +GTGTTC 0.000509710790555 0.000279132839054 +GTGTTA 0.000130284820904 
0.000276491203353 +GTGTTG 0.000399997257162 0.000397125900358 +CGTGCC 0.000139427615353 6.5160347287e-05 +GTGGTA 0.00015542750564 0.000131201239808 +GCTACA 0.000210284272336 0.000209569765599 +GTGGTC 0.000511996489167 0.000157617596816 +GCTACC 7.54280542076e-05 6.95630734551e-05 +GTGGTG 0.00117256338814 0.000266805205783 +GCTACG 4.5713972247e-05 3.52218093443e-05 +CGTGCA 0.000141713313966 0.000119754151771 +TTTCGT 0.000107427834781 0.000114470880369 +TGCCCT 0.000157713204252 0.000171706320554 +AGCGTT 0.000130284820904 0.00014881214448 +CGTGCG 3.19997805729e-05 7.04436186887e-05 +CACGGA 0.000164570300089 9.33377947625e-05 +TGCCCA 0.000150856108415 0.000145289963545 +TGCCCG 8.4570848657e-05 6.16381663526e-05 +ACAGTG 0.000559996160026 0.000407692443161 +AGCGTA 4.5713972247e-05 7.22047091559e-05 +AGCGTC 0.000214855669561 0.000110948699435 +GTACGT 3.885687641e-05 6.86825282215e-05 +AGCGTG 0.000230855559848 0.000117993061304 +GGCCCC 0.000198855779275 8.89350685945e-05 +GGCCCA 0.000262855340421 0.000117993061304 +CACGTG 0.000175998793151 0.000156737051582 +GGCCCG 0.000118856327842 6.5160347287e-05 +CACGTA 4.34282736347e-05 7.92490710248e-05 +CACGTC 0.000130284820904 0.000117993061304 +ACGTGC 7.99994514323e-05 9.24572495289e-05 +ACGTGA 4.5713972247e-06 0.00015849814205 +ACGTGG 6.39995611459e-05 0.000106545973267 +AAATGG 0.000322283504342 0.000427944983534 +CACGTT 8.22851500447e-05 0.000133842875509 +GGGGCC 0.000178284491763 7.92490710248e-05 +TCTTAA 9.14279444941e-06 0.000364545726714 +GGCCCT 0.000299426518218 0.000108307063734 +GGGACA 0.000223998464011 0.000191958860927 +GGTATA 3.885687641e-05 0.0001074265185 +GGGACC 0.000137141916741 8.62934328936e-05 +TCGTGG 6.85709583706e-05 6.16381663526e-05 +ACGTGT 9.59993417188e-05 0.000167303594386 +TCTTAG 2.28569861235e-06 0.00012944014934 +GCTTCC 0.00019428438205 0.000136484511209 +AGAGAG 0.00084570848657 0.000708838913055 +AGAGAA 0.000511996489167 0.00057587658278 +AGAGAC 0.000626281419785 0.000323160100734 +TCGTGA 
9.14279444941e-06 8.18907067256e-05 +GTCTTT 0.000255998244583 0.000334607188771 +TCTATC 0.000159998902865 0.000160259232517 +TCTATG 0.000219427066786 0.000150573234947 +AGAGAT 0.000422854243285 0.000348695912509 +TTCACA 0.000482282407206 0.000567951675678 +GTCTTG 0.000164570300089 0.000195481041861 +TCTATT 0.000148570409803 0.000300265924661 +GTCTTC 0.000425139941898 0.000267685751017 +GTCTTA 9.59993417188e-05 0.000169945230086 +GCAGAG 0.00120913456593 0.000402409171759 +GAAGGC 0.000461711119695 0.000201644858496 +GCTCAC 0.000267426737645 0.000213972491767 +GCAGAC 0.000617138625335 0.000276491203353 +GCAGAA 0.000710852268442 0.000471972245214 +GCTCAG 0.000861708376857 0.000289699381857 +ATGCGT 0.000166855998702 0.000114470880369 +TTGCTG 0.000420568544673 0.000401528626526 +CTTGAC 0.000255998244583 0.000208689220365 +TTGCTA 8.4570848657e-05 0.000199003222796 +TTGCTC 0.000159998902865 0.000260641389148 +GCAGAT 0.00065142410452 0.000358381910079 +GCTCAT 0.000313140709892 0.00026592466055 +GTTTGC 0.000137141916741 0.000311713012697 +TTGCTT 0.000189712984825 0.000420900621665 +ATGCGG 0.00011657062923 9.15767042953e-05 +GAGAGG 0.000623995721172 0.000369828998116 +TAGCCG 0.0 5.72354401846e-05 +ATGCGC 0.000214855669561 0.0001074265185 +ATGCGA 0.000118856327842 0.000110948699435 +TGTAAG 0.000253712545971 0.000237747213074 +GGCTAG 2.28569861235e-06 8.18907067256e-05 +TGTAAC 0.000182855888988 0.00027825229382 +GGCTAC 0.000367997476589 9.59794304633e-05 +GACTGT 0.0003885687641 0.00029762428896 +GGCTAA 9.14279444941e-06 0.000156737051582 +ACTGTA 0.000221712765398 0.000388320448021 +TTGGTA 8.22851500447e-05 0.00013912614691 +ACTGTC 0.000285712326544 0.000222777944103 +TTGGTC 0.000187427286213 0.000205167039431 +TTCGAC 0.000265141039033 8.80545233609e-05 +ACTGTG 0.000525710680841 0.000365426271948 +AATCAA 0.000235426957072 0.000513357871194 +GACTGC 0.000548567666965 0.000267685751017 +GGCTAT 0.000255998244583 0.000156737051582 +GACTGA 6.62852597582e-05 0.000403289716993 +GACTGG 
0.000553139064189 0.00027561065812 +TGTAAT 0.000137141916741 0.000469330609513 +TGTGGT 0.000230855559848 0.000349576457743 +TTGGTT 0.000182855888988 0.000323160100734 +ACTGTT 0.000351997586302 0.000425303347833 +GTTTCT 0.000260569641808 0.00044907806914 +ATAATT 9.14279444941e-05 0.000549460225772 +CCTTTT 0.00019428438205 0.000455241885776 +GTTTCG 6.39995611459e-05 8.80545233609e-05 +GTTTCC 0.000175998793151 0.000290579927091 +GTTTCA 0.000230855559848 0.000400648081292 +ATAATG 0.000153141807028 0.000409453533628 +CCTTTA 0.000171427395926 0.000359262455312 +CCTTTC 0.000246855450134 0.000208689220365 +ATAATC 0.000150856108415 0.000274730112886 +ATAATA 5.48567666965e-05 0.000579398763714 +CCTTTG 0.000207998573724 0.000258880298681 +ACAGAA 0.000610281529498 0.000590845851751 +ACAGAC 0.00066056689897 0.000409453533628 +GCCCCT 0.000244569751522 9.86210661642e-05 +ACAGAG 0.000980564704699 0.000471972245214 +TTATCT 8.68565472694e-05 0.000280893929521 +CTGGAC 0.00133713368823 0.000280893929521 +CTGGAA 0.000758851939301 0.000427944983534 +CTGGAG 0.00258512513057 0.000533610411567 +ACAGAT 0.000541710571128 0.00044907806914 +GTGAAG 0.00119999177148 0.000389200993255 +TGGGGA 0.000175998793151 0.000176989591955 +CCTTGC 0.000143999012578 0.000144409418312 +TTATCA 9.37136431064e-05 0.000390081538489 +CTGGAT 0.0010354214714 0.000389200993255 +TTATCC 0.000105142136168 0.000185795044291 +CCAATG 0.000338283394628 0.000188436679992 +AAACCG 0.000159998902865 0.000127679058873 +AAACCA 0.000658281200357 0.000476374971382 +CCAATC 0.000180570190376 0.000165542503918 +AAACCC 0.000546281968352 0.000313474103165 +CCAATA 7.99994514323e-05 0.000184914499058 +TGTCGG 8.91422458817e-05 9.86210661642e-05 +GAGATC 0.00122970585345 0.000232463941673 +AACGAG 0.000473139612757 0.000123276332705 +GAGATA 0.000210284272336 0.00017082577532 +AACGAA 0.000217141368173 0.000137365056443 +GAGATG 0.00116799199091 0.000374231724284 +AACGAC 0.000374854572426 0.000117993061304 +AAACCT 0.000518853585004 
0.000374231724284 +CCAATT 0.000118856327842 0.000206928129898 +ATTTGC 0.000105142136168 0.00038303717662 +GTCGAT 8.4570848657e-05 6.78019829879e-05 +ATTTGG 0.000105142136168 0.000312593557931 +AACGAT 0.000196570080662 0.00012679851364 +TGTCGT 7.771375282e-05 0.00014881214448 +CCCTAC 0.000210284272336 6.69214377543e-05 +GAGATT 0.000681138186481 0.000302907560361 +CCCTAA 4.5713972247e-06 0.000140887237377 +TACAAC 0.000571424653088 0.000232463941673 +CATTAT 0.000201141477887 0.000448197523907 +CTACAC 9.14279444941e-05 0.000222777944103 +TGGATT 0.000219427066786 0.000422661712132 +ATCACA 0.000598853036436 0.000440272616804 +ATCACC 0.000605710132273 0.000221897398869 +CATGCT 0.000287998025156 0.00032932391737 +TGACAC 2.28569861235e-06 0.000225419579804 +ATCACG 0.000187427286213 0.000106545973267 +TGGATA 8.4570848657e-05 0.000309071376997 +CATTAG 4.5713972247e-06 0.000260641389148 +CATTAA 1.59998902865e-05 0.000459644611944 +CATTAC 0.000235426957072 0.000285296655689 +CATGCA 0.000265141039033 0.000403289716993 +TGACAT 2.28569861235e-06 0.000434108800169 +CATGCC 0.000210284272336 0.000179631227656 +ATCACT 0.000523424982229 0.000282655019988 +CATGCG 7.31423555953e-05 0.000117993061304 +GTAGAA 0.000164570300089 0.000194600496628 +GTAGAC 0.000205712875112 0.000110948699435 +GTAGAG 0.000322283504342 0.000140887237377 +ACAACA 0.000338283394628 0.000678019829879 +ACAACC 0.000146284711191 0.000213091946533 +GATTGC 0.000111999232005 0.000155856506349 +ACAACG 7.31423555953e-05 0.000104784882799 +ACTAAG 0.000203427176499 0.000160259232517 +GTAGAT 0.000249141148746 0.000164661958685 +TGCTAC 0.000226284162623 0.000125037423172 +GGGTAC 0.000111999232005 5.98770758854e-05 +TGATGC 4.5713972247e-06 0.000308190831763 +GATTGT 0.000148570409803 0.000293221562792 +TTCAGT 0.000450282626633 0.000518641142595 +ACAACT 0.000187427286213 0.00033196555307 +GGGTAA 4.5713972247e-06 0.000130320694574 +TTGCCT 0.000180570190376 0.000247433210644 +TAATAG 0.0 0.000210450310832 +AGTCCG 9.59993417188e-05 
6.86825282215e-05 +TAATAC 0.0 0.000221016853636 +GTCCGA 0.00015542750564 6.69214377543e-05 +TAATAA 0.0 0.000759910536604 +GTCCGC 0.000148570409803 6.42798020534e-05 +TGTTTA 9.59993417188e-05 0.000845323424264 +TCAACA 0.000228569861235 0.000443794797739 +CACATC 0.000502853694718 0.000357501364845 +GTCCGT 9.14279444941e-05 7.83685257912e-05 +TAATAT 0.0 0.000545938044837 +ATTCGC 0.00015542750564 8.89350685945e-05 +TTCTCC 0.000575996050313 0.000375112269517 +ATTCGA 0.000157713204252 8.541288766e-05 +TAACGA 0.0 8.45323424264e-05 +ATTCGG 9.14279444941e-05 6.69214377543e-05 +TTCTCG 8.91422458817e-05 0.000115351425603 +GAGCTA 0.000205712875112 0.000162900868218 +TCGTAC 0.000114284930618 5.37132592501e-05 +TACCTT 0.000180570190376 0.000189317225226 +TCGTAA 4.5713972247e-06 9.24572495289e-05 +TCGTAG 4.5713972247e-06 4.57883521476e-05 +GAACCT 0.000290283723769 0.000124156877939 +ATCCAA 0.000203427176499 0.000272969022419 +ATCCAC 0.00041828284606 0.000236866667841 +TGAACC 0.0 0.000198122677562 +TTCTCT 0.000445711229409 0.000457883521476 +ATCCAG 0.000836565692121 0.00033196555307 +GAACCC 0.000260569641808 0.000140006692144 +TTATAT 7.31423555953e-05 0.000604934575489 +GAACCA 0.000326854901566 0.000182272863357 +GAACCG 0.000107427834781 9.24572495289e-05 +TACCTC 0.000313140709892 0.000162900868218 +TCGTAT 9.59993417188e-05 8.62934328936e-05 +ACCATC 0.000799994514323 0.000185795044291 +ACCATA 0.000210284272336 0.000149692689713 +ACCATG 0.000493710900268 0.00018051177289 +ATGTTG 0.000367997476589 0.000425303347833 +AGCTCT 0.000452568325246 0.000339890460173 +TGTCCT 0.000230855559848 0.000311713012697 +AGTGGT 0.000230855559848 0.000189317225226 +TGATGT 6.85709583706e-06 0.000526566049698 +ATGTTT 0.000491425201656 0.000846203969498 +ACCATT 0.000443425530796 0.000281774474755 +AGCTCG 0.000139427615353 0.000117993061304 +AGTGGC 0.000262855340421 0.000176109046722 +AGCTCC 0.000411425750223 0.000251835936812 +AGCTCA 0.000317712107117 0.000370709543349 +TCCATG 0.00053485347529 
0.000242149939242 +CAGTAA 2.74283833482e-05 0.000385678812321 +CAGTAC 0.000562281858639 0.000185795044291 +CAGTAG 2.28569861235e-05 0.000183153408591 +TTAATA 9.14279444941e-05 0.000635753658665 +CAGTAT 0.000402282955774 0.000281774474755 +AAAGTA 0.000276569532095 0.000388320448021 +AAAGTC 0.000470853914145 0.000375992814751 +AAAGTG 0.000850279883795 0.000515999506895 +GATCTA 0.000198855779275 0.000153214870648 +GATCTC 0.000354283284915 0.000204286494197 +TGTCCA 0.000221712765398 0.000324040645968 +GATCTG 0.000980564704699 0.000292341017558 +TCCCTT 0.000157713204252 0.000223658489337 +CCGGCC 0.000127999122292 9.33377947625e-05 +ATTAGG 8.22851500447e-05 0.000191078315693 +AAAGTT 0.000399997257162 0.000475494426149 +ACTCTC 0.000242284052909 0.000242149939242 +ATTAGC 0.00011657062923 0.00023422503214 +TCCCTC 0.000210284272336 0.000247433210644 +TGTCCG 0.000121142026455 0.0001074265185 +TCCCTA 6.39995611459e-05 9.95016113978e-05 +TTAATG 0.000173713094539 0.000543296409137 +TCCCTG 0.000258283943196 0.000217494672701 +GATCTT 0.000397711558549 0.000240388848775 +CACAAT 0.000308569312668 0.000418258985964 +AAGTGC 0.000285712326544 0.000254477572513 +TGTTCT 0.000146284711191 0.000415617350263 +AAGTGG 0.000299426518218 0.000265044115316 +TGATTG 4.5713972247e-06 0.000358381910079 +TGCGCA 6.39995611459e-05 0.000147051054013 +TCTGAT 0.000548567666965 0.000430586619235 +TGTATT 0.000114284930618 0.000591726396985 +CGAGGT 0.000105142136168 7.13241639223e-05 +TGTTCA 0.000127999122292 0.000518641142595 +CACAAG 0.000511996489167 0.000296743743726 +TGTTCC 0.000109713533393 0.00023422503214 +AAGTGT 0.000319997805729 0.000398886990825 +CACAAC 0.00049599659888 0.000322279555501 +TGTTCG 7.31423555953e-05 0.000121515242238 +TCTGAA 0.000601138735049 0.000558265678108 +TCTGAC 0.000454854023858 0.000294982653259 +TCTGAG 0.000676566789256 0.000344293186341 +CGCGCC 8.22851500447e-05 8.01296162584e-05 +CGCGCA 5.94281639212e-05 7.74879805576e-05 +GGTAAA 0.000324569202954 0.000289699381857 +CGCGCG 
5.02853694718e-05 9.50988852297e-05 +CTCCCT 0.00019428438205 0.000220136308402 +AGCGGG 0.000180570190376 0.000115351425603 +AGCGGA 0.000269712436258 0.000127679058873 +ACCTCT 0.000306283614055 0.000250074846345 +GGTAAG 7.31423555953e-05 8.62934328936e-05 +CGCGCT 7.54280542076e-05 0.000102143247099 +ACCTCC 0.000290283723769 0.000187556134759 +ACCTCA 0.000255998244583 0.000269446841484 +ACCTCG 6.39995611459e-05 5.89965306518e-05 +CTCCCC 0.000114284930618 0.000145289963545 +GCCTCT 0.000333711997403 0.000200764313263 +TAAGCC 2.28569861235e-06 0.000123276332705 +CGGGAG 0.000285712326544 8.98156138281e-05 +CCCATG 0.000420568544673 0.000165542503918 +CGGGAA 0.000148570409803 8.98156138281e-05 +CCCATA 0.00011657062923 0.000119754151771 +CGGGAC 0.000251426847359 7.6607435324e-05 +CCCATC 0.000521139283616 0.000164661958685 +TTGTTG 0.000191998683438 0.000483419333251 +CCTGCT 0.000628567118397 0.000333726643538 +CCGAAT 0.000100570738944 8.18907067256e-05 +CCCATT 0.000306283614055 0.000221016853636 +CGGGAT 0.000191998683438 8.89350685945e-05 +GCCTCG 0.000143999012578 9.33377947625e-05 +GCCTCA 0.000262855340421 0.000210450310832 +TAAGCT 2.28569861235e-06 0.000218375217935 +GCCTCC 0.000363426079364 0.000183153408591 +CCGAAC 0.000114284930618 9.50988852297e-05 +CCTGCG 0.000182855888988 0.000120634697004 +CCGAAA 0.000157713204252 0.000123276332705 +CCGAAG 0.000137141916741 0.000116231970836 +CCTGCC 0.000372568873813 0.000240388848775 +CCTGCA 0.000493710900268 0.000388320448021 +GTACTG 0.000166855998702 0.000191078315693 +CAAGGT 0.000228569861235 0.00014881214448 +CCATTC 0.000239998354297 0.000220136308402 +CCATTA 0.000100570738944 0.000211330856066 +CCATTG 0.000143999012578 0.000243030484476 +TATAAC 0.000201141477887 0.000200764313263 +GATTTG 0.000356568983527 0.000377753905218 +GTACTC 6.62852597582e-05 8.36517971928e-05 +GTCGAA 5.02853694718e-05 6.60408925206e-05 +CCATTT 0.00019428438205 0.000442914252505 +TCAGCG 0.000121142026455 0.00020252540373 +TCTACC 8.22851500447e-05 
9.95016113978e-05 +TCTACG 5.48567666965e-05 5.98770758854e-05 +ACTTTC 0.000299426518218 0.000298504834193 +CGCAAC 0.000251426847359 8.541288766e-05 +TGCTTA 6.17138625335e-05 0.000253597027279 +CGCAAA 0.000374854572426 0.000163781413451 +ATACCG 2.51426847359e-05 5.19521687829e-05 +CGCAAG 0.000386283065488 9.15767042953e-05 +ATACCA 0.000143999012578 0.000154975961115 +ATACCC 8.22851500447e-05 7.48463448567e-05 +TCTACT 0.000153141807028 0.00017082577532 +CCGAGT 0.000102856437556 5.89965306518e-05 +ATACCT 0.00011657062923 0.000149692689713 +GATAGG 5.25710680841e-05 5.01910783157e-05 +CGCAAT 0.000132570519516 9.15767042953e-05 +TAGTAA 0.0 0.00022453903457 +TGGGTG 0.000292569422381 0.000189317225226 +TAGTAC 0.0 9.77405209306e-05 +CTCAAG 0.000681138186481 0.0002122114013 +GTTTTG 0.000370283175201 0.000647200746702 +CTCAAA 0.000731423555953 0.000409453533628 +TAGTAG 0.0 0.000128559604107 +CTCAAC 0.000756566240689 0.000240388848775 +GTGTGC 0.00027199813487 0.000302027015128 +GTGTGA 2.05712875112e-05 0.000455241885776 +GTGTGG 0.000340569093241 0.000317876829333 +TAACGG 0.0 9.6859975697e-05 +TTGCGT 0.000111999232005 9.77405209306e-05 +CTCAAT 0.000468568215532 0.000288818836624 +TAGTAT 0.0 0.00016113977775 +GTTCTC 0.000249141148746 0.000240388848775 +GTGTGT 0.000546281968352 0.00129616258387 +ATCTTT 0.000313140709892 0.000435869890636 +GTTGGG 0.000153141807028 0.000155856506349 +GTTGGA 0.000409140051611 0.000244791574943 +GTTGGC 0.000265141039033 0.000138245601677 +CCTTGG 7.54280542076e-05 0.000142648327845 +CCTTGA 1.37141916741e-05 0.000183153408591 +GGGAAA 0.000514282187779 0.000407692443161 +AGGATT 0.000207998573724 0.00030995192223 +GGACGT 0.000109713533393 0.000111829244668 +GTTGGT 0.000306283614055 0.000187556134759 +ATCTTG 0.000258283943196 0.000252716482046 +GAGAGT 0.000518853585004 0.000214853037001 +ATCTTA 0.000150856108415 0.000233344486906 +ATCTTC 0.000674281090644 0.000299385379427 +AGGATC 0.000255998244583 0.000209569765599 +AGGATA 0.000114284930618 
0.00015849814205 +AGGATG 0.000278855230707 0.000317876829333 +GACATG 0.000909708047716 0.000273849567652 +GACATC 0.00105827845752 0.000257119208214 +GACATA 0.000251426847359 0.00019283940616 +TCGGCG 7.54280542076e-05 5.54743497173e-05 +GAATAT 0.000308569312668 0.00033196555307 +GGTGCA 0.00037942596965 0.000179631227656 +TCGGCC 0.000123427725067 7.04436186887e-05 +ATCAGA 0.000354283284915 0.000431467164468 +TTGCGA 4.34282736347e-05 5.54743497173e-05 +GACATT 0.00069028098093 0.000389200993255 +TTGCGG 0.000102856437556 6.86825282215e-05 +GAATAG 4.5713972247e-06 0.000145289963545 +TCGGCT 0.000148570409803 0.000125037423172 +CATAGA 6.39995611459e-05 0.000154975961115 +GAATAC 0.000413711448836 0.000160259232517 +GAATAA 2.51426847359e-05 0.000361904091013 +TGCCTG 0.000306283614055 0.000253597027279 +CACCGC 0.000146284711191 0.000103023792332 +TGCAGA 0.000265141039033 0.000499269147456 +TTAATT 0.000102856437556 0.000757268900903 +GGACGA 0.00019428438205 0.000119754151771 +TGAGGT 2.28569861235e-06 0.000258880298681 +TTTCTA 0.000157713204252 0.00035133754821 +TTATAA 1.37141916741e-05 0.000508074599792 +GAAATG 0.000603424433661 0.000550340771005 +AATGAT 0.000498282297493 0.000489583149886 +GAAATA 0.000226284162623 0.000511596780727 +GAAATC 0.000546281968352 0.000355740274378 +GAAATT 0.000358854682139 0.000384798267087 +AATGAA 0.000825137199059 0.000773118715108 +AATGAC 0.000635424214234 0.000390081538489 +AATGAG 0.000911993746329 0.000391842628956 +TCAGCA 0.000338283394628 0.000476374971382 +TACTAG 9.14279444941e-06 7.74879805576e-05 +CTTGGA 0.000338283394628 0.00022453903457 +CTTGGC 0.00023314125846 0.000132962330275 +GTATTG 7.99994514323e-05 0.000214853037001 +GTCCTT 0.000182855888988 0.000173467411021 +CTTGGG 0.000141713313966 0.00012944014934 +AGCCCT 0.000281140929319 0.00018051177289 +GGGAAT 0.000198855779275 0.000163781413451 +TTGAAT 0.000196570080662 0.000486941514186 +GTCCTA 5.02853694718e-05 8.541288766e-05 +GTCCTC 0.000235426957072 0.000218375217935 +TGCACG 
8.91422458817e-05 0.000109187608967 +CTTGGT 0.000217141368173 0.000169064684853 +GTCCTG 0.000493710900268 0.000253597027279 +TGCACC 0.000287998025156 0.000188436679992 +TTGAAA 0.000331426298791 0.000554743497173 +AGCCCG 0.000125713423679 8.62934328936e-05 +AGCCCA 0.000278855230707 0.000162900868218 +AGCCCC 0.000201141477887 0.000111829244668 +TTAACT 0.000100570738944 0.000353979183911 +ATAAAT 0.000157713204252 0.000862053783703 +TGGTCG 3.885687641e-05 8.1010161492e-05 +TGGTCA 0.000118856327842 0.000266805205783 +TGGTCC 0.000114284930618 0.000149692689713 +ACGATT 6.85709583706e-05 0.000138245601677 +GAGACT 0.000548567666965 0.000226300125037 +GATTCA 0.000356568983527 0.000360143000546 +GAGGAG 0.0030902645239 0.000477255516616 +ATAAAG 0.000313140709892 0.000545938044837 +ATAAAA 0.000180570190376 0.00104960991846 +TCAGCC 0.000292569422381 0.000264163570083 +ATAAAC 0.000217141368173 0.000496627511755 +GAGACG 0.000452568325246 0.000203405948964 +ACGATG 0.000123427725067 0.000142648327845 +CTACAG 0.000406854352999 0.000262402479615 +GAGACC 0.000678852487869 0.000136484511209 +ACGATC 6.62852597582e-05 9.24572495289e-05 +GAGACA 0.000409140051611 0.000306429741296 +ACGATA 4.34282736347e-05 8.18907067256e-05 +GGTTAC 0.000219427066786 8.27712519592e-05 +ATGTGT 0.000262855340421 0.000509835690259 +GGTTAA 9.14279444941e-06 0.000228061215505 +ACCACT 0.000383997366875 0.000233344486906 +GCACTT 0.000237712655685 0.000229822305972 +CTTCCT 0.000484568105819 0.000291460472324 +TAAACT 0.0 0.000457883521476 +CTCAGT 0.00046628251692 0.000271207931951 +TCCGCT 0.000139427615353 0.000140006692144 +ACCACG 0.000118856327842 9.42183399961e-05 +ATGTGG 0.000269712436258 0.000302027015128 +ACCACC 0.000571424653088 0.000182272863357 +GAGGAA 0.00161370322032 0.000506313509325 +ACCACA 0.000459425421083 0.00032932391737 +GGTTAT 0.000244569751522 0.000191078315693 +TCCGCC 0.00019428438205 9.95016113978e-05 +GACGTG 0.000477711009982 0.000132962330275 +TCCGCG 5.71424653088e-05 6.69214377543e-05 +AACGTC 
0.000290283723769 0.000150573234947 +AACGTA 6.17138625335e-05 8.80545233609e-05 +AACGTG 0.000329140600179 0.00014881214448 +CGTGAC 0.000182855888988 0.000109187608967 +AGTGTG 0.00046628251692 0.000460525157177 +TCTTCG 6.85709583706e-05 0.0001074265185 +AGTGTA 0.000132570519516 0.000293221562792 +TCTTCA 0.000331426298791 0.000542415863903 +AGTGTC 0.000212569970949 0.000220136308402 +TCTTCC 0.000187427286213 0.000283535565222 +CGTGAT 0.000182855888988 0.000123276332705 +TAAGTC 0.0 0.000147931599246 +AACGTT 0.000150856108415 0.000233344486906 +CCGTGA 4.5713972247e-06 9.95016113978e-05 +TCTTCT 0.000301712216831 0.000427944983534 +AGTGTT 0.000294855120993 0.000486060968952 +GTGCGC 0.000260569641808 0.000120634697004 +TTGGCA 0.000235426957072 0.000251835936812 +AAAGGT 0.000470853914145 0.000323160100734 +TTGGCC 0.000269712436258 0.000179631227656 +GAGTAG 1.14284930618e-05 0.000115351425603 +TTAACG 4.34282736347e-05 0.000132081785041 +TTGGCG 0.000111999232005 8.45323424264e-05 +GACTTC 0.000722280761503 0.000188436679992 +TCGAGT 6.17138625335e-05 9.95016113978e-05 +GACTTA 8.68565472694e-05 0.000143528873078 +GACTTG 0.000395425859937 0.000188436679992 +AAAGGG 0.00027199813487 0.000311713012697 +AAAGGA 0.00081599440461 0.000454361340542 +TTGGCT 0.000347426189078 0.000241269394009 +AAAGGC 0.000617138625335 0.000252716482046 +GAGTAC 0.000585138844762 8.62934328936e-05 +GTATGG 7.54280542076e-05 0.000155856506349 +GACTTT 0.000596567337824 0.000380395540919 +TAAACC 2.28569861235e-06 0.000318757374566 +CAAGAA 0.000411425750223 0.000356620819612 +GACGTT 0.000173713094539 0.00016113977775 +CAAGAC 0.000402282955774 0.000230702851205 +GATTCG 0.000121142026455 7.22047091559e-05 +CAAGAG 0.000537139173903 0.00023422503214 +TTCTTG 0.000249141148746 0.00034165155064 +TTCTTC 0.000758851939301 0.000420900621665 +TTCTTA 0.000102856437556 0.00029762428896 +CAAGAT 0.000308569312668 0.000272088477185 +AGGGCT 0.000143999012578 0.000178750682423 +TACGGG 0.000118856327842 4.31467164468e-05 +TACGGA 
0.000189712984825 6.60408925206e-05 +TACGGC 0.000329140600179 6.60408925206e-05 +CTCAGA 0.000354283284915 0.000375992814751 +TAGAGT 0.0 0.000168184139619 +TACGGT 0.000137141916741 4.93105330821e-05 +AGGGCG 9.59993417188e-05 9.6859975697e-05 +AGGGCA 0.000130284820904 0.000223658489337 +AGGGCC 0.000146284711191 0.000143528873078 +TCGAGA 3.885687641e-05 8.45323424264e-05 +CTGTGC 0.000386283065488 0.000308190831763 +TAGAGA 4.5713972247e-06 0.000211330856066 +GGCGAG 0.000306283614055 0.000105665428033 +CCGTGT 0.00011657062923 0.000119754151771 +GTGCTG 0.0010925639367 0.00036102354578 +GTGCTA 0.000146284711191 0.000117993061304 +ATCTGT 0.000317712107117 0.000394484264657 +CGAATT 6.17138625335e-05 8.71739781273e-05 +GCTCGT 0.000203427176499 0.000102143247099 +ATCTGA 2.28569861235e-05 0.000410334078862 +CGAATC 0.000125713423679 6.60408925206e-05 +ATCTGC 0.000441139832184 0.000350457002976 +CGAATA 5.71424653088e-05 8.541288766e-05 +CGAATG 0.000169141697314 0.000125037423172 +ATCTGG 0.000319997805729 0.00027561065812 +ATCATA 0.000205712875112 0.000286177200923 +GATAGC 0.000100570738944 9.24572495289e-05 +AAGCAG 0.00100570738944 0.000464047338112 +GATAGA 0.000118856327842 0.000151453780181 +AAGCAA 0.000322283504342 0.000387439902788 +CAAACT 0.000210284272336 0.000486060968952 +AAGCAC 0.000347426189078 0.000299385379427 +TTTACC 0.000166855998702 0.0003196379198 +CATTTC 0.00027199813487 0.000521282778296 +CTAGGT 5.25710680841e-05 6.69214377543e-05 +CATTTA 0.000125713423679 0.000686825282215 +CATTTG 0.000180570190376 0.0005318493211 +GTTATA 7.31423555953e-05 0.000208689220365 +CAAACA 0.000285712326544 0.000797773981649 +AAGCAT 0.000287998025156 0.000368948452882 +CAAACC 0.000253712545971 0.000316996284099 +CTCGGG 0.000141713313966 7.30852543895e-05 +ATCATT 0.000509710790555 0.000492224785587 +CAAACG 7.08566569829e-05 0.000178750682423 +CTAGGG 6.85709583706e-05 5.28327140165e-05 +GTTATT 0.000205712875112 0.000456122431009 +TGGAGC 0.000198855779275 0.000359262455312 +CTAGGC 
0.000107427834781 6.25187115862e-05 +CATTTT 0.000182855888988 0.00109187608967 +CTAGGA 9.37136431064e-05 7.30852543895e-05 +AAGTTT 0.000630852817009 0.000491344240354 +TACAGT 0.000351997586302 0.000424422802599 +CTCGAA 0.000111999232005 0.000116231970836 +CTAACG 3.42854791853e-05 6.86825282215e-05 +CTAAGG 7.08566569829e-05 0.000105665428033 +CTAAGA 9.37136431064e-05 0.000121515242238 +CTAAGC 7.31423555953e-05 0.000112709789902 +TACAGG 0.000290283723769 0.000199003222796 +AAGTTG 0.000297140819606 0.000366306817181 +AAGTTA 0.000143999012578 0.0003196379198 +TACAGC 0.000559996160026 0.000307310286529 +AAGTTC 0.000603424433661 0.000197242132328 +TACAGA 0.000514282187779 0.00033196555307 +CTAAGT 6.85709583706e-05 0.000104784882799 +GTATGA 2.05712875112e-05 0.000211330856066 +GGGGCA 0.000201141477887 9.6859975697e-05 +GGGGCG 0.000107427834781 6.60408925206e-05 +TTCAAC 0.000722280761503 0.000286177200923 +TATATC 0.000134856218129 0.000210450310832 +TATATA 6.62852597582e-05 0.000694750189317 +TGTATA 5.71424653088e-05 0.000388320448021 +TATATG 0.000162284601477 0.000314354648398 +TCCGGT 0.000105142136168 8.98156138281e-05 +GGTCCA 0.000374854572426 0.000134723420742 +TATATT 0.000171427395926 0.000641917475301 +GGACTA 8.22851500447e-05 0.00014881214448 +TCTTAC 0.000180570190376 0.000185795044291 +TGATGG 2.28569861235e-06 0.00040505080746 +CTCGGT 0.000121142026455 8.541288766e-05 +ATGAGT 0.000287998025156 0.000267685751017 +GCCCAC 0.000171427395926 0.000108307063734 +TGATGA 2.28569861235e-06 0.000582920944649 +TGCCAT 0.000191998683438 0.000316115738866 +ACTCTT 0.000281140929319 0.000308190831763 +TGTTTC 0.000189712984825 0.00050014969269 +CACATA 0.000159998902865 0.000324921191202 +TCATAA 2.05712875112e-05 0.000377753905218 +CACATG 0.000518853585004 0.000378634450452 +TGTTTG 0.000276569532095 0.000751985629502 +ATGAGA 0.000306283614055 0.000339890460173 +ATGAGC 0.000514282187779 0.000267685751017 +ATGAGG 0.000411425750223 0.000257119208214 +GTGGCT 0.000610281529498 
0.000179631227656 +ACTCTG 0.000655995501745 0.000316115738866 +TGCCAG 0.000331426298791 0.000215733582234 +ACTCTA 0.000164570300089 0.000172586865787 +ATTGTT 0.000308569312668 0.000511596780727 +CACATT 0.00027199813487 0.000486060968952 +TGCCAC 0.000159998902865 0.000191958860927 +AAATTC 0.000313140709892 0.000317876829333 +AAATTA 0.000153141807028 0.000742299631932 +AAATTG 0.000175998793151 0.000461405702411 +TCCACT 0.000372568873813 0.000340771005407 +TTCAAT 0.000319997805729 0.000393603719423 +CTTCGA 0.000111999232005 6.69214377543e-05 +CGTACC 4.11425750223e-05 3.87439902788e-05 +TTAATC 8.91422458817e-05 0.000369828998116 +AAATTT 0.000292569422381 0.000590845851751 +TTCGAA 9.14279444941e-05 9.33377947625e-05 +CTTAGT 7.54280542076e-05 0.000106545973267 +GGGAAC 0.000322283504342 0.000127679058873 +GGTGCG 0.000109713533393 5.45938044837e-05 +GCGTAT 0.000125713423679 6.16381663526e-05 +GGGAAG 0.000553139064189 0.000185795044291 +GGTGCC 0.000301712216831 8.71739781273e-05 +TCGTCC 0.000137141916741 0.000124156877939 +GCCCAT 0.000148570409803 0.000130320694574 +AGAGCG 0.000148570409803 0.000179631227656 +GCGACT 8.22851500447e-05 7.92490710248e-05 +AGAGCC 0.000267426737645 0.000217494672701 +AGAGCA 0.000301712216831 0.000423542257366 +GCGTAG 6.85709583706e-06 4.93105330821e-05 +GGTGCT 0.000450282626633 0.000190197770459 +TCCCCA 0.000166855998702 0.000135603965976 +GCGTAC 0.000164570300089 4.40272616804e-05 +TAATTT 0.0 0.000760791081838 +GCGTAA 2.28569861235e-06 9.6859975697e-05 +GCGACG 6.17138625335e-05 6.0757621119e-05 +AGAGCT 0.000342854791853 0.000370709543349 +GCGACA 7.31423555953e-05 8.27712519592e-05 +TATCGG 7.99994514323e-05 4.22661712132e-05 +GCGACC 7.54280542076e-05 7.13241639223e-05 +TCCGGC 0.00015542750564 8.01296162584e-05 +GCAGGA 0.000639995611459 0.000347815367275 +GCAGGC 0.00031085501128 0.00018051177289 +GCAGGG 0.000223998464011 0.000217494672701 +CAATTT 6.17138625335e-05 0.000457883521476 +GCACTA 8.91422458817e-05 0.000132962330275 +CCGTGC 
4.5713972247e-05 7.22047091559e-05 +GCTCGG 0.000109713533393 8.27712519592e-05 +TATTCT 0.000148570409803 0.000325801736435 +CCGTGG 9.59993417188e-05 7.74879805576e-05 +CTTTAG 4.5713972247e-06 0.000268566296251 +GCAGGT 0.000299426518218 0.000268566296251 +CTTCGT 8.68565472694e-05 7.6607435324e-05 +TATTCC 0.000134856218129 0.00025623866298 +CAATTG 4.5713972247e-05 0.000206928129898 +TATTCA 0.000121142026455 0.000420900621665 +CAATTA 6.17138625335e-05 0.000333726643538 +TATTCG 6.39995611459e-05 0.000103023792332 +CAATTC 8.4570848657e-05 0.0002122114013 +TGTAGA 7.08566569829e-05 0.00028793829139 +GGCTCA 0.000255998244583 0.000184033953824 +AGCATT 0.000324569202954 0.000390081538489 +GGCTCC 0.000477711009982 0.000145289963545 +AATCCT 0.000345140490465 0.000260641389148 +TGTAGG 4.5713972247e-05 0.000199003222796 +GGCTCG 0.000132570519516 8.62934328936e-05 +TTCGGA 0.000164570300089 9.33377947625e-05 +TTCGGC 0.00027199813487 8.541288766e-05 +CTTTAC 0.000297140819606 0.000325801736435 +TTCAAG 0.000635424214234 0.000307310286529 +TTCGGG 0.000198855779275 6.5160347287e-05 +AGCATG 0.000516567886392 0.000307310286529 +GGCTCT 0.000436568434959 0.000178750682423 +AGCATC 0.000575996050313 0.000285296655689 +TTTACT 0.000169141697314 0.000586443125583 +AGCATA 0.000143999012578 0.000276491203353 +TTACAT 6.85709583706e-05 0.000498388602222 +AGACCT 0.00019428438205 0.00018051177289 +TTCGGT 0.000132570519516 9.24572495289e-05 +GTTTAT 0.000260569641808 0.000581159854182 +GGGGAG 0.000326854901566 0.000189317225226 +GCACTC 0.000180570190376 0.000157617596816 +GCCTTT 0.000365711777976 0.000272088477185 +TGCTTG 0.000182855888988 0.000314354648398 +GTTTAA 2.05712875112e-05 0.000552101861473 +GAGGTG 0.00124342004512 0.000240388848775 +AACTTG 0.000278855230707 0.000296743743726 +GTTTAG 6.85709583706e-06 0.000266805205783 +GAGGTA 0.000182855888988 0.00016113977775 +CTGGCT 0.000783994624037 0.000271207931951 +AGGCGG 8.68565472694e-05 8.62934328936e-05 +AGGCGA 5.94281639212e-05 0.000111829244668 
+AGGCGC 9.14279444941e-05 7.57268900903e-05 +AGGAGT 0.000159998902865 0.000195481041861 +ACAGCC 0.000345140490465 0.000284416110456 +ACAGCA 0.000438854133572 0.000591726396985 +TCCAGC 0.000692566679543 0.000412975714562 +ACAGCG 0.000189712984825 0.000204286494197 +AGGAGC 0.000274283833482 0.000294102108025 +AGGCGT 5.48567666965e-05 0.000103904337566 +AGGAGA 0.000331426298791 0.000394484264657 +CTGGCG 0.000317712107117 9.86210661642e-05 +AGGAGG 0.000244569751522 0.000368067907648 +CTGGCA 0.000500567996105 0.00022453903457 +CTGGCC 0.000934850732452 0.000167303594386 +GCGCCC 8.4570848657e-05 6.0757621119e-05 +GCATCG 6.85709583706e-05 8.27712519592e-05 +ACAGCT 0.000420568544673 0.000420900621665 +GGAAAA 0.000564567557251 0.000595248577919 +AACGCT 0.000253712545971 0.000153214870648 +AATTTT 0.000148570409803 0.000738777450998 +AAACAG 0.000921136540778 0.000733494179596 +TGTCAC 0.000173713094539 0.000289699381857 +AAACAC 0.000580567447538 0.000875261962207 +GCTCTA 0.000180570190376 0.000146170508779 +AAACAA 0.000445711229409 0.000988852297343 +AACGCG 5.94281639212e-05 8.71739781273e-05 +AATTTA 0.000118856327842 0.000648081291936 +AATTTC 0.000217141368173 0.000394484264657 +AACGCC 0.000299426518218 9.06961590617e-05 +TTTCAC 0.000235426957072 0.00044907806914 +ATTGGA 0.000436568434959 0.000291460472324 +AATTTG 0.000207998573724 0.000484299878485 +ATTTAC 0.000274283833482 0.000479016607083 +ATTTAA 1.37141916741e-05 0.00089551450258 +ATTTAG 4.5713972247e-06 0.000400648081292 +CCCTCG 0.000107427834781 9.06961590617e-05 +TAAATA 0.0 0.000935139038092 +GGAAAG 0.000772566130975 0.000345173731575 +TGACCA 9.14279444941e-06 0.000283535565222 +CATTCT 0.000134856218129 0.00032932391737 +TAAATC 4.5713972247e-06 0.000432347709702 +TGACCG 4.5713972247e-06 9.42183399961e-05 +GCGCCA 6.39995611459e-05 0.000115351425603 +CATTCG 6.62852597582e-05 9.42183399961e-05 +CATTCC 0.000118856327842 0.000215733582234 +CATTCA 0.000185141587601 0.000511596780727 +TCCATA 0.000143999012578 
0.000227180670271 +GCCTTC 0.000566853255863 0.000147931599246 +ACAAAT 0.000207998573724 0.000615501118292 +GATTAA 2.28569861235e-05 0.000349576457743 +GTAGGC 7.31423555953e-05 9.77405209306e-05 +GATTAC 0.000402282955774 0.000205167039431 +GTAGGA 0.000111999232005 0.000149692689713 +GTAGGG 7.99994514323e-05 9.06961590617e-05 +GATTAG 4.5713972247e-06 0.000171706320554 +GCGGGT 0.000125713423679 7.22047091559e-05 +ACAAAC 0.000347426189078 0.00065160347287 +TTTCAA 0.000182855888988 0.000622545480161 +ACAAAA 0.000313140709892 0.000865575964637 +ACAAAG 0.000324569202954 0.000420020076431 +GCGGGC 8.4570848657e-05 6.78019829879e-05 +GTCGGT 0.000146284711191 7.39657996231e-05 +GCGGGA 0.000139427615353 0.000122395787472 +CCCTCA 0.000239998354297 0.000205167039431 +GATTAT 0.000313140709892 0.000388320448021 +GTAGGT 8.4570848657e-05 0.000105665428033 +AACCTG 0.000953136321351 0.000280013384288 +TAATGA 0.0 0.000428825528767 +GATGTT 0.00058285314615 0.00043675043587 +TAATGC 0.0 0.000282655019988 +GGGATG 0.00031085501128 0.000186675589525 +GCCCTT 0.000175998793151 0.000134723420742 +GCGCCG 8.22851500447e-05 8.89350685945e-05 +GGCGTA 6.17138625335e-05 5.6354894951e-05 +GCCCTC 0.000258283943196 0.000162020322984 +GATGTG 0.00106742125197 0.000352218093443 +GCCCTA 7.99994514323e-05 6.86825282215e-05 +GATGTA 0.000205712875112 0.000245672120177 +GCCCTG 0.000532567776678 0.000197242132328 +GATGTC 0.000450282626633 0.000286177200923 +AACCTC 0.000463996818308 0.000206928129898 +TGAAAT 2.28569861235e-06 0.000694750189317 +GCTCTT 0.000486853804431 0.000316996284099 +TTCTAA 2.05712875112e-05 0.000291460472324 +ATCCGT 0.000223998464011 0.000100382156631 +TTCTAC 0.00049599659888 0.000169064684853 +GGCTTA 9.59993417188e-05 0.000101262701865 +TTGGAT 0.000409140051611 0.000356620819612 +TGAAAC 0.0 0.000449958614374 +TGAAAG 2.28569861235e-06 0.000462286247645 +CCCTCC 0.000303997915443 0.000236866667841 +ATCCGC 0.000331426298791 0.000112709789902 +ATCCGA 0.000171427395926 8.541288766e-05 +ATCCGG 
0.000148570409803 6.78019829879e-05 +TGGCTC 0.000251426847359 0.000175228501488 +TGGCTA 6.62852597582e-05 0.000140006692144 +TGGCTG 0.000429711339122 0.000324040645968 +GGGTGA 6.85709583706e-06 0.000155856506349 +AGTTGT 7.771375282e-05 0.000312593557931 +AGCTAT 0.000210284272336 0.000176109046722 +ATTAGT 9.14279444941e-05 0.000325801736435 +GTCGGC 0.000159998902865 6.95630734551e-05 +TGGCTT 0.000171427395926 0.000307310286529 +AGTTGG 7.08566569829e-05 0.000172586865787 +AGCTAA 2.05712875112e-05 0.000244791574943 +AGCTAC 0.000322283504342 0.000173467411021 +AGTTGC 5.94281639212e-05 0.000188436679992 +AGTTGA 1.37141916741e-05 0.000378634450452 +AGCTAG 6.85709583706e-06 0.000101262701865 +TCCTAG 9.14279444941e-06 5.81159854182e-05 +TCCAGT 0.000395425859937 0.000333726643538 +CTAATT 7.54280542076e-05 0.000288818836624 +CAGTGT 0.00046628251692 0.000470211154747 +TAACAA 0.0 0.000382156631386 +TCAGGT 0.000180570190376 0.000230702851205 +TCAAGG 0.000100570738944 0.00017082577532 +GTCGGG 0.000105142136168 7.83685257912e-05 +TCAAGC 0.000153141807028 0.000208689220365 +CTGCAC 0.000539424872515 0.000359262455312 +CTAATA 6.17138625335e-05 0.000235986122607 +CAGTGC 0.000422854243285 0.000335487734005 +CTAATC 0.000100570738944 0.000152334325414 +CAGTGA 5.25710680841e-05 0.000425303347833 +CAGTGG 0.000422854243285 0.000272088477185 +CTAATG 0.000223998464011 0.000230702851205 +TCAAGT 0.000123427725067 0.000271207931951 +CTGCAA 0.000372568873813 0.000354859729144 +GAGCCC 0.000473139612757 0.000130320694574 +TCAGGC 0.000214855669561 0.000200764313263 +CGAGCT 0.000203427176499 0.000121515242238 +TCCTAA 1.37141916741e-05 0.000163781413451 +GATCAG 0.000697138076767 0.00029762428896 +CAACCA 0.000235426957072 0.000215733582234 +GTTCAT 0.000255998244583 0.000331085007837 +TTCCCT 0.000317712107117 0.000239508303542 +TAACAG 0.0 0.000344293186341 +CGAGCG 0.000107427834781 6.86825282215e-05 +GTTAGT 7.771375282e-05 0.000156737051582 +CGAGCA 0.000159998902865 0.000125037423172 +GAGCCG 
0.000326854901566 0.00013912614691 +CGAGCC 0.000166855998702 9.86210661642e-05 +GTTCAG 0.000587424543375 0.000350457002976 +TTCCCC 0.000329140600179 0.000197242132328 +GTTCAC 0.000255998244583 0.000243030484476 +GTTCAA 0.000180570190376 0.000260641389148 +TGAGCA 4.5713972247e-06 0.00035133754821 +GGACCT 0.000413711448836 9.95016113978e-05 +TACATG 0.000591995940599 0.000277371748587 +AAGTAC 0.000420568544673 0.000168184139619 +AAGTAA 4.5713972247e-05 0.000389200993255 +TACATC 0.000649138405908 0.000233344486906 +AAGTAG 1.37141916741e-05 0.000173467411021 +TACATA 0.000134856218129 0.000295863198493 +GCACAC 0.000178284491763 0.00033196555307 +GGACCG 0.000125713423679 5.45938044837e-05 +ATTGGT 0.000372568873813 0.000219255763169 +GGACCA 0.000420568544673 0.000120634697004 +TCATCA 0.000267426737645 0.000556504587641 +GGACCC 0.000395425859937 9.42183399961e-05 +CACACG 0.000189712984825 0.000181392318123 +TACATT 0.000319997805729 0.000574115492313 +CACACC 0.000294855120993 0.000291460472324 +GCACAA 0.000249141148746 0.000354859729144 +CACACA 0.00041828284606 0.00165806667489 +TCTGCC 0.000404568654386 0.000254477572513 +TCTGCA 0.000516567886392 0.000470211154747 +TCTGCG 0.000148570409803 0.000169064684853 +TAGATT 2.28569861235e-06 0.000266805205783 +CCCGTC 0.000164570300089 9.77405209306e-05 +TTAAGG 8.22851500447e-05 0.000260641389148 +CCCGTA 2.74283833482e-05 4.4907806914e-05 +CGTCGA 4.34282736347e-05 4.66688973813e-05 +CCCGTG 0.000134856218129 8.1010161492e-05 +TTAAGC 7.54280542076e-05 0.000228941760738 +TTACGT 1.37141916741e-05 0.000119754151771 +TAGATC 2.28569861235e-06 0.000132081785041 +TGTCAA 0.000164570300089 0.00036102354578 +TAGATA 0.0 0.000152334325414 +TTTCGC 0.000105142136168 9.33377947625e-05 +TAGATG 2.28569861235e-06 0.00024391102971 +TGGTTA 7.31423555953e-05 0.000218375217935 +TCTGCT 0.000546281968352 0.000569712766145 +TTACGC 3.42854791853e-05 6.95630734551e-05 +TTAAGT 6.85709583706e-05 0.000416497895497 +TTACGA 5.02853694718e-05 8.98156138281e-05 +TTACGG 
5.94281639212e-05 6.60408925206e-05 +CCCGTT 7.99994514323e-05 6.69214377543e-05 +AGTGAC 0.000484568105819 0.00024655266541 +AGTGAA 0.00053485347529 0.000455241885776 +AGTGAG 0.000537139173903 0.000283535565222 +GCGGTC 0.000130284820904 7.13241639223e-05 +CGGGCG 7.771375282e-05 2.99385379427e-05 +CCCTCT 0.000269712436258 0.000229822305972 +TAAGAG 0.0 0.000196361587095 +CCTGAT 0.00058285314615 0.000255358117747 +GAAAGT 0.000319997805729 0.000318757374566 +CGGGCA 9.37136431064e-05 8.98156138281e-05 +TAAGAC 0.0 0.000143528873078 +ATTTAT 0.000205712875112 0.000911364316785 +AGTGAT 0.000434282736347 0.00027561065812 +TAAGAT 0.0 0.000247433210644 +CCTGAA 0.000555424762802 0.000396245355124 +CCTGAC 0.000479996708594 0.000218375217935 +CCTGAG 0.000653709803133 0.000248313755878 +GTTAGG 4.11425750223e-05 9.42183399961e-05 +ATTGGG 0.000171427395926 0.000169064684853 +TATACT 8.68565472694e-05 0.000221016853636 +GTTAGC 7.771375282e-05 0.000116231970836 +CTCCAG 0.000548567666965 0.000476374971382 +GTTAGA 9.59993417188e-05 0.000136484511209 +TGTAGC 6.85709583706e-05 0.000213972491767 +GCCAAA 0.000948564924126 0.000293221562792 +TATACG 2.51426847359e-05 4.4907806914e-05 +GGTAGC 8.4570848657e-05 7.48463448567e-05 +TATACC 6.17138625335e-05 0.000108307063734 +TATACA 0.000109713533393 0.000403289716993 +ATTGGC 0.000287998025156 0.000203405948964 +TCTTGT 8.4570848657e-05 0.000358381910079 +TCCACG 0.000123427725067 0.000110068154201 +ACTAGT 5.94281639212e-05 0.000137365056443 +TTGACT 0.000201141477887 0.000405931352694 +GTAATA 5.25710680841e-05 0.000289699381857 +AACGCA 0.000198855779275 0.000183153408591 +GTAATC 0.000102856437556 0.000172586865787 +TCTAAC 0.000153141807028 0.000132962330275 +CGTTCT 8.22851500447e-05 0.000124156877939 +CCACCG 0.000164570300089 0.000110948699435 +GGTAGG 1.82855888988e-05 6.33992568198e-05 +CCACCA 0.00046628251692 0.00024655266541 +CCACCC 0.000290283723769 0.000147931599246 +GTTCGT 0.000141713313966 6.86825282215e-05 +TTTATG 0.000223998464011 
0.000582040399415 +ACTAGG 2.51426847359e-05 8.27712519592e-05 +TTTATC 0.000212569970949 0.000403289716993 +ACTAGC 7.54280542076e-05 0.000114470880369 +TTTATA 0.000105142136168 0.000690347463149 +ACTAGA 8.22851500447e-05 0.000151453780181 +CCACCT 0.000514282187779 0.000204286494197 +CGTTCG 3.42854791853e-05 3.96245355124e-05 +TCGTTG 4.5713972247e-05 6.33992568198e-05 +CGTTCA 0.000109713533393 0.000136484511209 +CGTTCC 0.000109713533393 8.45323424264e-05 +TCTAAT 0.000185141587601 0.000257119208214 +TCGTCT 0.000114284930618 0.000128559604107 +CTTTGT 0.000146284711191 0.000393603719423 +AAACAT 0.000335997696016 0.000818907067256 +ACTGGA 0.000578281748925 0.000449958614374 +ACTGGC 0.000326854901566 0.000194600496628 +TAGTCC 2.28569861235e-06 0.000108307063734 +GTGACC 0.000445711229409 0.000178750682423 +TAGTCA 2.28569861235e-06 0.000169064684853 +GTGACA 0.000267426737645 0.000248313755878 +TAGTCG 0.0 5.10716235493e-05 +GTGACG 0.000290283723769 0.000147051054013 +TGTCAT 0.000134856218129 0.000442914252505 +ACTGGT 0.000324569202954 0.00023422503214 +CTTTGG 0.000105142136168 0.000276491203353 +CCTATG 0.000139427615353 8.45323424264e-05 +CTTTGC 0.000100570738944 0.000294102108025 +CTACGC 5.48567666965e-05 4.57883521476e-05 +GTGACT 0.000381711668263 0.000237747213074 +TAGTCT 0.0 0.00016113977775 +GTGCAC 0.000313140709892 0.000201644858496 +CGACGG 5.48567666965e-05 6.16381663526e-05 +ACCTAG 0.0 5.19521687829e-05 +CGACGA 6.85709583706e-05 5.19521687829e-05 +ACCTAA 1.37141916741e-05 0.000153214870648 +CGACGC 5.71424653088e-05 5.98770758854e-05 +ACCTAC 0.000374854572426 0.000128559604107 +CCGCAG 0.000356568983527 0.000145289963545 +CCGCAA 6.62852597582e-05 8.98156138281e-05 +CCGCAC 0.000100570738944 8.36517971928e-05 +CTACGG 6.17138625335e-05 4.13856259796e-05 +ACCTAT 0.000185141587601 0.000125917968406 +CGACGT 5.25710680841e-05 6.25187115862e-05 +CGAAGA 0.000102856437556 0.000140887237377 +CCCCGT 7.54280542076e-05 7.04436186887e-05 +CGAAGG 7.54280542076e-05 9.42183399961e-05 
+GCGCTC 0.000178284491763 0.000152334325414 +GTGCAT 0.000198855779275 0.00032932391737 +GCGCTA 7.31423555953e-05 6.5160347287e-05 +CCGCAT 7.31423555953e-05 7.48463448567e-05 +GTTCGC 0.000111999232005 5.89965306518e-05 +GAAGAT 0.000918850842166 0.000371590088583 +GGATCT 0.000354283284915 0.0002122114013 +TCATGA 1.82855888988e-05 0.000337248824472 +TCGGAA 9.82850403312e-05 8.36517971928e-05 +TCGGAC 0.000153141807028 5.89965306518e-05 +GAATCT 0.000276569532095 0.000226300125037 +TCGGAG 0.000246855450134 8.98156138281e-05 +GGATCG 7.771375282e-05 5.37132592501e-05 +GAAGAG 0.00174627373984 0.000462286247645 +GAAGAA 0.0012022774701 0.000560026768575 +CGAGAA 0.000207998573724 0.000110068154201 +GAAGAC 0.000991993197761 0.000328443372136 +GGATCA 0.000226284162623 0.000233344486906 +GAATCA 0.000235426957072 0.000292341017558 +GAATCC 0.000217141368173 0.000179631227656 +TCGGAT 0.000134856218129 8.62934328936e-05 +GAATCG 8.68565472694e-05 8.62934328936e-05 +ATGGTC 0.000322283504342 0.000169064684853 +CAGATG 0.000843422787958 0.000454361340542 +ATGGTA 0.000150856108415 0.000162900868218 +TGACCC 6.85709583706e-06 0.000172586865787 +ATGGTG 0.000825137199059 0.000244791574943 +CAGATC 0.00080685161016 0.000304668650829 +ACGCCC 9.59993417188e-05 6.42798020534e-05 +CAGATA 0.000214855669561 0.000255358117747 +TGTCAG 0.000374854572426 0.00037335117905 +ACGCCT 0.000125713423679 9.42183399961e-05 +CAGATT 0.000511996489167 0.000466688973813 +TGATTA 0.0 0.00044907806914 +CTCGAC 0.000164570300089 9.42183399961e-05 +ATGGTT 0.000269712436258 0.000259760843915 +TCGCAA 6.62852597582e-05 9.42183399961e-05 +TTGCGC 8.4570848657e-05 0.000110948699435 +GCTATT 0.000267426737645 0.000218375217935 +TGCGTC 0.000102856437556 0.000103023792332 +GCGTGG 8.4570848657e-05 0.000106545973267 +TTGTAG 1.14284930618e-05 0.000261521934382 +AATGGT 0.000345140490465 0.000257119208214 +TCCACC 0.00054399626974 0.000228941760738 +CTACGT 5.25710680841e-05 5.89965306518e-05 +AATGGC 0.000486853804431 0.000261521934382 
+AATGGA 0.000687995282318 0.00040505080746 +AATGGG 0.000260569641808 0.000221016853636 +CCCCGA 5.48567666965e-05 5.01910783157e-05 +AATAGA 9.14279444941e-05 0.00028793829139 +AATAGC 7.08566569829e-05 0.000188436679992 +GTGAAT 0.000367997476589 0.000290579927091 +AGCCAG 0.000530282078066 0.000272088477185 +GGAGGG 0.000267426737645 0.000243030484476 +AAAAGT 0.000313140709892 0.000614620573059 +AGCCAC 0.000244569751522 0.000236866667841 +AGCCAA 0.000212569970949 0.000242149939242 +TGGTAG 9.14279444941e-06 9.6859975697e-05 +CGCCAT 0.000102856437556 0.000121515242238 +GAGTGT 0.000468568215532 0.000340771005407 +TGGTAC 0.000175998793151 9.33377947625e-05 +TGGTAA 2.28569861235e-06 0.000220136308402 +AAAAGG 0.000265141039033 0.000429706074001 +GGAGGT 0.000397711558549 0.000155856506349 +AGCCAT 0.000169141697314 0.000284416110456 +AAAAGC 0.000486853804431 0.000552101861473 +AAAAGA 0.000429711339122 0.000749343993801 +TCTATA 0.000102856437556 0.000201644858496 +GAGTGG 0.000395425859937 0.000177870137189 +CGCCAG 0.000137141916741 0.000117993061304 +TGGTAT 0.000150856108415 0.000174347956255 +CGCCAA 9.82850403312e-05 8.80545233609e-05 +GAGTGC 0.00034971188769 0.000155856506349 +CGCCAC 0.000130284820904 9.86210661642e-05 +GAGTGA 5.02853694718e-05 0.000287057746156 +GCCAGG 0.000217141368173 0.000147051054013 +GCCAGA 0.000420568544673 0.000252716482046 +GCCAGC 0.000537139173903 0.00023422503214 +GCCCGT 0.000164570300089 6.69214377543e-05 +TACCGA 9.59993417188e-05 4.57883521476e-05 +TACCGC 0.000223998464011 5.54743497173e-05 +TACTGA 3.885687641e-05 0.000268566296251 +CTTCAT 0.000274283833482 0.000457883521476 +GCCCGC 0.000171427395926 5.01910783157e-05 +GCCAGT 0.000463996818308 0.000184914499058 +GCCCGA 0.000164570300089 6.86825282215e-05 +GCCCGG 0.000105142136168 6.33992568198e-05 +TGCGAT 0.000109713533393 9.86210661642e-05 +CTTCAC 0.000283426627932 0.000355740274378 +CTTCAA 0.000237712655685 0.00034165155064 +CTTCAG 0.000701709473992 0.000476374971382 +TCCGAG 0.000249141148746 
9.42183399961e-05 +GTGGGC 0.00054399626974 0.000132962330275 +TCCGAC 0.000150856108415 5.10716235493e-05 +AATTCA 0.000169141697314 0.000452600250075 +GGACTG 0.000559996160026 0.000285296655689 +AATTCC 0.000146284711191 0.000188436679992 +AATTCG 3.65711777976e-05 6.0757621119e-05 +TCCGAT 9.82850403312e-05 8.01296162584e-05 +TAGCGG 0.0 5.19521687829e-05 +AATTCT 0.000137141916741 0.000309071376997 +TAGCGC 0.0 6.86825282215e-05 +GTCATC 0.00069942377538 0.000239508303542 +GTCATA 0.000137141916741 0.000204286494197 +GTCATG 0.000518853585004 0.00027825229382 +AGCAGG 0.000262855340421 0.000372470633816 +CCAGGT 0.00037942596965 0.000154975961115 +TTGGAA 0.000347426189078 0.000294982653259 +AGCAGC 0.000863994075469 0.000504552418858 +AGCAGA 0.000436568434959 0.000530968775866 +GGGGTG 0.000237712655685 0.000157617596816 +CCAGGG 0.00019428438205 0.000154975961115 +AGCAGT 0.000507425091942 0.000384798267087 +CCAGGA 0.000626281419785 0.000248313755878 +CCAGGC 0.000342854791853 0.00013912614691 +TCCCCG 0.000105142136168 6.33992568198e-05 +ATAGAG 0.000319997805729 0.000204286494197 +CATCTG 0.00046628251692 0.000488702604653 +ACGGTC 0.000146284711191 6.69214377543e-05 +CATCTA 0.00011657062923 0.000206047584664 +ATAGAC 0.000244569751522 0.00017082577532 +CATCTC 0.000278855230707 0.000296743743726 +CAAGCT 0.000306283614055 0.00024391102971 +TCCAAC 0.00050513939333 0.000229822305972 +GGGGTC 0.000175998793151 0.000123276332705 +CAAGCC 0.000253712545971 0.000155856506349 +CATCTT 0.000134856218129 0.00036366518148 +CAAGCA 0.000281140929319 0.000264163570083 +CAAGCG 0.000109713533393 8.1010161492e-05 +GTTCTT 0.000262855340421 0.000289699381857 +ACATGA 9.14279444941e-06 0.000425303347833 +CGTAAT 7.31423555953e-05 8.36517971928e-05 +ACATGC 0.00015542750564 0.00035133754821 +TCTTGA 1.59998902865e-05 0.000284416110456 +ACATGG 0.000244569751522 0.000247433210644 +AGGGAT 0.000249141148746 0.000191078315693 +GTACAT 9.14279444941e-05 0.000219255763169 +TGGCGG 5.02853694718e-05 9.6859975697e-05 
+CGTAAG 0.000107427834781 5.10716235493e-05 +GAGTCA 0.000308569312668 0.000247433210644 +ACATGT 0.000141713313966 0.000403289716993 +CGTAAC 0.000100570738944 6.42798020534e-05 +CGTAAA 0.000130284820904 0.00015849814205 +AGGGAG 0.000395425859937 0.000203405948964 +TTATTT 8.4570848657e-05 0.00153479034218 +AGGGAC 0.000260569641808 0.000144409418312 +AGGGAA 0.00031085501128 0.000292341017558 +CCCTTT 0.000221712765398 0.000289699381857 +TGTTGT 6.85709583706e-05 0.000551221316239 +TCTTGG 0.000107427834781 0.000169945230086 +GTTGTT 0.000258283943196 0.000406811897927 +ATCCTT 0.000205712875112 0.000210450310832 +GCGGGG 0.00011657062923 9.77405209306e-05 +TTGTCC 0.000157713204252 0.000217494672701 +TTGTCA 0.000132570519516 0.000406811897927 +ATCCTC 0.000482282407206 0.000279132839054 +CTATGA 4.5713972247e-06 0.000154975961115 +ATCCTA 0.000162284601477 0.000114470880369 +GTTGTC 0.000187427286213 0.000210450310832 +ATCCTG 0.000879993965756 0.000279132839054 +GTTGTG 0.000447996928021 0.000353098638677 +TTGTCT 0.000244569751522 0.000321399010267 +CTATGC 3.19997805729e-05 0.000102143247099 +CTGTGG 0.000347426189078 0.000353979183911 +AAGGGG 0.000189712984825 0.000203405948964 +TGACTC 0.0 0.000267685751017 +TGGGCA 0.000175998793151 0.000162020322984 +TGGGCG 8.4570848657e-05 7.57268900903e-05 +TGACTG 4.5713972247e-06 0.000423542257366 +AAGCCG 0.000230855559848 0.000104784882799 +AAGCCC 0.000370283175201 0.000154975961115 +AAGCCA 0.000390854462712 0.000299385379427 +CAAAAT 0.000260569641808 0.000623426025395 +CTATGG 7.54280542076e-05 0.000114470880369 +TGGGCT 0.000205712875112 0.000198122677562 +CAAAAC 0.000436568434959 0.000564429494743 +CAAAAA 0.000322283504342 0.000760791081838 +AAGCCT 0.000429711339122 0.000225419579804 +CAAAAG 0.000377140271038 0.000493985876054 +TCAATG 0.000189712984825 0.000316996284099 +CCCTTA 5.71424653088e-05 0.000133842875509 +TCAATC 0.000148570409803 0.00025623866298 +CGTCGT 6.62852597582e-05 4.93105330821e-05 +CAGTTC 0.000598853036436 
0.000279132839054 +CAGTTA 0.000159998902865 0.000215733582234 +CAGTTG 0.000274283833482 0.000305549196062 +GACCCC 0.00041828284606 0.000105665428033 +CTCTGT 0.000239998354297 0.000430586619235 +TCAATT 0.000100570738944 0.000350457002976 +TGGCGT 7.54280542076e-05 0.000120634697004 +GTTTGG 0.000143999012578 0.000348695912509 +CTCTGC 0.000287998025156 0.000348695912509 +GACCCT 0.000445711229409 0.000152334325414 +CTCTGA 3.19997805729e-05 0.000394484264657 +CTCTGG 0.000198855779275 0.00032932391737 +CAGTTT 0.000653709803133 0.000532729866333 +GCGAAG 0.000173713094539 7.39657996231e-05 +CTACTC 8.4570848657e-05 0.000105665428033 +TGCTGG 0.000201141477887 0.000399767536058 +GCGAAC 0.000102856437556 5.98770758854e-05 +GAGTCT 0.000607995830886 0.000244791574943 +GCGAAA 0.000148570409803 0.000117993061304 +TAGGGA 2.28569861235e-06 8.45323424264e-05 +GCTGGT 0.00045713972247 0.000237747213074 +GTGATT 0.000420568544673 0.000322279555501 +CTACTA 6.17138625335e-05 0.000103023792332 +TGCTGT 0.000214855669561 0.000549460225772 +GCGAAT 7.54280542076e-05 8.36517971928e-05 +TAATGG 0.0 0.000199003222796 +GTGATG 0.00065142410452 0.000404170262226 +CTACTG 0.000212569970949 0.000217494672701 +GTGATC 0.000489139503043 0.000150573234947 +GCTGGC 0.000297140819606 0.000178750682423 +GTGATA 0.000114284930618 0.000167303594386 +GCTGGA 0.000850279883795 0.000448197523907 +TGATAA 4.5713972247e-06 0.000353979183911 +TGTAGT 5.48567666965e-05 0.000287057746156 +TGATAC 0.0 0.000159378687283 +TGATAG 0.0 0.000164661958685 +CTTGTT 0.000205712875112 0.000421781166899 +GACGGT 0.000196570080662 8.80545233609e-05 +GCTAGG 4.34282736347e-05 5.6354894951e-05 +GCTAGA 8.68565472694e-05 0.000130320694574 +ATGAAT 0.00041828284606 0.00054417695437 +CTATGT 4.5713972247e-05 0.000134723420742 +AAGGGT 0.000409140051611 0.000173467411021 +TGATAT 0.0 0.000327562826902 +GTAGTG 0.000198855779275 0.000172586865787 +GCTAGT 0.000107427834781 0.000100382156631 +ATGAAC 0.000678852487869 0.000383917721853 +GACGGG 
0.000201141477887 0.000101262701865 +ATGAAA 0.000587424543375 0.00069298909885 +GACGGA 0.00031085501128 0.000132081785041 +ATGAAG 0.0011108495256 0.000525685504464 +GACGGC 0.000393140161325 0.000100382156631 +CTTGTC 0.000139427615353 0.00019283940616 +CTAACT 6.62852597582e-05 0.000145289963545 +CCATAG 6.85709583706e-06 0.000136484511209 +GGTGAA 0.000553139064189 0.000295863198493 +GGTGAC 0.000390854462712 0.00014881214448 +GCGTCT 0.000228569861235 0.000114470880369 +TGTAAA 0.000278855230707 0.000776640896043 +GGTGAG 0.000479996708594 0.000172586865787 +GGAATG 0.00042742564051 0.000255358117747 +CGTCCT 0.000166855998702 0.000123276332705 +GGAATC 0.000329140600179 0.00015849814205 +GGAATA 0.000146284711191 0.000198122677562 +GCGTCA 8.68565472694e-05 0.0001074265185 +GCGTCC 0.000162284601477 0.000100382156631 +GGTGAT 0.000452568325246 0.000205167039431 +GCGTCG 6.85709583706e-05 5.81159854182e-05 +CGTCCG 7.54280542076e-05 5.81159854182e-05 +GGAATT 0.000251426847359 0.00020252540373 +CGTCCC 0.000102856437556 9.6859975697e-05 +CGTCCA 0.00011657062923 0.000142648327845 +TTTCGA 9.59993417188e-05 0.000103023792332 +CCGTAT 0.00015542750564 5.81159854182e-05 +GAAGTC 0.000345140490465 0.000235105577374 +CTACTT 7.99994514323e-05 0.00018051177289 +CGTGGT 0.000162284601477 9.06961590617e-05 +GCCGTA 0.000100570738944 4.40272616804e-05 +CCGTAC 0.000198855779275 4.4907806914e-05 +CCGTAA 6.85709583706e-06 6.5160347287e-05 +CCGTAG 4.5713972247e-06 3.87439902788e-05 +CTAACA 0.000134856218129 0.000198122677562 +GCCGTT 0.000118856327842 9.6859975697e-05 +TTGGTG 0.000395425859937 0.000273849567652 +GAAGTT 0.000377140271038 0.000299385379427 +GGCGCG 8.4570848657e-05 5.89965306518e-05 +ATGCCC 0.00027199813487 0.000118873606537 +TTAGTT 8.68565472694e-05 0.000409453533628 +ATGCCA 0.000301712216831 0.000259760843915 +GGCGCC 0.000107427834781 5.89965306518e-05 +ATGCCG 0.000148570409803 6.60408925206e-05 +GGCGCA 0.000139427615353 8.1010161492e-05 +TATTAC 0.000258283943196 0.000279132839054 
+ACTATG 0.000171427395926 0.000159378687283 +CCATAT 0.000205712875112 0.000204286494197 +TTTAAG 0.000372568873813 0.000594368032686 +ACTATC 0.000148570409803 0.000109187608967 +TTTAAA 0.000527996379453 0.00152686543508 +ACTATA 0.00011657062923 0.000205167039431 +TTTAAC 0.000303997915443 0.000525685504464 +TATTAT 0.000217141368173 0.000726449817727 +TTAGTG 0.000173713094539 0.000223658489337 +GGCGCT 0.000217141368173 0.000112709789902 +TTAGTC 6.17138625335e-05 0.000172586865787 +ATGCCT 0.000415997147448 0.000184914499058 +TTAGTA 6.39995611459e-05 0.000221897398869 +TTTAAT 0.000246855450134 0.00096859975697 +ACTATT 0.000212569970949 0.000307310286529 +GGGCAT 0.000100570738944 0.000138245601677 +TCGATA 2.74283833482e-05 5.72354401846e-05 +TTCTAG 6.85709583706e-06 0.000138245601677 +TTTCCA 0.000297140819606 0.000547699135305 +TTTCCC 0.000198855779275 0.000352218093443 +GGGACT 0.000153141807028 0.000141767782611 +TATCGA 7.771375282e-05 8.27712519592e-05 +GTTTGT 0.000173713094539 0.00055386295194 +TTTCCT 0.000335997696016 0.000463166792878 +GGGCAG 0.000265141039033 0.000201644858496 +GTCTGT 0.00019428438205 0.00035133754821 +GGGCAA 0.000121142026455 0.000137365056443 +GGGCAC 8.22851500447e-05 9.6859975697e-05 +GTACGG 5.48567666965e-05 4.66688973813e-05 +GTTTGA 2.51426847359e-05 0.000521282778296 +GTACGC 5.48567666965e-05 4.4907806914e-05 +GTACGA 7.99994514323e-05 4.75494426149e-05 +CCGCTC 0.000198855779275 0.000113590335136 +GCATGT 0.00015542750564 0.000302907560361 +CCGCTA 5.48567666965e-05 7.30852543895e-05 +CCGCTG 0.000399997257162 0.000179631227656 +TGAAAA 4.5713972247e-06 0.000792490710248 +GCGCGG 7.771375282e-05 6.95630734551e-05 +GCGCGC 8.4570848657e-05 0.000156737051582 +TGTTTT 0.000166855998702 0.00137981438106 +GCGCGA 4.5713972247e-05 4.66688973813e-05 +GCATGG 0.000137141916741 0.000162020322984 +TGGATC 0.000278855230707 0.000242149939242 +GCATGC 8.91422458817e-05 0.000236866667841 +CCGCTT 9.37136431064e-05 0.000115351425603 +GCATGA 1.37141916741e-05 
0.000263283024849 +TTTCAG 0.000564567557251 0.00061726220876 +GTCGAC 0.000118856327842 8.45323424264e-05 +GCGCGT 4.34282736347e-05 8.62934328936e-05 +GTCGAG 0.000148570409803 7.74879805576e-05 +CGTGGC 0.000148570409803 8.1010161492e-05 +CTTCCG 9.59993417188e-05 8.62934328936e-05 +TGCGCT 0.000125713423679 0.000157617596816 +GAGCTC 0.00081599440461 0.00027561065812 +ATTGAT 0.000587424543375 0.000365426271948 +GAGCTG 0.00212341401088 0.000454361340542 +AAACGA 0.000251426847359 0.000210450310832 +AAACGC 0.000297140819606 0.000220136308402 +GTAACA 0.000102856437556 0.000252716482046 +GTAGTA 7.31423555953e-05 0.000109187608967 +AAACGG 0.000175998793151 0.000165542503918 +ATTGAA 0.000550853365577 0.000407692443161 +TGCGCG 4.5713972247e-05 0.000111829244668 +ATTGAC 0.000589710241987 0.000270327386718 +GAGCTT 0.000607995830886 0.000251835936812 +TGCGCC 9.37136431064e-05 7.48463448567e-05 +ATTGAG 0.000879993965756 0.00028793829139 +CGCTGA 1.82855888988e-05 0.000173467411021 +CACGCA 0.000111999232005 0.000162900868218 +AAACGT 0.000169141697314 0.000264163570083 +ATTTCG 5.25710680841e-05 0.000110068154201 +ATTTCA 0.000203427176499 0.000626067661096 +ATTTCC 0.000205712875112 0.000402409171759 +ATCAGT 0.000500567996105 0.000396245355124 +ATCGGG 0.00015542750564 5.19521687829e-05 +CATAGT 4.79996708594e-05 0.000176989591955 +ATCGGC 0.000361140380752 6.5160347287e-05 +ATCGGA 0.000201141477887 4.4907806914e-05 +ATTTCT 0.000274283833482 0.000602292939788 +TACGCC 0.000274283833482 2.81774474755e-05 +TGGGTC 0.000141713313966 0.000136484511209 +TTTCGG 0.000107427834781 0.00012679851364 +TGGGTA 3.65711777976e-05 0.000122395787472 +CATAGG 3.19997805729e-05 9.50988852297e-05 +ATCGGT 0.000182855888988 8.01296162584e-05 +ATCAGG 0.000251426847359 0.000248313755878 +CATAGC 4.5713972247e-05 0.000112709789902 +ATCAGC 0.000667423994807 0.000337248824472 +ACGGGT 0.000118856327842 6.78019829879e-05 +GTCTGA 3.19997805729e-05 0.000399767536058 +GATTCT 0.000308569312668 0.000245672120177 +CTTTTG 
0.00023314125846 0.000497508056989 +GCGTTC 0.000159998902865 0.00011711251607 +CTTTTC 0.000242284052909 0.00043675043587 +CTTTTA 0.000134856218129 0.000632231477731 +GATTCC 0.000159998902865 0.00015849814205 +ACGGGG 8.91422458817e-05 7.13241639223e-05 +ACGGGA 0.000130284820904 8.36517971928e-05 +ACGGGC 0.000178284491763 5.19521687829e-05 +TACTAT 0.000230855559848 0.000196361587095 +CTTTTT 0.000191998683438 0.000881425778842 +CGTAGA 5.71424653088e-05 4.0505080746e-05 +TCACGA 8.91422458817e-05 0.000105665428033 +TCACGC 0.000121142026455 0.000109187608967 +TCACGG 6.39995611459e-05 8.89350685945e-05 +TCTCAT 0.000221712765398 0.000352218093443 +TCACGT 7.771375282e-05 0.000127679058873 +AGTGGA 0.00045713972247 0.000321399010267 +TTCCAC 0.000370283175201 0.000309071376997 +TTCCAA 0.000175998793151 0.000273849567652 +TTCCAG 0.00080685161016 0.00032932391737 +CGAGGC 0.000109713533393 9.50988852297e-05 +ACCCTT 0.00015542750564 0.000175228501488 +TGAAGT 2.28569861235e-06 0.00044907806914 +ATGCTT 0.000283426627932 0.000398886990825 +TTCCAT 0.000178284491763 0.000300265924661 +ACCCTA 7.54280542076e-05 0.000111829244668 +ACCCTC 0.000290283723769 0.000172586865787 +TGAAGG 2.28569861235e-06 0.000390081538489 +ACCCTG 0.000390854462712 0.000191078315693 +CTGACT 0.000377140271038 0.000317876829333 +CCCCTT 0.000107427834781 0.000177870137189 +AGTTAT 0.000166855998702 0.000302027015128 +CTGACG 0.000342854791853 0.000169945230086 +CTGACC 0.000619424323947 0.000247433210644 +CTGACA 0.000475425311369 0.000360143000546 +AGTTAA 1.82855888988e-05 0.000364545726714 +CCCCTG 0.00019428438205 0.000177870137189 +AGTTAC 0.000217141368173 0.000153214870648 +CCCCTC 0.000166855998702 0.00018051177289 +AGTTAG 4.5713972247e-06 0.000122395787472 +CCCCTA 5.02853694718e-05 7.39657996231e-05 +ACACAG 0.000564567557251 0.000568832220911 +ACACAA 0.000324569202954 0.000659528379973 +ACACAC 0.000365711777976 0.00154359579452 +AATGTT 0.00034971188769 0.000726449817727 +CTGCAG 0.00169141697314 0.000605815120723 
+CATGGG 0.000153141807028 0.000149692689713 +CATGGA 0.000329140600179 0.000268566296251 +CATGGC 0.000281140929319 0.000196361587095 +AATGTC 0.000299426518218 0.000420020076431 +ACACAT 0.000185141587601 0.000561787859042 +AATGTA 0.000198855779275 0.000525685504464 +AATGTG 0.000626281419785 0.000506313509325 +CATGGT 0.000221712765398 0.000179631227656 +CTGCAT 0.000331426298791 0.000445555888206 +GCACAG 0.000468568215532 0.000311713012697 +ACATTT 0.000299426518218 0.00096859975697 +CGAGAT 0.000226284162623 8.45323424264e-05 +ATTACT 0.000191998683438 0.000336368279239 +ACATTG 0.000173713094539 0.000355740274378 +GTACCA 0.000123427725067 9.86210661642e-05 +ACATTA 0.00015542750564 0.00049046369512 +ACATTC 0.000207998573724 0.000364545726714 +ATTACA 0.000212569970949 0.000461405702411 +ATGCTC 0.000290283723769 0.000206047584664 +ATTACC 0.000105142136168 0.000194600496628 +CGAGAC 0.000258283943196 0.000101262701865 +ATTACG 5.71424653088e-05 0.000102143247099 +AAGTCT 0.000383997366875 0.000277371748587 +ATGCTA 0.000153141807028 0.000242149939242 +GGACAT 0.000189712984825 0.000232463941673 +GCTATA 0.000134856218129 0.000167303594386 +GCTATC 0.000166855998702 0.000108307063734 +AAGTCG 8.91422458817e-05 9.59794304633e-05 +AAGTCA 0.000255998244583 0.00041473680503 +GCTATG 0.000253712545971 0.000111829244668 +AAGTCC 0.000278855230707 0.00016113977775 +GGACAG 0.000639995611459 0.000282655019988 +GGACAC 0.000333711997403 0.000232463941673 +GGACAA 0.000219427066786 0.000280013384288 +GCACCT 0.000276569532095 0.000155856506349 +AGGGTT 0.000148570409803 0.000235105577374 +GAGGCG 0.000381711668263 0.000136484511209 +TGTTGG 4.79996708594e-05 0.000321399010267 +GAGGCA 0.000491425201656 0.000206047584664 +AGCGCT 0.000260569641808 0.000196361587095 +GAGGCC 0.00084570848657 0.000156737051582 +TGTTGC 5.94281639212e-05 0.000322279555501 +TTCTCA 0.000253712545971 0.000416497895497 +ACTCGA 7.99994514323e-05 0.0001074265185 +TAAAAT 0.0 0.00104784882799 +ACTCGC 0.00011657062923 0.000115351425603 
+ACTCGG 8.4570848657e-05 6.69214377543e-05 +AGCGCA 0.000182855888988 0.000154095415882 +GAGGCT 0.00080685161016 0.000232463941673 +AGCGCC 0.000239998354297 0.000133842875509 +AGGGTG 0.000221712765398 0.000151453780181 +AGGGTA 4.11425750223e-05 9.33377947625e-05 +AGCGCG 0.000100570738944 9.24572495289e-05 +AGGGTC 0.000121142026455 0.000110068154201 +TAAAAG 4.5713972247e-06 0.00057587658278 +ACTCGT 8.4570848657e-05 9.95016113978e-05 +TAAAAC 2.28569861235e-06 0.000687705827448 +TAAAAA 0.0 0.00120722751528 +ACGTTC 0.000137141916741 0.000105665428033 +AGCTGC 0.000297140819606 0.00040505080746 +ACGTTA 5.71424653088e-05 0.000159378687283 +AGCTGA 3.65711777976e-05 0.000509835690259 +ACGTTG 7.08566569829e-05 0.000100382156631 +AGCTGG 0.000283426627932 0.000294102108025 +TAGGGG 0.0 8.1010161492e-05 +TAAATG 0.0 0.000737896905764 +AGTGCG 0.000114284930618 0.000103904337566 +AGTGCA 0.000294855120993 0.000352218093443 +AGAGTT 0.000235426957072 0.000301146469894 +AGTGCC 0.000303997915443 0.00012679851364 +CCAAAA 0.000390854462712 0.00043675043587 +TTCGTG 0.000180570190376 6.60408925206e-05 +TAGGGT 0.0 0.000115351425603 +GCATAG 4.5713972247e-06 0.000116231970836 +AGCTGT 0.000299426518218 0.000475494426149 +ACGTTT 0.000203427176499 0.000294982653259 +AGTGCT 0.000365711777976 0.000303788105595 +AGAGTC 0.000253712545971 0.000227180670271 +AGAGTA 8.68565472694e-05 0.000150573234947 +AGAGTG 0.000333711997403 0.000270327386718 +TTCGTA 5.94281639212e-05 8.1010161492e-05 +GTTAAA 0.000308569312668 0.000482538788018 +CCTGGC 0.000406854352999 0.000152334325414 +GTTAAC 0.000148570409803 0.000207808675132 +CCTGGA 0.000827422897672 0.000263283024849 +CCTGGG 0.000226284162623 0.000113590335136 +GTTAAG 0.000130284820904 0.00016113977775 +TTCGTC 0.000173713094539 7.92490710248e-05 +GCACCC 0.000153141807028 0.000123276332705 +GTCTGG 0.000182855888988 0.00023422503214 +GTCTGC 0.000255998244583 0.000295863198493 +GTTAAT 0.000171427395926 0.000338129369706 +CCTGGT 0.000498282297493 0.000168184139619 
+TCTAAA 0.000356568983527 0.000481658242784 +GGAAGG 0.000159998902865 0.000196361587095 +CCACAT 0.000189712984825 0.000263283024849 +GGAAGC 0.000269712436258 0.000199883768029 +GCACCG 0.000107427834781 8.62934328936e-05 +GGAAGA 0.000237712655685 0.000343412641107 +GCGTTA 6.17138625335e-05 9.15767042953e-05 +CAATGA 1.59998902865e-05 0.000322279555501 +CCCGCA 9.59993417188e-05 8.80545233609e-05 +CAATGC 9.37136431064e-05 0.000244791574943 +GCGTTG 9.59993417188e-05 7.30852543895e-05 +CAATGG 0.000127999122292 0.00025623866298 +CCACAG 0.000553139064189 0.000333726643538 +GGAAGT 0.000265141039033 0.000241269394009 +CCACAC 0.000187427286213 0.000314354648398 +CCACAA 0.000244569751522 0.000286177200923 +ATTTGT 0.000132570519516 0.000670094922776 +TCCTCG 0.000132570519516 0.000132081785041 +TGTATG 0.00019428438205 0.00036102354578 +CAATGT 0.000114284930618 0.000308190831763 +GCGTTT 0.000230855559848 0.000219255763169 +TCCTCC 0.000500567996105 0.000392723174189 +TGTATC 0.000109713533393 0.000154095415882 +CGTTAG 2.28569861235e-06 5.37132592501e-05 +TAACCG 4.5713972247e-06 8.18907067256e-05 +TCAAAC 0.000301712216831 0.000426183893067 +TCAGGG 0.000201141477887 0.00016113977775 +CGTTAC 0.000118856327842 8.89350685945e-05 +TAACCC 0.0 0.000147051054013 +CGTTAA 4.5713972247e-06 0.000120634697004 +TAACCA 4.5713972247e-06 0.000232463941673 +ACCGCT 0.000180570190376 9.59794304633e-05 +TCGCGA 2.74283833482e-05 4.0505080746e-05 +TAGTGT 0.0 0.000250074846345 +TCAGGA 0.000431997037735 0.000350457002976 +TTCGTT 7.08566569829e-05 0.000122395787472 +TACCCT 0.000235426957072 9.77405209306e-05 +TAACCT 4.5713972247e-06 0.000205167039431 +CGTTAT 0.000148570409803 0.000127679058873 +GTGAAA 0.000569138954476 0.00051247732596 +GTGAAC 0.000575996050313 0.000264163570083 +TGCATA 0.000105142136168 0.000357501364845 +ACCGCG 5.71424653088e-05 3.87439902788e-05 +ACCGCA 0.000141713313966 9.86210661642e-05 +CCCTAT 0.000127999122292 9.77405209306e-05 +ACCGCC 0.000217141368173 8.541288766e-05 +AGATCC 
0.000111999232005 0.000206047584664 +AGGCTT 0.000114284930618 0.000221016853636 +CCTTCT 0.000285712326544 0.000221016853636 +GGGACG 0.000150856108415 0.000100382156631 +TGTCGC 8.91422458817e-05 8.1010161492e-05 +GTTGCC 0.000210284272336 0.000150573234947 +GTGCCG 0.000214855669561 6.33992568198e-05 +GTGCCA 0.000326854901566 0.000154095415882 +GTTGCG 7.08566569829e-05 7.39657996231e-05 +GTGCCC 0.000370283175201 8.36517971928e-05 +CCGCCG 0.000162284601477 8.1010161492e-05 +AGGCTA 8.22851500447e-05 0.000195481041861 +CCTTCC 0.000226284162623 0.000137365056443 +AGGCTC 0.000137141916741 0.00014881214448 +CCGCCC 0.000134856218129 8.27712519592e-05 +TGTCGA 9.59993417188e-05 0.000100382156631 +CCGCCA 0.000146284711191 0.000109187608967 +AGGCTG 0.000372568873813 0.0003196379198 +GTGCCT 0.000283426627932 0.000159378687283 +TCCAAA 0.000644567008683 0.000405931352694 +GTTGCT 0.000278855230707 0.000221897398869 +GGATAT 0.000281140929319 0.000276491203353 +TCGGGT 6.17138625335e-05 6.86825282215e-05 +ATTTGA 1.14284930618e-05 0.000596129123153 +GCAGTT 0.000262855340421 0.000292341017558 +GAAGCT 0.000687995282318 0.000288818836624 +GGATAA 1.14284930618e-05 0.000176109046722 +GGATAC 0.000370283175201 0.000105665428033 +TCGGGG 7.99994514323e-05 6.78019829879e-05 +GGATAG 4.5713972247e-06 9.50988852297e-05 +GAAGCG 0.000246855450134 0.000125917968406 +CCGCCT 0.000164570300089 8.18907067256e-05 +GCAGTG 0.000642281310071 0.000377753905218 +GAAGCC 0.000575996050313 0.000200764313263 +GCAGTA 0.000230855559848 0.000235105577374 +GAAGCA 0.000557710461414 0.000388320448021 +GCAGTC 0.000262855340421 0.000201644858496 +CGCCCG 4.79996708594e-05 3.52218093443e-05 +GAGAGA 0.000859422678244 0.000743180177166 +CGCCCC 5.25710680841e-05 6.33992568198e-05 +CGCCCA 5.02853694718e-05 6.78019829879e-05 +AGTTTG 0.000278855230707 0.000456122431009 +ACGCAG 0.000278855230707 0.000157617596816 +ACGCAC 0.000107427834781 0.000151453780181 +ACGCAA 9.59993417188e-05 0.000138245601677 +CTCGCA 0.000105142136168 
0.000128559604107 +ACGTAC 0.000153141807028 6.42798020534e-05 +CCCTAG 2.28569861235e-06 4.75494426149e-05 +ACGCAT 7.54280542076e-05 0.000125037423172 +AGTTTC 0.000278855230707 0.000325801736435 +GCCGGG 0.000105142136168 7.13241639223e-05 +TACCCG 9.82850403312e-05 4.93105330821e-05 +GCCGGC 0.00019428438205 7.48463448567e-05 +CATGTT 0.000239998354297 0.000438511526337 +GCCGGA 0.000242284052909 9.6859975697e-05 +TTGGGG 0.000130284820904 0.000169064684853 +AGATCT 0.000157713204252 0.000294982653259 +ACGTAG 4.5713972247e-06 5.81159854182e-05 +GCCACT 0.000422854243285 0.000197242132328 +CATGTA 8.91422458817e-05 0.000251835936812 +GTTGCA 0.000230855559848 0.000249194301111 +CATGTC 0.000182855888988 0.000287057746156 +GCCGGT 0.000196570080662 7.83685257912e-05 +CATGTG 0.000402282955774 0.000420900621665 +GTGCGA 0.000189712984825 9.33377947625e-05 +TACCCC 0.000178284491763 7.39657996231e-05 +GATTGG 0.000173713094539 0.000217494672701 +GGCCTG 0.000530282078066 0.000207808675132 +GGCCTC 0.000251426847359 0.000169945230086 +GGCCTA 9.82850403312e-05 9.86210661642e-05 +TTTGTT 0.000381711668263 0.00105929591603 +AATCGG 8.68565472694e-05 7.04436186887e-05 +AATCGC 0.000102856437556 0.000106545973267 +AATCGA 0.000102856437556 0.000106545973267 +TTCCCA 0.000315426408505 0.000340771005407 +GGCCTT 0.000230855559848 0.000153214870648 +TCACTA 8.4570848657e-05 0.000221016853636 +TCACTC 0.000157713204252 0.000279132839054 +AATCGT 0.000102856437556 0.000125037423172 +AGCCGA 0.000150856108415 0.000125037423172 +CCTTCA 0.000278855230707 0.000338129369706 +AGCCGC 0.000180570190376 0.000115351425603 +TAATTG 0.0 0.000299385379427 +AAAAAT 0.000404568654386 0.00112357571808 +AGCCGG 0.000102856437556 8.80545233609e-05 +GTTTTT 0.000230855559848 0.000942183399961 +TGGTGA 2.51426847359e-05 0.000307310286529 +GATGGA 0.000930279335227 0.000417378440731 +TGGTGC 0.00011657062923 0.000163781413451 +CCAAGT 0.000159998902865 0.000153214870648 +GGGCTG 0.000299426518218 0.000199883768029 +TGGTGG 
0.000187427286213 0.000250074846345 +GATGGG 0.000406854352999 0.000184914499058 +AAAAAA 0.000642281310071 0.00215997745804 +AAAAAC 0.000630852817009 0.000858531602768 +GAGCGA 0.000322283504342 0.000157617596816 +AGCCGT 0.000130284820904 0.000131201239808 +AAAAAG 0.000934850732452 0.000810982160154 +TGACAG 2.28569861235e-06 0.000421781166899 +CCAAGG 8.22851500447e-05 0.000108307063734 +CTACAT 0.000107427834781 0.000283535565222 +TCTCAG 0.000635424214234 0.000316115738866 +CCAAGC 0.000196570080662 0.000119754151771 +TGGTGT 0.000118856327842 0.000346054276808 +CCAAGA 0.00019428438205 0.000178750682423 +AGAACT 0.000169141697314 0.000240388848775 +GCATTA 0.000162284601477 0.000257119208214 +CCTTCG 6.62852597582e-05 5.37132592501e-05 +TACCAT 0.000185141587601 0.000150573234947 +AGATAG 2.28569861235e-06 0.000122395787472 +CACTGT 0.000258283943196 0.000357501364845 +ACCCGA 7.771375282e-05 5.98770758854e-05 +ATGTCT 0.000436568434959 0.000411214624095 +ACCCGC 0.000143999012578 7.83685257912e-05 +AGATAA 6.85709583706e-06 0.000295863198493 +TACCAC 0.000235426957072 0.000117993061304 +AGAACG 6.85709583706e-05 0.000134723420742 +TACCAA 0.000178284491763 0.000172586865787 +AGAACA 0.000169141697314 0.000438511526337 +TACCAG 0.000541710571128 0.00014881214448 +AGAACC 0.000187427286213 0.000176989591955 +ATGTCA 0.000274283833482 0.000416497895497 +CACTGG 0.000265141039033 0.000346934822042 +AGATAT 0.000153141807028 0.000324921191202 +TGACAA 0.0 0.000413856259796 +ACCAGG 0.000164570300089 0.00016113977775 +CACTGC 0.000285712326544 0.000380395540919 +ATGTCG 0.000166855998702 0.0001074265185 +CACTGA 2.28569861235e-05 0.000491344240354 +TAGGTT 0.0 0.000133842875509 +GTTTAC 0.000313140709892 0.000343412641107 +AATTAT 0.000157713204252 0.000588204216051 +CTGTTG 0.000415997147448 0.000425303347833 +TGCTAT 0.000157713204252 0.000201644858496 +CTGTTC 0.000489139503043 0.00032932391737 +CGGGTT 0.000105142136168 6.33992568198e-05 +CTGTTA 0.000150856108415 0.000281774474755 +AATTAC 
0.000230855559848 0.000324921191202 +AATTAA 1.82855888988e-05 0.00073437472483 +AATTAG 4.5713972247e-06 0.000304668650829 +TAGGTC 0.0 8.98156138281e-05 +CGGGTC 0.000139427615353 7.30852543895e-05 +CTGTTT 0.000726852158728 0.000700914005952 +CGGGTA 3.19997805729e-05 4.0505080746e-05 +CGGGTG 0.000148570409803 8.62934328936e-05 +CCAAAC 0.000406854352999 0.000334607188771 +CTTCGG 9.59993417188e-05 7.57268900903e-05 +CACACT 0.000399997257162 0.000456122431009 +TGGATG 0.000351997586302 0.00038303717662 +AGCAAA 0.000687995282318 0.000519521687829 +ATATAT 9.14279444941e-05 0.000750224539035 +AGCAAC 0.00050513939333 0.00023422503214 +TTAGGC 7.99994514323e-05 0.00014881214448 +AAAGCT 0.000658281200357 0.000434989345403 +AGCAAG 0.00053485347529 0.000240388848775 +AAAGCA 0.000740566350402 0.000641917475301 +ATATAG 2.28569861235e-06 0.000206047584664 +TTAGGT 6.17138625335e-05 0.000154095415882 +ATATAA 6.85709583706e-06 0.000524804959231 +AGCAAT 0.000358854682139 0.000283535565222 +ATATAC 0.000114284930618 0.000285296655689 +ATAGCA 0.000148570409803 0.000194600496628 +ATAGCC 0.000130284820904 0.000131201239808 +TACTTG 0.000201141477887 0.000215733582234 +ATAGCG 8.68565472694e-05 5.81159854182e-05 +TCTCAC 0.000226284162623 0.000245672120177 +TGCTAA 6.85709583706e-06 0.000280013384288 +ATAGCT 0.000111999232005 0.000168184139619 +TACTTT 0.000303997915443 0.000440272616804 +GTTTTC 0.000329140600179 0.000482538788018 +CGAGTT 9.59993417188e-05 8.45323424264e-05 +CTCGCC 0.000201141477887 0.000100382156631 +CGTACT 5.48567666965e-05 5.37132592501e-05 +TCCCGT 0.00011657062923 8.36517971928e-05 +ACATAC 0.00027199813487 0.000307310286529 +CTTAAG 0.000185141587601 0.000151453780181 +ACATAA 1.59998902865e-05 0.000424422802599 +CTTAAA 0.000333711997403 0.000584682035116 +AGGGGT 0.000118856327842 0.000166423049152 +CTTAAC 0.000157713204252 0.000156737051582 +CGTACA 6.17138625335e-05 9.50988852297e-05 +CGAGTG 0.000237712655685 0.000104784882799 +TTGTTA 8.22851500447e-05 0.000396245355124 +CGAGTA 
6.17138625335e-05 5.72354401846e-05 +CGAGTC 0.000105142136168 0.000103904337566 +AGGGGA 0.000123427725067 0.000206928129898 +CTTAAT 0.000196570080662 0.000316996284099 +AGGGGC 0.000105142136168 0.000111829244668 +TATGCA 0.000281140929319 0.000361904091013 +AGGGGG 6.62852597582e-05 0.000169064684853 +TCATTA 0.000130284820904 0.000492224785587 +CGACTT 7.08566569829e-05 6.95630734551e-05 +TCGGGC 0.000100570738944 6.42798020534e-05 +TTGTAA 6.85709583706e-06 0.000508074599792 +TTGTAC 0.000143999012578 0.000223658489337 +GTACCT 0.000125713423679 0.000110948699435 +TCATTG 0.000139427615353 0.000375992814751 +TGAATC 2.28569861235e-06 0.000338129369706 +CGACTA 4.11425750223e-05 3.25801736435e-05 +TGAATA 2.28569861235e-06 0.000377753905218 +CGACTC 0.000123427725067 9.42183399961e-05 +TGAATG 4.5713972247e-06 0.000569712766145 +AAGTAT 0.000335997696016 0.000251835936812 +CGACTG 0.000249141148746 0.000142648327845 +TTGCCA 0.000226284162623 0.000277371748587 +TTGTAT 0.000137141916741 0.000427944983534 +ATCGTG 0.000333711997403 7.22047091559e-05 +GCTTCG 9.14279444941e-05 7.30852543895e-05 +ATCGTC 0.000290283723769 9.33377947625e-05 +ATCGTA 8.91422458817e-05 7.22047091559e-05 +TGGGAG 0.000498282297493 0.00020252540373 +TGGGAA 0.000260569641808 0.000263283024849 +TGGGAC 0.000331426298791 0.000166423049152 +GCTTCT 0.000253712545971 0.000313474103165 +ATCGTT 0.000137141916741 0.000104784882799 +GATTGA 1.82855888988e-05 0.000284416110456 +TGGGAT 0.000313140709892 0.000208689220365 +GGGCTA 7.31423555953e-05 0.000112709789902 +CTAGCC 0.000109713533393 6.0757621119e-05 +CTAGCA 0.000125713423679 0.00011711251607 +CTAGCG 5.94281639212e-05 3.96245355124e-05 +CGGTAG 0.0 5.54743497173e-05 +ACTTAG 2.28569861235e-06 9.77405209306e-05 +ACTTAA 4.5713972247e-06 0.00038303717662 +CGGTAC 7.771375282e-05 3.6102354578e-05 +ACTTAC 0.000205712875112 0.000154095415882 +CGGTAA 0.0 8.71739781273e-05 +GACCAG 0.000742852049015 0.000194600496628 +GACCAC 0.000358854682139 0.000191958860927 +TTGCCC 
0.000150856108415 0.000104784882799 +CTAGCT 0.000130284820904 9.86210661642e-05 +TTCGAG 0.000315426408505 8.36517971928e-05 +ACTTAT 0.000100570738944 0.000227180670271 +CGGTAT 3.885687641e-05 6.0757621119e-05 +TACACA 0.000395425859937 0.000444675342972 +TACACG 0.000191998683438 9.42183399961e-05 +GCGAGA 7.99994514323e-05 0.000114470880369 +GAGAGC 0.000838851390733 0.000294102108025 +GCGAGC 0.000121142026455 9.86210661642e-05 +GGCCGT 0.000178284491763 6.16381663526e-05 +GCGAGG 7.31423555953e-05 0.000111829244668 +TTAAGA 8.22851500447e-05 0.000324921191202 +CAGCCG 0.00037942596965 0.00015849814205 +CAGCCA 0.000406854352999 0.000349576457743 +CAGCCC 0.000452568325246 0.000173467411021 +TCCGGG 8.4570848657e-05 7.22047091559e-05 +GCGAGT 0.000105142136168 9.86210661642e-05 +CAGCCT 0.000498282297493 0.000283535565222 +GAACTC 0.000308569312668 0.000193719951394 +GAACTA 0.000159998902865 0.000198122677562 +GGACTT 0.000251426847359 0.000167303594386 +CACCTA 7.771375282e-05 0.000147931599246 +AACTAT 0.000299426518218 0.000258880298681 +CACCTC 0.000265141039033 0.000267685751017 +CACCTG 0.00053485347529 0.000320518465034 +TGATCC 2.28569861235e-06 0.000190197770459 +AACACT 0.000484568105819 0.000535371502034 +TGATCA 0.0 0.000358381910079 +TGATCG 0.0 5.72354401846e-05 +TGCATG 0.000317712107117 0.000362784636247 +AACTAG 4.5713972247e-06 0.000140887237377 +GATTTA 0.000169141697314 0.0004270644383 +CACCTT 0.000111999232005 0.000232463941673 +AACTAC 0.000598853036436 0.000227180670271 +AACTAA 2.28569861235e-05 0.000344293186341 +AACACG 0.000196570080662 0.000174347956255 +AACACA 0.000694852378155 0.000831234700527 +AACACC 0.000537139173903 0.00029762428896 +TGATCT 9.14279444941e-06 0.0003196379198 +CCAGCA 0.000473139612757 0.00041473680503 +CTATAC 6.39995611459e-05 0.000111829244668 +CAACAG 0.000411425750223 0.000383917721853 +GACAGT 0.00058285314615 0.000243030484476 +CAACAA 0.000338283394628 0.000508955145026 +CCAGCC 0.00034971188769 0.000223658489337 +CAACAC 0.000228569861235 
0.000411214624095 +GGCCGG 0.000141713313966 7.74879805576e-05 +CTATAA 1.14284930618e-05 0.000236866667841 +GACAGG 0.00037942596965 0.000248313755878 +CAACAT 0.000139427615353 0.000429706074001 +GACAGC 0.000937136431064 0.000289699381857 +GACAGA 0.000566853255863 0.000430586619235 +GTATTA 5.71424653088e-05 0.000286177200923 +GTATTC 9.14279444941e-05 0.000167303594386 +GTAAGT 5.25710680841e-05 0.000155856506349 +CCAGCG 0.000169141697314 0.000174347956255 +AGAGGT 0.000274283833482 0.000250074846345 +GGTGGC 0.000299426518218 0.000137365056443 +TCCCCT 0.000205712875112 0.000189317225226 +GGTGGA 0.00066056689897 0.000254477572513 +CGTCAT 8.22851500447e-05 0.000164661958685 +GGTGGG 0.000169141697314 0.000128559604107 +AGAGGC 0.000251426847359 0.000214853037001 +GTAAGA 5.02853694718e-05 0.000164661958685 +AGAGGA 0.00058285314615 0.000467569519046 +CGCCCT 5.71424653088e-05 0.000103904337566 +AGAGGG 0.000187427286213 0.000321399010267 +GTAAGG 5.25710680841e-05 0.000123276332705 +CGTCAA 7.99994514323e-05 0.000106545973267 +CGTCAC 9.82850403312e-05 0.000125917968406 +TCCCCC 0.000123427725067 0.000173467411021 +GGTGGT 0.000402282955774 0.00018051177289 +CGTCAG 0.000274283833482 0.000125917968406 +GTCCGG 0.000107427834781 6.86825282215e-05 +GTCGCA 9.82850403312e-05 7.04436186887e-05 +GTCGCC 0.000125713423679 7.39657996231e-05 +CCGTCT 0.000228569861235 0.000151453780181 +TCGGTA 2.51426847359e-05 6.0757621119e-05 +CTCGCG 6.17138625335e-05 5.37132592501e-05 +TCGGTG 0.000198855779275 9.86210661642e-05 +GTCGCT 0.000143999012578 9.6859975697e-05 +TACCTA 9.82850403312e-05 8.98156138281e-05 +GTAACT 8.4570848657e-05 0.00015849814205 +CCGTCG 5.94281639212e-05 4.93105330821e-05 +CCGTCA 0.000130284820904 0.000118873606537 +CCGTCC 0.000226284162623 8.62934328936e-05 +GGCTGT 0.000342854791853 0.000280893929521 +CGCGTG 0.000111999232005 9.06961590617e-05 +CTATAT 5.71424653088e-05 0.000227180670271 +CGCGTC 7.54280542076e-05 5.98770758854e-05 +CGCGTA 2.05712875112e-05 3.34607188771e-05 +GTTTTA 
0.000203427176499 0.000717644365391 +ATGCAG 0.000829708596284 0.000402409171759 +CCACTA 7.31423555953e-05 0.000146170508779 +CCACTC 0.000166855998702 0.000191078315693 +CGCGTT 4.34282736347e-05 7.30852543895e-05 +GGCTGG 0.000441139832184 0.000206928129898 +GGCTGA 1.59998902865e-05 0.000286177200923 +GGCTGC 0.00034971188769 0.000226300125037 +ATGCAT 0.000205712875112 0.000442033707272 +TTTAGA 9.59993417188e-05 0.000415617350263 +TTTCAT 0.000210284272336 0.000620784389694 +TTGACC 0.000207998573724 0.000221016853636 +ATGACG 0.000178284491763 0.000153214870648 +GGAGTT 0.000338283394628 0.000207808675132 +ATGACA 0.000358854682139 0.000479016607083 +ATGACC 0.00050513939333 0.00018051177289 +CGGGCC 0.000127999122292 6.60408925206e-05 +GGGCCT 0.000207998573724 0.000146170508779 +TAAGAA 4.5713972247e-06 0.000301146469894 +ATGACT 0.000317712107117 0.00033196555307 +GGAGTA 0.000153141807028 0.000128559604107 +GGAGTC 0.000372568873813 0.000168184139619 +GGAGTG 0.000614852926723 0.000255358117747 +GGGCCG 0.000121142026455 7.48463448567e-05 +GGGCCC 0.000137141916741 6.86825282215e-05 +GGGCCA 0.000201141477887 0.000150573234947 +GGCAAG 0.000473139612757 0.000114470880369 +ACGAGT 5.25710680841e-05 8.89350685945e-05 +AGGCCA 0.000159998902865 0.000201644858496 +AGGCCC 9.37136431064e-05 0.000124156877939 +AGGCCG 9.37136431064e-05 7.04436186887e-05 +ACGAGG 5.02853694718e-05 0.000105665428033 +ACGAGC 0.000105142136168 0.000118873606537 +GACCCG 0.000267426737645 5.89965306518e-05 +ACGAGA 5.71424653088e-05 0.000132962330275 +GGCAAC 0.000395425859937 0.000123276332705 +AGGCCT 0.000146284711191 0.000204286494197 +AGACGG 0.000121142026455 0.000132081785041 +GGGTTG 0.000132570519516 0.000172586865787 +TTTGGG 0.00027199813487 0.000316115738866 +GGGTTA 4.5713972247e-05 0.000143528873078 +GGGTTC 0.000118856327842 8.98156138281e-05 +TTTGAT 0.000722280761503 0.000606695665956 +TGAACT 4.5713972247e-06 0.000413856259796 +CTACGA 6.85709583706e-05 4.0505080746e-05 +ATTGCT 0.00042742564051 
0.000289699381857 +GGGTTT 0.00019428438205 0.000313474103165 +TACAAG 0.000658281200357 0.000215733582234 +TGCGAG 0.000226284162623 0.000105665428033 +AACGGC 0.000470853914145 0.000102143247099 +ATCCAT 0.000185141587601 0.000263283024849 +ATTGCA 0.000381711668263 0.000332846098304 +TTTGGC 0.000411425750223 0.000282655019988 +ATTGCG 9.82850403312e-05 9.15767042953e-05 +TTTGAC 0.000735994953177 0.000437630981103 +TTTGAA 0.000619424323947 0.000700914005952 +CGGGCT 0.00011657062923 7.04436186887e-05 +CTCGTA 3.885687641e-05 7.57268900903e-05 +CTCGTC 0.000153141807028 0.000146170508779 +CCCTGG 0.000201141477887 0.000133842875509 +CCCTGA 1.37141916741e-05 0.000210450310832 +AAGCTT 0.000436568434959 0.000290579927091 +CCCTGC 0.000262855340421 0.000235986122607 +GTGTAA 2.05712875112e-05 0.00033196555307 +CCCTGT 0.000173713094539 0.000204286494197 +AAGCTC 0.000484568105819 0.000271207931951 +GATATA 0.000187427286213 0.000216614127468 +AAGCTA 0.000175998793151 0.000198122677562 +GATATG 0.000317712107117 0.000179631227656 +AAGCTG 0.00115884919646 0.000424422802599 +TGACGC 0.0 0.000156737051582 +TACTCT 0.000251426847359 0.000185795044291 +CATCGT 7.08566569829e-05 9.6859975697e-05 +GTAGTC 7.08566569829e-05 0.000104784882799 +CAGGGT 0.000463996818308 0.000135603965976 +CCAGTC 0.000308569312668 0.000207808675132 +TACGAT 0.000171427395926 7.74879805576e-05 +CCAGTG 0.000539424872515 0.00027561065812 +TACTCG 0.000125713423679 6.16381663526e-05 +CATCGA 7.31423555953e-05 0.000111829244668 +CATCGC 0.000114284930618 0.0001074265185 +TACTCC 0.000283426627932 0.000111829244668 +TACTCA 0.000139427615353 0.000200764313263 +CATCGG 5.94281639212e-05 7.48463448567e-05 +CAGGGG 0.000235426957072 0.000176989591955 +CCAGTT 0.000198855779275 0.000199003222796 +CAGGGC 0.000521139283616 0.000163781413451 +AATCTG 0.000562281858639 0.000393603719423 +CGGACT 6.62852597582e-05 7.92490710248e-05 +AATCTC 0.000237712655685 0.000292341017558 +AATCTA 0.000159998902865 0.000271207931951 +TGAACG 0.0 
0.000167303594386 +TCACAC 0.000178284491763 0.000397125900358 +TCACAA 0.000207998573724 0.000412095169329 +TAAATT 0.0 0.000581159854182 +GCCATA 0.000228569861235 0.000166423049152 +CGGACG 9.14279444941e-05 6.16381663526e-05 +ACATCG 7.771375282e-05 8.541288766e-05 +CGGACA 9.82850403312e-05 0.000122395787472 +CGGACC 8.22851500447e-05 4.31467164468e-05 +GCCATG 0.000820565801835 0.000219255763169 +TTTTAA 1.59998902865e-05 0.00151101562087 +TCACAT 0.000102856437556 0.000405931352694 +ATTAAC 0.000207998573724 0.000333726643538 +ATTAAA 0.000514282187779 0.000909603226318 +ATTAAG 0.000313140709892 0.000326682281669 +TTTTAC 0.00031085501128 0.00066128947044 +TCACTT 0.000123427725067 0.000379514995685 +ATTCGT 0.000153141807028 8.89350685945e-05 +GTGCAG 0.00085485128102 0.000325801736435 +TTACTA 7.31423555953e-05 0.000274730112886 +ATTAAT 0.000205712875112 0.00068330310128 +TACCTG 0.000749709144852 0.000194600496628 +GGTATG 0.000173713094539 0.000116231970836 +CTGAAT 0.000546281968352 0.000502791328391 +GGTATC 0.000121142026455 7.48463448567e-05 +CGAGAG 0.000393140161325 0.000154095415882 +CTGAAA 0.000927993636615 0.000642798020534 +CTGAAC 0.000717709364279 0.00038303717662 +TCGACC 6.17138625335e-05 5.89965306518e-05 +GGAACC 0.000278855230707 9.95016113978e-05 +CTGAAG 0.00178055921902 0.000569712766145 +CATTGC 6.17138625335e-05 0.000254477572513 +TATGAG 0.000685709583706 0.000213091946533 +CATTGA 4.5713972247e-06 0.000344293186341 +CTAGTT 9.14279444941e-05 0.000164661958685 +CATTGG 8.4570848657e-05 0.000244791574943 +TCAACT 0.000125713423679 0.00032932391737 +ATGTTA 0.000139427615353 0.000413856259796 +ACACCG 0.000180570190376 0.000112709789902 +CTGCCA 0.000420568544673 0.00030995192223 +CTGCCC 0.000500567996105 0.000160259232517 +ACACCC 0.000214855669561 0.00017082577532 +ACACCA 0.000335997696016 0.00032932391737 +CTGCCG 0.000285712326544 0.000177870137189 +CTAGTG 0.000217141368173 0.000124156877939 +TCAACC 0.000130284820904 0.000155856506349 +ATGTTC 0.000381711668263 
0.000321399010267 +CTAGTC 9.59993417188e-05 8.27712519592e-05 +TCAACG 7.99994514323e-05 7.39657996231e-05 +CTAGTA 4.5713972247e-05 0.000100382156631 +CATTGT 0.000100570738944 0.00035133754821 +TATGAC 0.000514282187779 0.000207808675132 +GTAATG 0.000139427615353 0.000254477572513 +ACACCT 0.000267426737645 0.000312593557931 +CTGCCT 0.000463996818308 0.00025623866298 +CCTCGT 0.000150856108415 0.000104784882799 +TTGTGT 0.000148570409803 0.000652484018104 +AGACTG 0.000411425750223 0.000393603719423 +AGACTC 0.000262855340421 0.000235105577374 +AGACTA 0.000105142136168 0.000141767782611 +CCTCGG 9.82850403312e-05 9.06961590617e-05 +CCTCGC 0.000159998902865 8.36517971928e-05 +CCCGAC 0.000164570300089 5.81159854182e-05 +CCTCGA 0.000143999012578 9.77405209306e-05 +CGAGGA 0.000239998354297 0.000137365056443 +AGACTT 0.000175998793151 0.000266805205783 +CGAGGG 0.000121142026455 7.22047091559e-05 +CACCGT 0.000105142136168 9.33377947625e-05 +ACTTTA 0.000189712984825 0.000479897152317 +ACTTTG 0.000214855669561 0.000360143000546 +TAGCTA 0.0 0.000130320694574 +GACCTG 0.00115427779924 0.000229822305972 +GTCGCG 6.17138625335e-05 4.57883521476e-05 +GACCTA 0.000137141916741 8.45323424264e-05 +GACCTC 0.000518853585004 0.00012944014934 +CACCGG 0.000134856218129 0.000100382156631 +CTATTG 7.31423555953e-05 0.000183153408591 +CACCGA 8.91422458817e-05 0.000120634697004 +ACTTTT 0.000262855340421 0.000735255270063 +GACCTT 0.000315426408505 0.000198122677562 +GGACGC 0.00015542750564 0.000123276332705 +GGACGG 0.000132570519516 9.6859975697e-05 +CTATTA 5.02853694718e-05 0.000250074846345 +CACAGC 0.000459425421083 0.000460525157177 +CTTATC 0.000127999122292 0.000114470880369 +CACAGA 0.000425139941898 0.00056354894951 +CTTATA 8.4570848657e-05 0.000177870137189 +CACAGG 0.000226284162623 0.000283535565222 +CTTATG 0.000146284711191 0.000172586865787 +ACTCAT 0.000169141697314 0.000282655019988 +CGTATA 3.19997805729e-05 6.0757621119e-05 +GACGTA 9.37136431064e-05 8.71739781273e-05 +CGTATC 0.00011657062923 
4.93105330821e-05 +CTATTC 7.08566569829e-05 0.000137365056443 +GAGGAC 0.00139884755076 0.000229822305972 +CGTATG 0.000132570519516 8.71739781273e-05 +AGCGAT 0.000265141039033 0.00014881214448 +TTAACA 0.000118856327842 0.000496627511755 +ACTCAC 0.000203427176499 0.00025623866298 +TTAACC 9.37136431064e-05 0.000240388848775 +ACTCAA 0.000228569861235 0.00035133754821 +ACTCAG 0.000461711119695 0.000228941760738 +CTTATT 0.000198855779275 0.000378634450452 +CACAGT 0.000345140490465 0.000324921191202 +AGCGAC 0.000372568873813 8.541288766e-05 +TAAACA 2.28569861235e-06 0.000711480548756 +AGCGAA 0.000196570080662 0.00012679851364 +GAGGAT 0.00129827681182 0.00032932391737 +AGCGAG 0.000454854023858 0.000147931599246 +CGTATT 9.59993417188e-05 9.50988852297e-05 +TAAACG 0.0 0.000165542503918 +AGTTCC 0.000118856327842 0.000169945230086 +GTATGT 7.08566569829e-05 0.000241269394009 +AGTTCA 0.000255998244583 0.000347815367275 +TACTTA 5.25710680841e-05 0.000172586865787 +AGTTCG 3.65711777976e-05 6.25187115862e-05 +TAGCCC 0.0 8.27712519592e-05 +CTGTGA 3.885687641e-05 0.000429706074001 +CGGGGT 5.71424653088e-05 6.33992568198e-05 +TAGGAC 0.0 0.000109187608967 +GCAAAG 0.000413711448836 0.000285296655689 +TAGGAA 2.28569861235e-06 0.000173467411021 +GCAAAA 0.000370283175201 0.000486941514186 +TAGGAG 4.5713972247e-06 0.000123276332705 +GCAAAC 0.000361140380752 0.000366306817181 +CGGGGC 0.00011657062923 7.92490710248e-05 +CGGGGA 6.85709583706e-05 8.1010161492e-05 +CTGTGT 0.000409140051611 0.000486941514186 +CGGGGG 4.34282736347e-05 5.72354401846e-05 +GTATGC 5.94281639212e-05 0.000133842875509 +AGTTCT 0.000198855779275 0.000258880298681 +GCAAAT 0.000185141587601 0.000375112269517 +TAGGAT 0.0 0.00016113977775 +TAATTA 0.0 0.000550340771005 +CTATTT 7.99994514323e-05 0.000404170262226 +TGCTTT 0.000249141148746 0.000682422556047 +GTCTAT 0.000196570080662 0.00016113977775 +GTAATT 7.771375282e-05 0.000316996284099 +GTTACC 0.000114284930618 9.6859975697e-05 +GTTACA 0.000153141807028 0.000230702851205 
+TAATTC 0.0 0.000300265924661 +GTTACG 4.79996708594e-05 5.81159854182e-05 +TATCGC 0.000118856327842 5.10716235493e-05 +TATAGT 5.94281639212e-05 0.000187556134759 +GTCTAA 1.82855888988e-05 0.000187556134759 +AGGACG 8.91422458817e-05 0.000124156877939 +GTCTAC 0.00027199813487 0.000125037423172 +TGCTTC 0.000347426189078 0.000321399010267 +TTCAAA 0.00058285314615 0.000655125653805 +GGCGTT 0.000111999232005 8.98156138281e-05 +GGGCTC 0.000159998902865 0.000135603965976 +TATAGC 5.48567666965e-05 0.00015849814205 +TATCGT 8.22851500447e-05 6.86825282215e-05 +TATAGA 7.99994514323e-05 0.00024655266541 +AGTGGG 0.00019428438205 0.000153214870648 +TATAGG 3.65711777976e-05 0.000111829244668 +GTGGCG 0.000338283394628 0.000104784882799 +GTTACT 0.000139427615353 0.000178750682423 +GCTCTC 0.000372568873813 0.000255358117747 +AACCTT 0.000285712326544 0.000282655019988 +GATGGC 0.000587424543375 0.000219255763169 +GCTCTG 0.00115656349785 0.000364545726714 +CCACGT 0.000141713313966 0.000106545973267 +CAATAT 0.000127999122292 0.000350457002976 +CAGGAG 0.00167770278147 0.000349576457743 +GAATGG 0.000313140709892 0.000263283024849 +GAATGA 2.51426847359e-05 0.000465808428579 +ACTACT 0.000164570300089 0.000187556134759 +GAATGC 0.000235426957072 0.0002122114013 +CCACGA 0.000130284820904 6.5160347287e-05 +CAATAC 0.000148570409803 0.000176109046722 +CCACGC 0.000146284711191 9.6859975697e-05 +CAATAA 1.37141916741e-05 0.000464927883345 +GTTCCG 8.22851500447e-05 3.69828998116e-05 +CAATAG 4.5713972247e-06 0.000189317225226 +CCACGG 8.68565472694e-05 8.71739781273e-05 +AACCTA 0.000146284711191 0.000137365056443 +GAATGT 0.000278855230707 0.000374231724284 +ACTACC 9.37136431064e-05 8.45323424264e-05 +GGCTTC 0.000653709803133 0.000171706320554 +ACTACA 0.000203427176499 0.000316115738866 +ACTACG 7.08566569829e-05 5.54743497173e-05 +GGCTTG 0.000239998354297 0.000182272863357 +CGTTGA 2.28569861235e-06 8.541288766e-05 +AGTCAG 0.000445711229409 0.000307310286529 +CGTTGC 4.79996708594e-05 6.16381663526e-05 
+TAACAC 2.28569861235e-06 0.000257119208214 +TCCTAC 0.00031085501128 0.000117993061304 +AGTCAC 0.000164570300089 0.000228941760738 +CGTTGG 3.65711777976e-05 7.39657996231e-05 +AGTCAA 0.000169141697314 0.000367187362415 +TCATCG 7.99994514323e-05 0.000145289963545 +TGAGGG 0.0 0.000203405948964 +TCATCC 0.000290283723769 0.000302027015128 +GAGCGG 0.000287998025156 0.000131201239808 +CTTTCT 0.00027199813487 0.000419139531198 +AGTCAT 0.000164570300089 0.000294102108025 +TCCTAT 0.000150856108415 0.000130320694574 +TAACAT 2.28569861235e-06 0.000416497895497 +CGTTGT 5.25710680841e-05 9.95016113978e-05 +CTTTCC 0.00019428438205 0.000247433210644 +CTTTCA 0.000226284162623 0.000451719704841 +CTTTCG 6.17138625335e-05 9.33377947625e-05 +GAGCGC 0.000450282626633 0.000121515242238 +GTTGAT 0.000329140600179 0.000257119208214 +CGACCA 9.82850403312e-05 7.74879805576e-05 +CGACCC 0.000105142136168 5.10716235493e-05 +GCATAC 0.000141713313966 0.00015849814205 +CGACCG 4.79996708594e-05 4.57883521476e-05 +CCTTAT 0.000185141587601 0.000172586865787 +GTTGAC 0.000274283833482 0.00030995192223 +GTTGAA 0.000434282736347 0.000358381910079 +ACTGGG 0.000210284272336 0.000167303594386 +GTTGAG 0.000404568654386 0.000237747213074 +TTAGGG 9.37136431064e-05 0.000153214870648 +CCTTAC 0.000214855669561 0.000120634697004 +TTCGAT 0.000178284491763 6.69214377543e-05 +CCTTAA 2.05712875112e-05 0.000252716482046 +CCTTAG 0.0 5.89965306518e-05 +CGACCT 9.82850403312e-05 8.27712519592e-05 +CAACTC 0.000189712984825 0.000199883768029 +CAACTA 0.000118856327842 0.000153214870648 +CAACTG 0.000255998244583 0.000291460472324 +AATAGG 5.25710680841e-05 0.000176109046722 +GATGGT 0.000498282297493 0.000233344486906 +GGGGTT 0.000148570409803 0.000145289963545 +GGATGT 0.000157713204252 0.000315235193632 +AATAGT 7.99994514323e-05 0.000273849567652 +GGGCTT 0.000143999012578 0.000152334325414 +CAACTT 0.000143999012578 0.000370709543349 +GGATGC 0.000141713313966 0.000165542503918 +GGATGA 9.14279444941e-06 0.000310832467464 +GGATGG 
0.000207998573724 0.000221897398869 +GGGGTA 3.885687641e-05 7.92490710248e-05 +TCTCCG 0.000189712984825 0.000155856506349 +GAGTCC 0.00041828284606 0.000165542503918 +AAACTG 0.00102170727972 0.000655125653805 +TCTCCC 0.000239998354297 0.0002122114013 +AAACTA 0.000308569312668 0.000424422802599 +TCTCCA 0.000564567557251 0.000453480795308 +AAACTC 0.000653709803133 0.000423542257366 +ACGCGA 2.74283833482e-05 6.16381663526e-05 +AGTACG 2.28569861235e-05 5.10716235493e-05 +ACGCGC 8.4570848657e-05 8.71739781273e-05 +AGTACC 5.94281639212e-05 8.36517971928e-05 +ACGCGG 5.94281639212e-05 6.69214377543e-05 +AGTACA 0.000137141916741 0.000253597027279 +AAACTT 0.000317712107117 0.000452600250075 +CCCTTC 0.000358854682139 0.000164661958685 +TCTCCT 0.000441139832184 0.000324040645968 +CCCTTG 0.000109713533393 0.000124156877939 +TTCTAT 0.000230855559848 0.000214853037001 +AGTACT 9.37136431064e-05 0.00014881214448 +ACGCGT 5.48567666965e-05 6.5160347287e-05 +GCCGAA 0.000187427286213 9.95016113978e-05 +GTGGAG 0.00189255845103 0.000370709543349 +GCCGAC 0.000210284272336 6.69214377543e-05 +GTGGAC 0.00112684941589 0.000196361587095 +GCCGAG 0.000425139941898 0.000117993061304 +GTGGAA 0.000653709803133 0.00030995192223 +TATCAT 0.000141713313966 0.000280893929521 +GTGGAT 0.000804565911548 0.000286177200923 +GCCATC 0.000927993636615 0.000211330856066 +GCCGAT 0.000166855998702 6.95630734551e-05 +CCTATT 0.000205712875112 0.000147051054013 +ACCCGG 7.08566569829e-05 5.54743497173e-05 +ACCGAG 0.000269712436258 0.000103904337566 +AAATCG 8.91422458817e-05 0.00016113977775 +CACGCG 7.08566569829e-05 9.59794304633e-05 +TTATCG 2.28569861235e-05 7.48463448567e-05 +ACCGAC 0.000173713094539 7.83685257912e-05 +AAATCC 0.000390854462712 0.000321399010267 +ACCGAA 0.000214855669561 0.000122395787472 +AAATCA 0.000386283065488 0.000664811651375 +GGCTTT 0.000445711229409 0.000315235193632 +CTTTGA 1.59998902865e-05 0.000361904091013 +AGATAC 0.000226284162623 0.000132962330275 +TCCCGA 6.85709583706e-05 
8.18907067256e-05 +CACGCT 0.000123427725067 0.00011711251607 +AAATCT 0.000404568654386 0.00056619058521 +ACCGAT 0.000182855888988 7.39657996231e-05 +GTAGTT 8.91422458817e-05 0.000222777944103 +ACCAGT 0.000319997805729 0.000201644858496 +GATGAT 0.001220563059 0.000435869890636 +GGAGCA 0.000521139283616 0.000296743743726 +GGAGCC 0.000532567776678 0.000182272863357 +CCGGCT 0.000169141697314 8.541288766e-05 +CCAAAT 0.000235426957072 0.000330204462603 +GGAGCG 0.00023314125846 0.000133842875509 +AAAACT 0.00042742564051 0.000662170015674 +GATGAC 0.0010171358825 0.000280893929521 +TGTCCC 0.000125713423679 0.000178750682423 +GATGAA 0.00131656240071 0.000483419333251 +GATGAG 0.00164113160367 0.000305549196062 +AAAACC 0.000511996489167 0.000391842628956 +AAAACA 0.000598853036436 0.00132522057658 +AAAACG 0.000173713094539 0.000302907560361 +GGAGCT 0.000605710132273 0.00027825229382 +CCAAAG 0.000358854682139 0.000239508303542 +TCGCGG 3.19997805729e-05 4.40272616804e-05 +CTAGAG 0.000365711777976 0.000131201239808 +TCGCGC 4.34282736347e-05 4.13856259796e-05 +CTACCG 4.79996708594e-05 3.87439902788e-05 +GCCACA 0.000555424762802 0.000248313755878 +AGATCA 0.000166855998702 0.000339009914939 +GCCACC 0.000518853585004 0.00017082577532 +AGAAAT 0.000246855450134 0.000591726396985 +TCCCGG 8.22851500447e-05 7.92490710248e-05 +GCGCTT 0.000107427834781 0.00012944014934 +GCCACG 0.000237712655685 8.541288766e-05 +AGATCG 4.11425750223e-05 7.13241639223e-05 +ACCAGC 0.000530282078066 0.00027825229382 +TCGCGT 2.51426847359e-05 5.72354401846e-05 +GTGCAA 0.000219427066786 0.000226300125037 +TCCCAT 0.000157713204252 0.000211330856066 +ACCCGT 0.000100570738944 6.78019829879e-05 +AGAAAG 0.000452568325246 0.000552101861473 +CTAGAC 0.000148570409803 9.95016113978e-05 +AGAAAC 0.000404568654386 0.000502791328391 +ATGTCC 0.00037942596965 0.000241269394009 +AGAAAA 0.0003885687641 0.000888470140711 +CTACCC 9.14279444941e-05 7.22047091559e-05 +ATGTAC 0.00049599659888 0.000214853037001 +GACCGA 0.000244569751522 
9.95016113978e-05 +ATGTAA 2.28569861235e-05 0.000439392071571 +TATGTG 0.000546281968352 0.000287057746156 +ATGTAG 1.82855888988e-05 0.000267685751017 +CCTATA 8.91422458817e-05 0.000156737051582 +CGAAGT 5.94281639212e-05 8.1010161492e-05 +CCGGCA 0.000107427834781 7.6607435324e-05 +GACGAC 0.000619424323947 0.000110068154201 +CCTATC 9.59993417188e-05 6.86825282215e-05 +ATGTAT 0.000276569532095 0.000434989345403 +ATTAGA 8.22851500447e-05 0.000311713012697 +GGCGTC 0.000223998464011 0.000105665428033 +GACCGG 0.000162284601477 6.25187115862e-05 +AGCACT 0.000475425311369 0.000273849567652 +TTAGAA 0.000173713094539 0.000283535565222 +CGGCTG 0.00031085501128 0.000155856506349 +TTAGAC 0.000173713094539 0.000198122677562 +CGGCTC 0.000164570300089 0.000133842875509 +TTAGAG 0.000308569312668 0.000253597027279 +CGGCTA 3.885687641e-05 4.40272616804e-05 +AGCACC 0.000425139941898 0.000208689220365 +AGCACA 0.00046628251692 0.000427944983534 +AGCACG 0.00015542750564 0.00012679851364 +GGTCGG 7.54280542076e-05 7.57268900903e-05 +CGGCTT 5.71424653088e-05 8.89350685945e-05 +TTAGAT 0.000180570190376 0.000247433210644 +AAAGAC 0.00120684886732 0.000435869890636 +CGAAGC 7.771375282e-05 8.80545233609e-05 +AAAGAA 0.0010354214714 0.000784565803145 +AAAGAG 0.00174398804122 0.000524804959231 +ATATCA 0.000169141697314 0.000335487734005 +GCGCTG 0.000498282297493 0.00022453903457 +TCGCTA 3.885687641e-05 5.28327140165e-05 +TGTGAG 0.000605710132273 0.000401528626526 +TGTGAA 0.000399997257162 0.000595248577919 +CTACCT 9.37136431064e-05 0.0001074265185 +TGTGAC 0.000399997257162 0.000306429741296 +ATATCT 0.000111999232005 0.000300265924661 +CCAGCT 0.000422854243285 0.000335487734005 +AAAGAT 0.000882279664368 0.000497508056989 +TGTGAT 0.000372568873813 0.000438511526337 +TTATTG 9.14279444941e-05 0.000494866421288 +TTATTA 7.771375282e-05 0.000975644118838 +GAAAGC 0.000354283284915 0.000346054276808 +TTATTC 9.59993417188e-05 0.000394484264657 +ACATCT 0.000283426627932 0.000353098638677 +TTTTAT 
0.000249141148746 0.00126622404593 +CTTACT 0.000118856327842 0.000157617596816 +AGACGC 0.000180570190376 0.000174347956255 +TACAAA 0.000434282736347 0.000496627511755 +AGACGA 0.000153141807028 0.000182272863357 +AAGTGA 3.42854791853e-05 0.000383917721853 +CCTCTG 0.000710852268442 0.000366306817181 +AGGTGG 6.62852597582e-05 0.000177870137189 +CCTCTC 0.000267426737645 0.000287057746156 +AGGTGC 5.48567666965e-05 0.000162020322984 +CCTCTA 0.000159998902865 0.000147051054013 +AGGTGA 1.59998902865e-05 0.000250074846345 +CTTACG 2.74283833482e-05 5.81159854182e-05 +TCCACA 0.000415997147448 0.000398886990825 +TTTTAG 1.14284930618e-05 0.000556504587641 +AGACGT 0.000109713533393 0.000175228501488 +CTTACC 0.000107427834781 0.000123276332705 +ACATCA 0.000283426627932 0.000556504587641 +CTTACA 0.000187427286213 0.000232463941673 +ACATCC 0.000198855779275 0.000274730112886 +AGGTGT 0.000100570738944 0.000324040645968 +CCTCTT 0.000249141148746 0.000286177200923 +GCGTGA 6.85709583706e-06 0.000114470880369 +CCCGGC 0.000175998793151 6.95630734551e-05 +CCCGGA 0.000178284491763 7.48463448567e-05 +CCCGGG 9.82850403312e-05 4.0505080746e-05 +TATGAA 0.000479996708594 0.00044907806914 +TGGAAT 0.000210284272336 0.000288818836624 +TAGGTG 2.28569861235e-06 0.000124156877939 +TTGTGC 0.000105142136168 0.000312593557931 +TATGAT 0.000468568215532 0.000301146469894 +TTGTGG 0.000141713313966 0.000301146469894 +CCCGGT 0.000127999122292 5.81159854182e-05 +GGCGTG 0.000221712765398 9.15767042953e-05 +TAGGTA 0.0 8.62934328936e-05 +GGTTTT 0.000262855340421 0.000431467164468 +TATCAC 0.000198855779275 0.000187556134759 +TGGAAA 0.00037942596965 0.000586443125583 +TGAGGA 1.14284930618e-05 0.000405931352694 +AAGAGG 0.00065142410452 0.000349576457743 +TGAGGC 0.0 0.000217494672701 +AAGAGA 0.000653709803133 0.000472852790448 +AAGAGC 0.000653709803133 0.000312593557931 +AAGCGT 0.000228569861235 0.000105665428033 +GCTTAG 9.14279444941e-06 0.000102143247099 +GCTTAC 0.000219427066786 0.000110068154201 +GCTTAA 
1.82855888988e-05 0.000207808675132 +TGATTC 0.0 0.000289699381857 +AAGCGC 0.000294855120993 0.000109187608967 +AAGAGT 0.000438854133572 0.000283535565222 +AAGCGA 0.000169141697314 0.00013912614691 +AAGCGG 0.000198855779275 0.000111829244668 +GCTTAT 0.000198855779275 0.000198122677562 +GTTCGG 8.22851500447e-05 6.78019829879e-05 +CTGCTA 0.000217141368173 0.000176989591955 +GCGTGT 8.91422458817e-05 0.000120634697004 +CTGCTC 0.000809137308773 0.000397125900358 +ACTTCT 0.000146284711191 0.00027825229382 +GACCGT 0.000173713094539 5.89965306518e-05 +CTGCTG 0.00202512897054 0.000695630734551 +CGGTCA 5.71424653088e-05 0.000104784882799 +GGTCGT 0.000162284601477 5.28327140165e-05 +CGGTCC 5.02853694718e-05 5.45938044837e-05 +CTAGAA 0.000205712875112 0.000149692689713 +CGGTCG 2.05712875112e-05 4.66688973813e-05 +ACTTCG 3.65711777976e-05 6.5160347287e-05 +GACCGC 0.000283426627932 6.78019829879e-05 +TTTTGG 0.000127999122292 0.000504552418858 +ACTTCC 0.000137141916741 0.000198122677562 +CTGCTT 0.000393140161325 0.000469330609513 +ACTTCA 0.000205712875112 0.000396245355124 +CTAGAT 0.000189712984825 0.000156737051582 +CACAAA 0.000454854023858 0.000675378194178 +CGGTCT 4.79996708594e-05 7.22047091559e-05 +GGTCGC 0.000111999232005 6.60408925206e-05 +GGTCGA 0.000134856218129 5.72354401846e-05 +CGTCGC 7.54280542076e-05 6.0757621119e-05 +CAGCAT 0.000402282955774 0.000450839159608 +TGAAGC 4.5713972247e-06 0.000411214624095 +CGTCGG 4.79996708594e-05 5.28327140165e-05 +TGGTTG 0.000127999122292 0.000232463941673 +GCGTGC 6.17138625335e-05 0.000104784882799 +TGCTCA 0.000191998683438 0.000339890460173 +TTCATC 0.000788566021262 0.000420020076431 +TGCTCC 0.00023314125846 0.000251835936812 +TTCATA 0.000187427286213 0.00039536480989 +TTCATG 0.000511996489167 0.000392723174189 +TGCTCG 9.37136431064e-05 0.000115351425603 +GTACAA 8.68565472694e-05 0.0002122114013 +CAGCAG 0.00174855943845 0.000679780920346 +TACACT 0.000358854682139 0.000354859729144 +GCTTTG 0.00037942596965 0.000315235193632 +CAGCAC 
0.000575996050313 0.00037335117905 +CAGCAA 0.000431997037735 0.000429706074001 +GGGAGT 0.000118856327842 0.000146170508779 +GTACAC 8.91422458817e-05 0.00016113977775 +TGATTT 0.0 0.000675378194178 +TTCATT 0.000461711119695 0.000729971998662 +GTAGCC 0.000132570519516 8.45323424264e-05 +GCTGCC 0.000619424323947 0.000228941760738 +GCTGCA 0.000628567118397 0.000417378440731 +AACTCT 0.000498282297493 0.000348695912509 +GCTGCG 0.000265141039033 0.000165542503918 +TGGCCA 0.000114284930618 0.000266805205783 +GTAGCA 0.000157713204252 0.000146170508779 +TCATCT 0.000306283614055 0.000454361340542 +TGCTCT 0.000253712545971 0.000390081538489 +AACTCA 0.000294855120993 0.000340771005407 +TGGCCT 0.000157713204252 0.000181392318123 +AACTCC 0.000420568544673 0.000191078315693 +GCTGCT 0.0007771375282 0.000456122431009 +AACTCG 0.000146284711191 0.000109187608967 +CTACAA 0.000141713313966 0.000257999753447 +GGATCC 0.000262855340421 0.000169945230086 +TGAAGA 2.28569861235e-06 0.000656006199038 +AATATT 0.000253712545971 0.000831234700527 +GTTCGA 0.000111999232005 7.6607435324e-05 +CAACCT 0.000187427286213 0.000204286494197 +TGGCCG 6.17138625335e-05 8.541288766e-05 +GGCACG 0.000139427615353 6.5160347287e-05 +GGAGGA 0.00104913566307 0.000532729866333 +GGCACA 0.00042742564051 0.000218375217935 +GACAAT 0.000477711009982 0.000251835936812 +GGCACC 0.000372568873813 9.06961590617e-05 +CAACCG 8.91422458817e-05 8.27712519592e-05 +ACGACC 5.02853694718e-05 6.5160347287e-05 +AATATG 0.000303997915443 0.000457002976243 +CAACCC 0.000121142026455 0.000156737051582 +TTAGGA 0.000143999012578 0.000216614127468 +AATATC 0.000221712765398 0.000344293186341 +GACAAA 0.000948564924126 0.00046845006428 +GGCACT 0.000372568873813 0.00012944014934 +GACAAC 0.000783994624037 0.000267685751017 +ACGACA 5.02853694718e-05 0.00012944014934 +GACAAG 0.000934850732452 0.000227180670271 +TGGCCC 8.91422458817e-05 0.000113590335136 +GCAATC 0.000173713094539 0.000159378687283 +GCAATA 0.000107427834781 0.000220136308402 +GAATTT 
0.000205712875112 0.000457883521476 +TTGAAC 0.000226284162623 0.000316996284099 +CGATGG 6.62852597582e-05 0.000100382156631 +GTTCCC 0.000249141148746 0.000130320694574 +CGATGC 5.02853694718e-05 8.98156138281e-05 +GTAAAT 0.00015542750564 0.000499269147456 +CGATGA 2.28569861235e-06 0.000136484511209 +GAATTA 0.000148570409803 0.000327562826902 +GAATTC 0.000230855559848 0.00018051177289 +GAATTG 0.000173713094539 0.000238627758308 +GTAAAC 0.000223998464011 0.000340771005407 +CGATGT 6.17138625335e-05 9.6859975697e-05 +GTAAAA 0.000212569970949 0.000631350932497 +GTAAAG 0.000301712216831 0.000277371748587 +TCCCAG 0.000329140600179 0.000235105577374 +GGATTT 0.000356568983527 0.000390962083722 +GTGCGT 0.000214855669561 0.000156737051582 +ACGCCG 0.000123427725067 6.0757621119e-05 +GGATTC 0.000313140709892 0.000178750682423 +GGATTA 7.99994514323e-05 0.00022453903457 +GGATTG 0.000185141587601 0.000206928129898 +AGCGGC 0.000303997915443 0.00014881214448 +ACGCCA 0.000127999122292 0.0001074265185 +AAAGCC 0.000719995062891 0.000307310286529 +TCTGTG 0.000706280871217 0.00051247732596 +CTATCC 8.22851500447e-05 7.92490710248e-05 +GTCCCG 0.000169141697314 9.06961590617e-05 +TACAAT 0.000326854901566 0.000339890460173 +AACCGG 0.000157713204252 7.39657996231e-05 +CTCCTT 0.000130284820904 0.000255358117747 +GGCGGA 0.000203427176499 0.000100382156631 +AACCGC 0.000303997915443 9.24572495289e-05 +AACCGA 0.000173713094539 0.000111829244668 +GTGGGA 0.000443425530796 0.000163781413451 +TACCCA 0.000239998354297 0.00011711251607 +TCTGTT 0.00031085501128 0.000510716235493 +AAAGCG 0.000262855340421 0.000190197770459 +CTCCTG 0.000653709803133 0.00036366518148 +AACCGT 0.00015542750564 0.000106545973267 +CTCCTA 7.771375282e-05 0.000114470880369 +CTCCTC 0.000367997476589 0.000420020076431 +GGCGGT 0.000178284491763 7.74879805576e-05 +AACAAG 0.000781708925425 0.000367187362415 +CCCACG 0.000105142136168 6.60408925206e-05 +AACAAC 0.000941707828289 0.000507194054559 +AACAAA 0.000799994514323 
0.000912244862019 +TACTTC 0.000470853914145 0.000195481041861 +AGCCTT 0.000210284272336 0.000206047584664 +CCGATT 5.48567666965e-05 8.71739781273e-05 +CCCACT 0.000306283614055 0.000238627758308 +AACAAT 0.000386283065488 0.000469330609513 +GACGCT 0.000319997805729 0.000168184139619 +AGCGGT 0.000127999122292 0.000105665428033 +CCGATA 3.42854791853e-05 4.4907806914e-05 +AGCCTG 0.000415997147448 0.000267685751017 +AGCCTA 8.68565472694e-05 0.000150573234947 +AGCCTC 0.000301712216831 0.000195481041861 +GTACCC 8.91422458817e-05 4.75494426149e-05 +CTCCCG 0.000100570738944 7.92490710248e-05 +TAGGCT 0.0 0.000188436679992 +ACGAAT 4.5713972247e-05 9.6859975697e-05 +CGCCTT 9.37136431064e-05 6.5160347287e-05 +AGGCAT 8.4570848657e-05 0.000173467411021 +CAGAGT 0.000399997257162 0.000281774474755 +GCATCT 0.000335997696016 0.000264163570083 +CTCCCA 0.000150856108415 0.000184033953824 +ACGAAA 0.000102856437556 0.000125917968406 +CGCCTC 0.000130284820904 0.000118873606537 +AGGCAA 0.000137141916741 0.000219255763169 +CGCCTA 2.05712875112e-05 3.08190831763e-05 +AGGCAG 0.000313140709892 0.000251835936812 +CGCCTG 0.000237712655685 0.000124156877939 +ACGAAG 0.000111999232005 0.000127679058873 +CAGAGG 0.000591995940599 0.000353098638677 +GCATCC 0.000217141368173 0.000173467411021 +GCATCA 0.000255998244583 0.000302027015128 +CAGAGC 0.000617138625335 0.000418258985964 +CGGCCA 0.000111999232005 9.59794304633e-05 +CAGAGA 0.000642281310071 0.000528327140165 +CGGCAC 9.82850403312e-05 8.01296162584e-05 +CCTAGA 9.82850403312e-05 8.98156138281e-05 +CCTAGC 0.000105142136168 4.40272616804e-05 +TTTGCT 0.000521139283616 0.000520402233063 +TAGCTC 2.28569861235e-06 0.000132962330275 +TGCGGT 6.62852597582e-05 0.000101262701865 +CCTAGG 5.02853694718e-05 2.90579927091e-05 +TAAGCA 2.28569861235e-06 0.000228061215505 +GTCTAG 6.85709583706e-06 9.77405209306e-05 +TGCGGC 0.000141713313966 9.86210661642e-05 +TGCGGA 0.000114284930618 9.86210661642e-05 +CCTAGT 0.000100570738944 6.95630734551e-05 +TGCGGG 
0.000100570738944 9.42183399961e-05 +TAAGCG 0.0 6.69214377543e-05 +TGAGTT 2.28569861235e-06 0.000350457002976 +TAGCTG 0.0 0.000235105577374 +ATCGCT 0.000383997366875 9.86210661642e-05 +TCTCGT 0.000109713533393 0.000113590335136 +ATCGCC 0.00041828284606 6.69214377543e-05 +TGAGTG 2.28569861235e-06 0.000330204462603 +ATCGCA 0.000210284272336 0.0001074265185 +TGAGTA 0.0 0.000208689220365 +ATCGCG 9.37136431064e-05 5.19521687829e-05 +TGAGTC 2.28569861235e-06 0.000230702851205 +AAGGTG 0.000820565801835 0.000262402479615 +ACGGCT 0.000226284162623 9.95016113978e-05 +CATCAT 0.000201141477887 0.000611098392124 +AAGGTA 0.000139427615353 0.000204286494197 +TTTATT 0.000262855340421 0.0015955479633 +ACGGCA 0.000164570300089 0.000112709789902 +CATCAC 0.000226284162623 0.000357501364845 +ACGGCC 0.000153141807028 8.1010161492e-05 +CATCAA 0.000150856108415 0.000389200993255 +CATCAG 0.000354283284915 0.000410334078862 +ACGGCG 9.82850403312e-05 7.6607435324e-05 +TCACCT 0.000214855669561 0.000294982653259 +TCTAAG 0.000185141587601 0.000104784882799 +TCGTTT 9.82850403312e-05 0.000183153408591 +CGGAAT 6.39995611459e-05 6.42798020534e-05 +CTCATG 0.000546281968352 0.000240388848775 +TCACCG 0.000127999122292 0.000103023792332 +TCACCA 0.000207998573724 0.000327562826902 +TCACCC 0.00019428438205 0.000182272863357 +CGGAAG 0.000175998793151 9.15767042953e-05 +AATATA 0.000127999122292 0.000582920944649 +CGGCCT 0.000127999122292 9.6859975697e-05 +CGGAAC 7.771375282e-05 3.87439902788e-05 +CGGAAA 0.000214855669561 0.000128559604107 +TAGCTT 0.0 0.000189317225226 +ATGGCG 0.000354283284915 0.000134723420742 +TGTGTA 0.000111999232005 0.000427944983534 +ATGGCC 0.000623995721172 0.000157617596816 +ATGGCA 0.000548567666965 0.000286177200923 +TCCAAT 0.000260569641808 0.000200764313263 +ATGGCT 0.000687995282318 0.000254477572513 +TGTGTT 0.000235426957072 0.000792490710248 +TTGTCG 7.99994514323e-05 0.000102143247099 +GAACAT 0.000226284162623 0.000388320448021 +AGATTA 6.39995611459e-05 0.000324921191202 
+AGATTC 0.000157713204252 0.000258880298681 +GAAACA 0.000390854462712 0.000582920944649 +AGATTG 0.000105142136168 0.000308190831763 +CTGAGT 0.000381711668263 0.000240388848775 +GAAACT 0.00034971188769 0.000350457002976 +AGAATG 0.000249141148746 0.000365426271948 +AGAATA 0.000143999012578 0.000337248824472 +AGAATC 0.000198855779275 0.000221897398869 +CTGAGC 0.000610281529498 0.00033196555307 +CTGAGA 0.000411425750223 0.000386559357554 +CTGAGG 0.000525710680841 0.000343412641107 +CATACC 6.17138625335e-05 0.000125037423172 +CATACA 8.68565472694e-05 0.000399767536058 +CATACG 3.42854791853e-05 7.30852543895e-05 +TCAAAT 0.000212569970949 0.000525685504464 +GTCCAT 0.000121142026455 0.0002122114013 +ACATAG 6.85709583706e-06 0.000106545973267 +CATACT 5.94281639212e-05 0.00018051177289 +GTCCAG 0.000459425421083 0.000272969022419 +TCAAAG 0.000313140709892 0.000404170262226 +CACGAG 0.000326854901566 0.000131201239808 +GTCCAC 0.000219427066786 0.000194600496628 +GCAATG 0.000290283723769 0.00024391102971 +CGGCGG 0.00011657062923 7.74879805576e-05 +TTGATG 0.000221712765398 0.000432347709702 +CTCATT 0.000429711339122 0.000425303347833 +CGGCGC 7.99994514323e-05 7.30852543895e-05 +TTGATC 0.000180570190376 0.00022453903457 +CGGCGA 6.85709583706e-05 5.6354894951e-05 +TCCAAG 0.000443425530796 0.000147931599246 +CTATCT 7.99994514323e-05 0.000171706320554 +TTGATT 0.000185141587601 0.000540654773436 +CGGCGT 9.14279444941e-05 7.04436186887e-05 +TATAAG 0.00023314125846 0.000181392318123 +TGACTT 6.85709583706e-06 0.000396245355124 +TCACAG 0.000381711668263 0.000415617350263 +CGGTTA 5.94281639212e-05 8.27712519592e-05 +GGTTTA 0.000166855998702 0.000269446841484 +CGGTTC 7.771375282e-05 7.04436186887e-05 +GCTGTT 0.00046628251692 0.000475494426149 +TTTACA 0.000187427286213 0.000774879805576 +CGGTTG 4.79996708594e-05 8.1010161492e-05 +GGTTTG 0.000370283175201 0.000318757374566 +TATAAA 0.000324569202954 0.000668333832309 +CGTACG 1.14284930618e-05 2.11330856066e-05 +GCTGTG 0.000969136211637 
0.000420900621665 +GCTGTC 0.000365711777976 0.000285296655689 +CGGTTT 0.000123427725067 0.000138245601677 +GCTGTA 0.000326854901566 0.000336368279239 +GGGAGG 0.000105142136168 0.000228941760738 +TCATAG 1.14284930618e-05 0.00018051177289 +TATGCG 7.54280542076e-05 6.86825282215e-05 +TCTCAA 0.000258283943196 0.000283535565222 +AGGTTG 0.000100570738944 0.00018051177289 +GCATTT 0.000265141039033 0.000614620573059 +AGGTTC 0.000118856327842 0.000124156877939 +AGGTTA 5.48567666965e-05 0.000177870137189 +ACTCCT 0.000367997476589 0.000223658489337 +TTTTTC 0.000306283614055 0.000870859236039 +GAGGGC 0.000653709803133 0.000195481041861 +TTTTTA 0.000102856437556 0.00143528873078 +TCCTCT 0.000406854352999 0.000405931352694 +TTTTGT 0.000148570409803 0.000998538294912 +GAGGGG 0.00031085501128 0.000194600496628 +TTAAAT 0.000185141587601 0.000975644118838 +AGGTTT 0.000239998354297 0.00033196555307 +ACATAT 0.000185141587601 0.000380395540919 +TTAAAC 0.000153141807028 0.000528327140165 +TTAAAA 0.000217141368173 0.00132874275752 +ACTCCG 0.000109713533393 7.83685257912e-05 +TTAAAG 0.000244569751522 0.000670094922776 +ACTCCA 0.00037942596965 0.000309071376997 +TTTTTT 0.000221712765398 0.00264779951746 +ACTCCC 0.000141713313966 0.000133842875509 +CTGTAA 3.19997805729e-05 0.000482538788018 +GTCACG 0.000207998573724 0.000103023792332 +CTGTAC 0.000484568105819 0.000228061215505 +GCAACT 0.000169141697314 0.000169945230086 +GCAATT 0.000123427725067 0.000242149939242 +CTGTAG 2.97140819606e-05 0.000257999753447 +GTCACA 0.000386283065488 0.000300265924661 +GCAACG 6.85709583706e-05 8.541288766e-05 +GTCACT 0.000370283175201 0.000243030484476 +TAGGCG 0.0 3.96245355124e-05 +GCAACC 0.000166855998702 0.000121515242238 +CTGTAT 0.0003885687641 0.000331085007837 +GCAACA 0.000297140819606 0.000300265924661 +TAGGCC 2.28569861235e-06 8.98156138281e-05 +TGGTTC 0.000235426957072 0.000199003222796 +ATTCAT 0.00027199813487 0.000529207685399 +TCCCAC 0.000157713204252 0.00024655266541 +GCGATT 9.37136431064e-05 
0.000122395787472 +TCGTGT 7.08566569829e-05 0.000109187608967 +TGAATT 0.0 0.000539774228202 +GTCTCT 0.000255998244583 0.000276491203353 +TCCGTG 0.000109713533393 8.45323424264e-05 +ATTCAG 0.000591995940599 0.000402409171759 +ATTCAA 0.000246855450134 0.000417378440731 +ATTCAC 0.000297140819606 0.000394484264657 +GTCTCC 0.000313140709892 0.000190197770459 +GTCTCA 0.000150856108415 0.000191078315693 +GCGATG 0.000182855888988 9.77405209306e-05 +GTCTCG 8.91422458817e-05 8.18907067256e-05 +GCGATA 4.34282736347e-05 7.30852543895e-05 +TCGTGC 3.885687641e-05 8.1010161492e-05 +GCGATC 7.08566569829e-05 7.30852543895e-05 +CGATTG 7.99994514323e-05 7.6607435324e-05 +TCTACA 0.000207998573724 0.000301146469894 +GAAGGA 0.000756566240689 0.000346054276808 +CGATTC 0.000130284820904 0.000109187608967 +CGATTA 4.11425750223e-05 0.000115351425603 +CAATCT 9.37136431064e-05 0.000214853037001 +GGAACA 0.000342854791853 0.00024655266541 +GGAACG 0.000139427615353 8.541288766e-05 +TGACGA 0.0 0.000162020322984 +TACTGC 0.000326854901566 0.000205167039431 +CGATTT 8.22851500447e-05 0.000152334325414 +GAAGGT 0.000395425859937 0.000235105577374 +CTCGCT 0.000189712984825 0.000146170508779 +CAATCG 3.42854791853e-05 6.33992568198e-05 +CAATCA 0.000130284820904 0.000368948452882 +CAATCC 0.000105142136168 0.000200764313263 +TCCTGA 3.19997805729e-05 0.000357501364845 +AGTCCA 0.000299426518218 0.000259760843915 +TCCTGC 0.000258283943196 0.000349576457743 +AGTCCC 0.000205712875112 0.000137365056443 +ACTAAA 0.000399997257162 0.000411214624095 +TCCTGG 0.00027199813487 0.000249194301111 +ACTAAC 0.000162284601477 0.000168184139619 +TAACGC 2.28569861235e-06 8.18907067256e-05 +CAGCTG 0.00151084678276 0.000570593311378 +CAGCTA 0.00023314125846 0.000205167039431 +CAGCTC 0.000708566569829 0.000399767536058 +ACTAAT 0.000143999012578 0.000263283024849 +AGTCCT 0.000367997476589 0.000175228501488 +TCCTGT 0.000269712436258 0.000328443372136 +TAATGT 0.0 0.000526566049698 +CAGCTT 0.000498282297493 0.000371590088583 +GGTTAG 
2.28569861235e-06 0.000122395787472 +TAACGT 2.28569861235e-06 0.000141767782611 +ACGGAC 0.000219427066786 9.77405209306e-05 +GTACTT 9.37136431064e-05 0.00017082577532 +CGACAT 5.94281639212e-05 9.77405209306e-05 +TTGTTC 0.000148570409803 0.000362784636247 +CCGAGG 5.94281639212e-05 8.36517971928e-05 +CCGAGA 5.48567666965e-05 0.00011711251607 +ACGGAA 7.54280542076e-05 8.89350685945e-05 +CCGAGC 0.000114284930618 0.000114470880369 +CCGCGT 4.34282736347e-05 4.66688973813e-05 +CGACAC 0.000150856108415 8.36517971928e-05 +CGACAA 7.54280542076e-05 0.000109187608967 +CGACAG 0.000210284272336 0.000125037423172 +GTACTA 2.97140819606e-05 8.45323424264e-05 +CCGCGC 6.39995611459e-05 6.78019829879e-05 +ACGGAG 0.000372568873813 0.000120634697004 +CCGCGA 3.42854791853e-05 4.4907806914e-05 +TACGCA 0.000130284820904 6.25187115862e-05 +CCGCGG 7.08566569829e-05 5.6354894951e-05 +TTGTTT 0.00019428438205 0.0011579169822 +GCATAT 0.000189712984825 0.000267685751017 +TCCGTT 8.68565472694e-05 0.000114470880369 +AGGAAC 0.0003885687641 0.000221897398869 +AATAAC 0.000297140819606 0.000417378440731 +AATAAA 0.000459425421083 0.00135515911452 +AATAAG 0.000409140051611 0.000378634450452 +TACTAA 1.37141916741e-05 0.000203405948964 +TGGAAG 0.000404568654386 0.000303788105595 +AATAAT 0.00027199813487 0.000773999260342 +ACGGTG 0.000324569202954 0.000124156877939 +CCAACC 0.000173713094539 0.000169945230086 +CCAACA 0.000317712107117 0.000309071376997 +TTATGC 6.39995611459e-05 0.000269446841484 +CCAACG 5.94281639212e-05 7.04436186887e-05 +GGGATT 0.000242284052909 0.000183153408591 +CGCCGC 0.000169141697314 0.000108307063734 +AGTAAA 0.000436568434959 0.00052216332353 +CGCCGA 3.885687641e-05 6.42798020534e-05 +AGTAAC 0.000221712765398 0.000155856506349 +CGCCGG 7.771375282e-05 7.6607435324e-05 +GAGTAA 3.65711777976e-05 0.000198122677562 +AGTAAG 0.000207998573724 0.000182272863357 +CTCAGG 0.000258283943196 0.000179631227656 +CTCAGC 0.000575996050313 0.000303788105595 +CCAACT 0.000159998902865 0.000183153408591 
+GAGTAT 0.000413711448836 0.000144409418312 +GCTTCA 0.000292569422381 0.000354859729144 +GTGTAG 1.14284930618e-05 0.000199883768029 +TCGAGC 7.54280542076e-05 8.18907067256e-05 +AGTAAT 0.000228569861235 0.000338129369706 +GTGTAC 0.000347426189078 0.000133842875509 +CGCCGT 8.91422458817e-05 5.28327140165e-05 +GTGGCA 0.000431997037735 0.000182272863357 +CATATT 0.000123427725067 0.000434108800169 +CTCGGA 0.000162284601477 9.06961590617e-05 +CTCGGC 0.000246855450134 9.24572495289e-05 +GCCGCC 0.000292569422381 9.6859975697e-05 +GCCGCA 0.00015542750564 0.000114470880369 +GCCGCG 9.82850403312e-05 6.5160347287e-05 +TATTGA 1.14284930618e-05 0.000339009914939 +ACACTT 0.000214855669561 0.000416497895497 +CATATG 0.000127999122292 0.000282655019988 +TCGCTC 0.000171427395926 0.000128559604107 +CATATC 0.000121142026455 0.000199003222796 +CATATA 5.71424653088e-05 0.000270327386718 +GTCAGC 0.000397711558549 0.000226300125037 +TACTAC 0.000363426079364 0.000152334325414 +ACACTC 0.000297140819606 0.000343412641107 +ACACTA 0.000146284711191 0.000282655019988 +ACACTG 0.000518853585004 0.000539774228202 +GCCGCT 0.000258283943196 0.000135603965976 +ACCGGA 0.000219427066786 8.62934328936e-05 +AAATAA 5.02853694718e-05 0.00133842875509 +ACCGGC 0.000219427066786 8.36517971928e-05 +AAATAC 0.000489139503043 0.00044643643344 +CTTTAA 2.28569861235e-05 0.000656886744272 +ACCGGG 0.000121142026455 5.72354401846e-05 +AAATAG 2.74283833482e-05 0.000382156631386 +AACTTA 6.62852597582e-05 0.000303788105595 +CAGGTA 0.000118856327842 0.00016113977775 +AACTTC 0.00069028098093 0.000254477572513 +CAGGTC 0.000338283394628 0.000201644858496 +CACGAC 0.000178284491763 7.83685257912e-05 +CACGAA 0.000107427834781 9.06961590617e-05 +CAGGTG 0.000934850732452 0.000285296655689 +CTTTAT 0.000207998573724 0.000515999506895 +AATCCC 0.000219427066786 0.000187556134759 +AATCCA 0.000299426518218 0.000340771005407 +AAATAT 0.000395425859937 0.000987091206875 +AATCCG 8.91422458817e-05 9.59794304633e-05 +CACGAT 9.14279444941e-05 
0.000103904337566 +CAGGTT 0.000347426189078 0.000257999753447 +AACTTT 0.000441139832184 0.00048077769755 +TCTGTC 0.000340569093241 0.000411214624095 +GGGATA 9.82850403312e-05 0.000117993061304 +GGGATC 0.000221712765398 0.000115351425603 +GGGTGT 7.771375282e-05 0.000174347956255 +GGAGAT 0.000822851500447 0.000293221562792 +GATGCT 0.000715423665666 0.00029762428896 +CGCAGT 0.000230855559848 0.000103023792332 +GAGTCG 0.000196570080662 7.22047091559e-05 +GAGCCT 0.000518853585004 0.00015849814205 +GGAGAC 0.000886851061593 0.000209569765599 +GGCGGC 0.000237712655685 7.74879805576e-05 +GGAGAA 0.000905136650492 0.000390962083722 +GGGTGC 4.79996708594e-05 9.33377947625e-05 +GGAGAG 0.00124799144234 0.000366306817181 +TGGAAC 0.000274283833482 0.000217494672701 +GGGTGG 4.5713972247e-05 0.000162020322984 +GAGCCA 0.000390854462712 0.000249194301111 +CGCAGG 0.000201141477887 9.77405209306e-05 +GATGCG 0.000235426957072 0.000111829244668 +CGCAGA 0.000269712436258 0.000176989591955 +GATGCA 0.000564567557251 0.000315235193632 +CGCAGC 0.000297140819606 0.00017082577532 +GATGCC 0.000623995721172 0.000169064684853 +CGGATT 0.000102856437556 8.62934328936e-05 +GGTAGT 8.22851500447e-05 9.95016113978e-05 +ACCCCT 0.000265141039033 0.000144409418312 +GCCAAT 0.000454854023858 0.000191078315693 +GGCGGG 0.000118856327842 8.89350685945e-05 +AGAAGT 0.000146284711191 0.00027561065812 +ACCCCA 0.000201141477887 0.000169945230086 +GGTAGA 9.59993417188e-05 9.42183399961e-05 +ACCCCC 0.000130284820904 0.000102143247099 +CGGATG 0.000182855888988 0.000125037423172 +GCCAAG 0.000802280212936 0.000136484511209 +CGGATA 8.68565472694e-05 6.78019829879e-05 +ACCCCG 0.000118856327842 6.86825282215e-05 +CGGATC 0.000137141916741 0.000102143247099 +AGAAGA 0.000297140819606 0.000578518218481 +AGAAGC 0.000189712984825 0.000303788105595 +GCAAGA 0.000180570190376 0.000233344486906 +AGAAGG 0.000201141477887 0.00030995192223 +CACTCC 0.000205712875112 0.000254477572513 +CACTCA 0.000159998902865 0.000332846098304 +CACTCG 
0.000137141916741 0.000110948699435 +TGCGTT 6.39995611459e-05 0.000188436679992 +TCAGCT 0.00034971188769 0.000399767536058 +AGCTTT 0.000315426408505 0.000412975714562 +CCCCGG 4.11425750223e-05 4.4907806914e-05 +TGCGTA 3.885687641e-05 8.98156138281e-05 +CCCCGC 8.91422458817e-05 8.18907067256e-05 +CACTCT 0.000249141148746 0.00027561065812 +AGCTTC 0.000420568544673 0.000245672120177 +AGCTTA 6.39995611459e-05 0.000203405948964 +AGCTTG 0.000198855779275 0.000241269394009 +TTAGCT 0.000102856437556 0.000254477572513 +TTAGCC 0.000123427725067 0.000156737051582 +GCACTG 0.000523424982229 0.000316115738866 +TTAGCA 0.000143999012578 0.000262402479615 +GACCAT 0.000239998354297 0.000149692689713 +TTAGCG 3.42854791853e-05 8.98156138281e-05 +TACCGG 0.000139427615353 5.37132592501e-05 +TCATAT 0.000143999012578 0.000332846098304 +ATAGGT 6.62852597582e-05 0.000106545973267 +ATAAGG 4.79996708594e-05 0.000169064684853 +TGTGCT 0.000319997805729 0.000408572988394 +AAGGGC 0.00042742564051 0.000176109046722 +ATAAGC 0.000114284930618 0.00018051177289 +AAGGGA 0.00042742564051 0.000239508303542 +ATAAGA 9.37136431064e-05 0.000276491203353 +CCAGAC 0.000589710241987 0.000264163570083 +TCTGTA 0.000180570190376 0.000355740274378 +CCAGAA 0.000591995940599 0.000361904091013 +CCAGAG 0.000941707828289 0.000315235193632 +GCAAGT 0.000148570409803 0.000196361587095 +TGTGCG 0.000125713423679 0.00020252540373 +ATAAGT 6.85709583706e-05 0.00023422503214 +ATAGGG 4.5713972247e-05 9.33377947625e-05 +TGTGCC 0.000292569422381 0.000181392318123 +ATAGGA 0.000132570519516 0.000127679058873 +TGTGCA 0.000217141368173 0.000433228254935 +ATAGGC 7.54280542076e-05 0.000106545973267 +GCTCGC 0.000171427395926 0.00011711251607 +TAAGTT 2.28569861235e-06 0.000306429741296 +TTTGCG 0.000150856108415 0.000102143247099 +CCAGAT 0.000516567886392 0.000240388848775 +AGACAA 0.000265141039033 0.000410334078862 +GACGCA 0.000235426957072 0.000163781413451 +AGACAC 0.000251426847359 0.000352218093443 +TCCATT 0.000319997805729 
0.000316115738866 +AGACAG 0.000571424653088 0.000381276086153 +GCTTTT 0.000372568873813 0.000574115492313 +TTTTCT 0.000242284052909 0.000924572495289 +CGCTGT 0.000290283723769 0.000213972491767 +GACGCC 0.000342854791853 0.000110948699435 +TCAAGA 0.00011657062923 0.000315235193632 +CGTGGG 8.4570848657e-05 6.5160347287e-05 +CTATAG 9.14279444941e-06 0.000155856506349 +AGACAT 0.000191998683438 0.000348695912509 +CGTGGA 0.000205712875112 0.000115351425603 +TTTTCG 7.54280542076e-05 0.000145289963545 +CCCACA 0.000399997257162 0.000222777944103 +CGCTGC 0.000303997915443 0.000213972491767 +TTTTCC 0.000196570080662 0.0005318493211 +TTTTCA 0.000205712875112 0.000830354155293 +CGCTGG 0.000290283723769 0.000153214870648 +TATGGA 0.000402282955774 0.000236866667841 +TATGGC 0.000292569422381 0.000156737051582 +CGTAGG 3.19997805729e-05 3.96245355124e-05 +CCCACC 0.000354283284915 0.000125037423172 +TATGGG 0.000175998793151 0.000153214870648 +CGTAGC 4.11425750223e-05 5.37132592501e-05 +TACCGT 0.000137141916741 6.33992568198e-05 +TGGACT 0.00019428438205 0.00033196555307 +CGTAGT 5.02853694718e-05 5.81159854182e-05 +GCTCGA 0.000210284272336 9.77405209306e-05 +TATGGT 0.000207998573724 0.000167303594386 +TGGACA 0.000210284272336 0.000330204462603 +TGGACC 0.00019428438205 0.000127679058873 +TGGACG 0.000134856218129 0.000163781413451 +CCGGTT 0.000123427725067 8.1010161492e-05 +GACCAA 0.00027199813487 0.000203405948964 +GCTTGA 2.05712875112e-05 0.000226300125037 +TGAGAC 0.0 0.000238627758308 +GCTTGC 9.82850403312e-05 0.000149692689713 +TGAGAA 4.5713972247e-06 0.00038303717662 +TGAGAG 6.85709583706e-06 0.000394484264657 +GCTTGG 0.000118856327842 0.000165542503918 +CCTGTT 0.000425139941898 0.000325801736435 +CCGGTG 0.000315426408505 9.06961590617e-05 +CCGGTC 0.000127999122292 7.04436186887e-05 +CCGGTA 3.885687641e-05 4.75494426149e-05 +CCTGTC 0.000306283614055 0.000226300125037 +CCTGTA 0.000150856108415 0.000218375217935 +CCTGTG 0.000557710461414 0.00027825229382 +GCTTGT 0.000121142026455 
0.000300265924661 +TGAGAT 4.5713972247e-06 0.000353098638677 +TCCGAA 9.59993417188e-05 9.42183399961e-05 +GGTTGT 0.000100570738944 0.000227180670271 +TCCTCA 0.000358854682139 0.000365426271948 +TATCAG 0.000452568325246 0.000300265924661 +TATTGC 7.54280542076e-05 0.000237747213074 +TAGCGT 0.0 6.25187115862e-05 +GGTTGG 0.000105142136168 0.000157617596816 +GGTTGA 1.37141916741e-05 0.000163781413451 +TTCACG 0.000146284711191 0.000128559604107 +GGTTGC 0.000109713533393 0.000123276332705 +GGAAAC 0.000605710132273 0.000360143000546 +TCCCGC 0.000191998683438 0.000104784882799 +GTAACG 3.885687641e-05 0.000102143247099 +CAGCGT 0.000217141368173 0.000150573234947 +GTAACC 0.000100570738944 0.000125037423172 +CGTTTT 0.000105142136168 0.00028793829139 +ATTCTC 0.000249141148746 0.000277371748587 +ATTCTA 0.000130284820904 0.000184914499058 +ATTCTG 0.00062171002256 0.000420020076431 +TCATTT 0.00023314125846 0.000803937798285 +CAGCGA 0.000187427286213 0.000157617596816 +CAGCGC 0.000345140490465 0.00027825229382 +TGCTAG 2.28569861235e-06 0.000128559604107 +CAGCGG 0.000294855120993 0.000203405948964 +CCTTGT 9.59993417188e-05 0.000184914499058 +TCATTC 0.000178284491763 0.000356620819612 +CGTTTG 0.000162284601477 0.000206928129898 +CGTTTA 5.71424653088e-05 0.000181392318123 +ATTCTT 0.000228569861235 0.000327562826902 +CGTTTC 0.000164570300089 0.000174347956255 +TGGCAA 9.37136431064e-05 0.00022453903457 +CCGATC 0.00011657062923 5.54743497173e-05 +TGGCAG 0.000356568983527 0.000290579927091 +GCTGAG 0.000959993417188 0.000343412641107 +GCTGAA 0.000825137199059 0.000537132592501 +GCTGAC 0.000537139173903 0.000255358117747 +TGGCAT 0.000150856108415 0.000251835936812 +GCTGAT 0.000644567008683 0.000358381910079 +CCGATG 0.000141713313966 8.18907067256e-05 +GGCCGC 0.000242284052909 8.45323424264e-05 +GAACTT 0.000313140709892 0.000200764313263 +GGCCGA 0.000191998683438 7.83685257912e-05 +GGCAAT 0.000276569532095 0.000143528873078 +CTGATT 0.000452568325246 0.00041473680503 +CCATCG 9.37136431064e-05 
7.74879805576e-05 +CTAAAT 0.000150856108415 0.00035133754821 +CCATCC 0.000239998354297 0.000156737051582 +CCATCA 0.000297140819606 0.000350457002976 +CTGATC 0.000642281310071 0.000277371748587 +CTGATA 0.000178284491763 0.000268566296251 +GAGTTT 0.00103313577278 0.00040505080746 +CTGATG 0.000847994185183 0.000474613880915 +GGCAAA 0.000635424214234 0.000272969022419 +CTAAAG 0.000306283614055 0.000281774474755 +CCATCT 0.000335997696016 0.000282655019988 +CTAAAC 0.000201141477887 0.000288818836624 +CTAAAA 0.00023314125846 0.000513357871194 +GACACC 0.00062171002256 0.000143528873078 +GACACA 0.000596567337824 0.000402409171759 +GACACG 0.000292569422381 0.000108307063734 +GCGCCT 0.000100570738944 8.18907067256e-05 +TTCACC 0.000642281310071 0.000294982653259 +TAGCGA 0.0 5.54743497173e-05 +TACGAG 0.000413711448836 5.72354401846e-05 +TACGAC 0.00034971188769 4.57883521476e-05 +GACACT 0.000594281639212 0.000236866667841 +GGCGAT 0.000148570409803 7.57268900903e-05 +TGTTAC 0.000153141807028 0.000238627758308 +ACGCTG 0.000356568983527 0.000181392318123 +ACGCTA 6.17138625335e-05 6.86825282215e-05 +ACGCTC 0.000201141477887 0.000112709789902 +TCGATG 0.000105142136168 0.000100382156631 +TCGAGG 3.885687641e-05 7.48463448567e-05 +TCGATC 6.85709583706e-05 5.6354894951e-05 +ACGCTT 0.000107427834781 0.000149692689713 +TCTGGG 0.000212569970949 0.00025623866298 +GCATTG 0.000169141697314 0.000221897398869 +GAACTG 0.000692566679543 0.000315235193632 +TCGATT 6.39995611459e-05 0.000104784882799 +TCGGCA 0.000105142136168 6.16381663526e-05 +AGGCAC 0.000105142136168 0.000147931599246 +TTTTGA 9.14279444941e-06 0.00076871598894 +AACCAA 0.000265141039033 0.000305549196062 +TGCCGG 0.000109713533393 7.92490710248e-05 +AACCAC 0.000397711558549 0.000262402479615 +ACGAAC 7.771375282e-05 8.541288766e-05 +AACCAG 0.000733709254565 0.000289699381857 +TGCCGA 5.25710680841e-05 8.45323424264e-05 +TTGGAC 0.000395425859937 0.000241269394009 +TCTGGA 0.000585138844762 0.000469330609513 +GAGTTC 0.000790851719874 
0.000177870137189 +TGCCGT 7.771375282e-05 0.000112709789902 +AACCAT 0.000242284052909 0.000250074846345 +AACAGA 0.000436568434959 0.00059789021362 +ACGTCG 3.65711777976e-05 4.75494426149e-05 +AACAGC 0.00081599440461 0.000442914252505 +ACGTCC 0.000139427615353 0.000110068154201 +AACAGG 0.000303997915443 0.000293221562792 +ACGTCA 0.000105142136168 0.000184033953824 +CCCAAG 0.000395425859937 0.000104784882799 +GTCCCT 0.000164570300089 0.000120634697004 +GACGAA 0.000262855340421 0.000124156877939 +CCCAAC 0.000486853804431 0.000147051054013 +CCCAAA 0.000795423117099 0.000290579927091 +ACGTCT 0.000164570300089 0.000171706320554 +GGGAGA 0.000148570409803 0.000184033953824 +AACAGT 0.000434282736347 0.000438511526337 +GGGAGC 0.000146284711191 0.000127679058873 +CCCAAT 0.000294855120993 0.00014881214448 +GACGAT 0.000377140271038 0.000147051054013 +GGGCGC 7.771375282e-05 6.25187115862e-05 +GGGCGA 7.771375282e-05 7.74879805576e-05 +GGGCGG 4.5713972247e-05 8.18907067256e-05 +TCTCTT 0.000287998025156 0.000423542257366 +TGGGGG 7.54280542076e-05 0.000167303594386 +GTACAG 0.000253712545971 0.000228061215505 +TCATAC 0.000153141807028 0.000208689220365 +CAGAAT 0.000516567886392 0.000447316978673 +ACGACT 7.31423555953e-05 9.42183399961e-05 +TCTCTC 0.00031085501128 0.000459644611944 +TCTCTA 0.000196570080662 0.000200764313263 +TCTCTG 0.000946279225514 0.000457883521476 +GGGCGT 3.885687641e-05 4.66688973813e-05 +CAGAAA 0.000969136211637 0.000739657996231 +CAGAAC 0.000797708815711 0.00033196555307 +ACGACG 5.02853694718e-05 6.33992568198e-05 +CAGAAG 0.00124113434651 0.000434108800169 +CACTTT 0.00023314125846 0.000573234947079 +CCTAAT 0.000164570300089 0.000145289963545 +GCATTC 0.000169141697314 0.000207808675132 +GTCATT 0.000511996489167 0.000381276086153 +TGCCCC 0.000109713533393 8.80545233609e-05 +CACTTG 0.000178284491763 0.00024391102971 +CCTAAC 0.000171427395926 9.42183399961e-05 +CCTAAA 0.000475425311369 0.000250074846345 +CACTTC 0.000354283284915 0.00029762428896 +CCTAAG 
0.000125713423679 7.6607435324e-05 +CACTTA 7.08566569829e-05 0.000241269394009 +GTCCCA 0.000171427395926 0.000175228501488 +GTCCCC 0.000114284930618 0.000112709789902 +AAGATT 0.000628567118397 0.000412095169329 +ATCGAT 0.000347426189078 0.000103904337566 +GACTCG 0.000258283943196 0.0001074265185 +CTCATC 0.000754280542076 0.000324921191202 +GACTCC 0.00046628251692 0.000188436679992 +CTCATA 0.000189712984825 0.000213091946533 +GACTCA 0.000278855230707 0.000244791574943 +AAGATA 0.000187427286213 0.00029762428896 +AAGATC 0.00097142191025 0.000240388848775 +AAGATG 0.00092342223939 0.000438511526337 +GACTCT 0.00062171002256 0.000229822305972 +ATCGAG 0.000658281200357 8.1010161492e-05 +TTGAGA 0.000150856108415 0.00033196555307 +ATCGAA 0.000207998573724 9.6859975697e-05 +ATCGAC 0.000420568544673 7.30852543895e-05 +ATATTT 0.000219427066786 0.00102935737809 +ACGGTA 3.885687641e-05 7.57268900903e-05 +GGACTC 0.000297140819606 0.000173467411021 +ATAGTT 0.000134856218129 0.000299385379427 +CTACCA 0.000143999012578 0.000106545973267 +TACTGT 0.000258283943196 0.000349576457743 +ATATTC 0.000166855998702 0.000407692443161 +ATATTA 8.4570848657e-05 0.000530088230632 +ATATTG 8.91422458817e-05 0.000393603719423 +ATAGTA 5.94281639212e-05 0.000181392318123 +ATAGTC 0.000107427834781 0.000130320694574 +TACTGG 0.000258283943196 0.000185795044291 +ATAGTG 0.000217141368173 0.000159378687283 +GATCGA 0.000141713313966 5.19521687829e-05 +CAGGCA 0.000425139941898 0.000215733582234 +AGGAAG 0.000683423885093 0.000397125900358 +CATCCG 9.37136431064e-05 0.000103904337566 +AGGAAA 0.000580567447538 0.000574996037546 +CATCCA 0.000258283943196 0.000328443372136 +CAGGCG 0.000356568983527 0.000105665428033 +CATCCC 0.000182855888988 0.000194600496628 +TTATGA 1.59998902865e-05 0.000427944983534 +ATAGAA 0.000191998683438 0.000241269394009 +CGGAGT 7.99994514323e-05 8.71739781273e-05 +TTATGG 6.39995611459e-05 0.000249194301111 +CATCCT 0.000251426847359 0.000279132839054 +AGGAAT 0.000217141368173 0.00029762428896 
+CAGGCT 0.000596567337824 0.000235986122607 +ACGGAT 0.000157713204252 0.000110948699435 +CGGAGA 0.000150856108415 0.000124156877939 +CGGAGC 0.000150856108415 0.000109187608967 +TTATGT 6.17138625335e-05 0.000460525157177 +CAGGCC 0.000685709583706 0.000194600496628 +CGGAGG 0.000127999122292 0.000132081785041 +ATGGAA 0.000477711009982 0.00036366518148 +ATGGAC 0.00092342223939 0.000236866667841 +ATGGAG 0.00152456097444 0.000401528626526 +CGCTTG 9.37136431064e-05 0.000103023792332 +CGCTTA 3.65711777976e-05 5.98770758854e-05 +CGCTTC 0.000447996928021 0.000134723420742 +TGAACA 9.14279444941e-06 0.000567071130444 +ATGGAT 0.000685709583706 0.000324921191202 +TTTGCA 0.000370283175201 0.000539774228202 +CGCTTT 0.000276569532095 0.000210450310832 +GCCGTG 0.000338283394628 0.000120634697004 +GATCGT 0.000132570519516 5.01910783157e-05 +TTTGCC 0.000459425421083 0.000296743743726 +TTGTGA 2.28569861235e-05 0.000401528626526 +GAAAAG 0.00085485128102 0.000466688973813 +GAAAAC 0.000850279883795 0.000571473856612 +ATAGAT 0.000201141477887 0.000225419579804 +GAAAAA 0.00069942377538 0.000825070883891 +GTTCCT 0.000445711229409 0.000233344486906 +TAAGTA 0.0 0.00022453903457 +TAGGGC 2.28569861235e-06 0.000109187608967 +ACGGTT 0.000148570409803 9.77405209306e-05 +GAAAAT 0.000555424762802 0.000568832220911 +ACAAGG 8.68565472694e-05 0.000205167039431 +ACAAGA 0.000143999012578 0.000364545726714 +ACAAGC 0.000180570190376 0.000267685751017 +ACACGT 0.000105142136168 0.000141767782611 +CTGCGT 0.000269712436258 0.000159378687283 +GACGTC 0.000303997915443 0.000181392318123 +GCGGAG 0.000461711119695 0.000132081785041 +CATAAG 0.000169141697314 0.000175228501488 +GCGGAA 0.000150856108415 9.24572495289e-05 +CATAAA 0.000201141477887 0.000530968775866 +GCCAAC 0.000596567337824 0.000143528873078 +CATAAC 0.000127999122292 0.000228941760738 +ACACGC 0.000150856108415 0.000162900868218 +ACAAGT 0.000150856108415 0.000272088477185 +ACACGA 0.000130284820904 0.000138245601677 +ACACGG 9.59993417188e-05 
0.000115351425603 +CATAAT 0.000121142026455 0.000370709543349 +GCGGAT 0.00019428438205 0.000103023792332 +GCACAT 0.000185141587601 0.000316115738866 +CTGCGG 0.000381711668263 0.000169064684853 +CTGCGA 0.00019428438205 0.000104784882799 +CTGCGC 0.000484568105819 0.000153214870648 +CGGCAA 7.31423555953e-05 7.39657996231e-05 +ACCGGT 0.000118856327842 6.33992568198e-05 +CGGCAG 0.000297140819606 0.000106545973267 +GACGCG 0.000159998902865 7.13241639223e-05 +CCTCCT 0.0007771375282 0.000367187362415 +TGTTGA 9.14279444941e-06 0.000535371502034 +CGGCAT 0.000107427834781 7.57268900903e-05 +CCTCCC 0.000219427066786 0.00015849814205 +CCTCCA 0.00073828065179 0.000338129369706 +CACGCC 0.000173713094539 9.6859975697e-05 +CCTCCG 0.000269712436258 0.000132962330275 diff --git a/bin/cpat_model/zebrafish_cutoff.txt b/bin/cpat_model/zebrafish_cutoff.txt new file mode 100755 index 0000000..40b6161 --- /dev/null +++ b/bin/cpat_model/zebrafish_cutoff.txt @@ -0,0 +1,2 @@ +Coding Probability Cutoff: 0.381 +Achieved Sensitivity and Specificity: 0.984 diff --git a/bin/rename_lncRNA_2.pl b/bin/rename_lncRNA_2.pl index 62296c7..537b511 100644 --- a/bin/rename_lncRNA_2.pl +++ b/bin/rename_lncRNA_2.pl @@ -1,43 +1,89 @@ #!/usr/bin/perl -w use strict; +#die ("usage: ") unless @ARGV > 2; +#print "#Query file ".$ARGV[0]." 
with file_number ".$ARGV[1]."\n"; my %know_lnc; open FH,"known.lncRNA.bed" or die; while(){ chomp; my @field=split "\t"; - $know_lnc{$field[0].'\t'.$field[1].'\t'.$field[2].'\t'.$field[5].'\t'.$field[7]} = $field[3]; + if ($field[7] eq "exon"){ + $know_lnc{$field[0].'\t'.$field[1].'\t'.$field[5]} = $field[3]; + $know_lnc{$field[0].'\t'.$field[2].'\t'.$field[5]} = $field[3]; + } } -my %genecode; -open FH,"gencode.v25.annotation.chrX.gtf_mod.gtf" or die; +my %genecode;my %lncpedia; +if (@ARGV == 2){ +open FH,"$ARGV[0]" or die; while(){ chomp; + if ($_ =~ /^#/){ + next; + } my @field=split "\t"; - $_=~/gene_name "(.+?)"/; + if ($field[2] ne "exon"){ + next; + } + $_=~/gene_id "(.+?)"/; my $gene_name=$1; - my $loc = $field[0].'\t'.($field[3]-1).'\t'.$field[4].'\t'.$field[6].'\t'.$field[2]; - foreach my $location (keys %know_lnc){ - if($location eq $loc){ - $genecode{$know_lnc{$loc}} = $gene_name; - } + my $loc1 = $field[0].'\t'.($field[3]-1).'\t'.$field[6]; + my $loc2 = $field[0].'\t'.$field[4].'\t'.$field[6]; + if (defined($know_lnc{$loc1})){ + $genecode{$know_lnc{$loc1}} = $gene_name; + } + if (defined($know_lnc{$loc2})){ + $genecode{$know_lnc{$loc2}} = $gene_name; } } -open FH,"lncipedia_4_0.chrX.gtf_mod.gtf" or die; +open FH,"$ARGV[1]" or die; while(){ chomp; + if ($_ =~ /^#/){ + next; + } my @field=split "\t"; + if ($field[2] ne "exon"){ + next; + } $_=~/gene_id "(.+?)"/; my $gene_name=$1; - my $loc = $field[0].'\t'.($field[3]-1).'\t'.$field[4].'\t'.$field[6].'\t'.$field[2]; - foreach my $location (keys %know_lnc){ - if($location eq $loc){ - $genecode{$know_lnc{$loc}} = $gene_name; - } + my $loc1 = $field[0].'\t'.($field[3]-1).'\t'.$field[6]; + my $loc2 = $field[0].'\t'.$field[4].'\t'.$field[6]; + if (defined($know_lnc{$loc1})){ + $lncpedia{$know_lnc{$loc1}} = $gene_name; + } + if (defined($know_lnc{$loc2})){ + $lncpedia{$know_lnc{$loc2}} = $gene_name; } } - +}elsif (@ARGV == 1){ +open FH,"$ARGV[0]" or die; +while(){ + chomp; + if ($_ =~ /^#/){ + next; + } + my 
@field=split "\t"; + if ($field[2] ne "exon"){ + next; + } + $_=~/gene_id "(.+?)"/; + my $gene_name=$1; + my $loc1 = $field[0].'\t'.($field[3]-1).'\t'.$field[6]; + my $loc2 = $field[0].'\t'.$field[4].'\t'.$field[6]; + if (defined($know_lnc{$loc1})){ + $genecode{$know_lnc{$loc1}} = $gene_name; + } + if (defined($know_lnc{$loc2})){ + $genecode{$know_lnc{$loc2}} = $gene_name; + } +} +}else{ + die ("usage: at least one gtf file is needed!!!") +} my %exon; my %gene; @@ -66,7 +112,7 @@ }else{ $gene{$genename}{END}=$end; } - + } open FH,"novel.lncRNA.stringent.filter.bed" or die; @@ -94,7 +140,7 @@ }else{ $gene{$genename}{END}=$end; } - + } open OUT,">lncRNA.for_anno.bed" or die; foreach my $k (keys %gene){ @@ -143,7 +189,7 @@ my $genename="NA-$naidx"; $map{$genename}{$geneid}="NA"; #print $genename."\n"; - }else{ + }else{ if($up_dist < $down_dist){ my $genename; if($up_dist==0){ @@ -155,7 +201,7 @@ }else{ $map{$genename}{$geneid}=$up_dist; } - + }else{ #print "LALALALALA"."\n"; } @@ -223,13 +269,24 @@ } } - } + } } -open OUT3,">lncRNA.mapping.file" or die; +my %all_data; foreach my $mstr(sort(keys %genecode)){ - if(defined($MSTRG2genename{$mstr})){ - print OUT3 $mstr."\t".$genecode{$mstr}."\t".$MSTRG2genename{$mstr}."\n"; + $all_data{$mstr} = 1; +} +foreach my $mstr(sort(keys %lncpedia)){ + $all_data{$mstr} = 1; +} +open OUT3,">lncRNA.mapping.file" or die; +foreach my $mstr(sort(keys %all_data)){ + if(defined($MSTRG2genename{$mstr}) && defined($lncpedia{$mstr}) && defined($genecode{$mstr})){ + print OUT3 $mstr."\t".$MSTRG2genename{$mstr}."\t".$genecode{$mstr}."\t".$lncpedia{$mstr}."\n"; + }elsif (defined($MSTRG2genename{$mstr}) && defined($genecode{$mstr})){ + print OUT3 $mstr."\t".$MSTRG2genename{$mstr}."\t".$genecode{$mstr}."\t\n" + }elsif (defined($MSTRG2genename{$mstr}) && defined($lncpedia{$mstr})){ + print OUT3 $mstr."\t".$MSTRG2genename{$mstr}."\t\t".$lncpedia{$mstr}."\n" }else{ - #print $mstr."\n"; + next; } -} \ No newline at end of file +} diff --git 
a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 3068ad8..2f5fd13 100644 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -13,7 +13,8 @@ results['nf-core/lncpipe'] = 'N/A' results['Nextflow'] = 'N/A' results['FastQC'] = 'N/A' -results['MultiQC'] = 'N/A' +results['fastp'] = 'N/A' + # Search each file using its regex for k, v in regexes.items(): diff --git a/conf/awsbatch.config b/conf/awsbatch.config index 79078c7..65352d8 100644 --- a/conf/awsbatch.config +++ b/conf/awsbatch.config @@ -1,6 +1,6 @@ /* * ------------------------------------------------- - * Nextflow config file for AWS Batch + * Nextflow config file for AWS Batch * ------------------------------------------------- * Imported under the 'awsbatch' Nextflow profile in nextflow.config * Uses docker for software depedencies automagically, so not specified here. diff --git a/conf/base.config b/conf/base.config index 6caa969..5a181b3 100644 --- a/conf/base.config +++ b/conf/base.config @@ -22,14 +22,137 @@ process { maxErrors = '-1' // Process-specific resource requirements - withName: fastqc { - errorStrategy = { task.exitStatus in [143,137] ? 
'retry' : 'ignore' } + + withName:combine_public_annotation { + cpus = { check_max( 10, 'cpus' ) } + memory = { check_max( 80.GB * task.attempt, 'memory' ) } + time = { check_max( 5.h * task.attempt, 'time' ) } + } + withName:Run_fastQC { + cpus = { check_max( 10, 'cpus' ) } + memory = { check_max( 80.GB * task.attempt, 'memory' ) } + time = { check_max( 5.h * task.attempt, 'time' ) } + } + withName:Run_FastP { + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + withName:fastq_star_alignment_For_discovery { + cpus = { check_max (10, 'cpus')} + memory = { check_max( 80.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + withName:fastq_tophat_alignment_For_discovery { + cpus = { check_max (10, 'cpus')} + memory = { check_max( 80.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + withName:fastq_hisat2_alignment_For_discovery { + cpus = { check_max (10, 'cpus')} + memory = { check_max( 80.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + withName:StringTie_assembly { + cpus = { check_max( 8, 'cpus' ) } + memory = { check_max( 80.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + withName:StringTie_merge_assembled_gtf { + cpus = { check_max( 8, 'cpus' ) } + memory = { check_max( 80.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + withName:Cufflinks_assembly { + cpus = { check_max( 8, 'cpus' ) } + memory = { check_max( 80.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + withName:cuffmerge_assembled_gtf { + cpus = { check_max( 8, 'cpus' ) } + memory = { check_max( 80.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } } - withName: multiqc { - errorStrategy = { task.exitStatus in [143,137] ? 
'retry' : 'ignore' } + + withName:Merge_assembled_gtf_with_GENCODE { + cpus = { check_max( 4, 'cpus' ) } + memory = { check_max( 32.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + + withName:Identify_novel_lncRNA_with_criterions { + cpus = { check_max( 8, 'cpus' ) } + memory = { check_max( 32.GB * task.attempt, 'memory' ) } + time = { check_max( 7.h * task.attempt, 'time' ) } + errorStrategy = 'ignore' + } + + withName:Predict_coding_abilities_by_PLEK { + cpus = { check_max( 8 * task.attempt, 'cpus' ) } + memory = { check_max( 32.GB * task.attempt, 'memory' ) } + time = { check_max( 7.h * task.attempt, 'time' ) } + } + withName:Predict_coding_abilities_by_CPAT { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 32.GB * task.attempt, 'memory' ) } + time = { check_max( 7.h * task.attempt, 'time' ) } + } + + + withName:Filter_lncRNA_by_coding_potential_result { + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + } + + withName:Summary_renaming_and_classification { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + } + + + withName:Rerun_CPAT_to_evaluate_lncRNA { + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + } + + withName:Rerun_CPAT_to_evaluate_coding { + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + } + + + withName:Run_htseq_for_quantification { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + } + + withName:Run_htseq_for_quantification { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + } + withName:Build_kallisto_index_of_GTF_for_quantification { + cpus = { check_max( 8, 'cpus' ) } + memory = { check_max( 32.GB * task.attempt, 'memory' ) } + time = { check_max( 7.h * task.attempt, 'time' ) } + } + withName:Run_kallisto_for_quantification { + cpus = { check_max( 8, 'cpus' ) } + memory = { check_max( 32.GB * task.attempt, 'memory' ) } + 
time = { check_max( 7.h * task.attempt, 'time' ) } + } + + + + withName:get_software_versions { + memory = { check_max( 2.GB, 'memory' ) } + cache = false + errorStrategy = 'ignore' + } + withName:workflow_summary_mqc { + memory = { check_max( 2.GB, 'memory' ) } + cache = false + executor = 'local' + errorStrategy = 'ignore' } } + params { // Defaults only, expecting to be overwritten max_memory = 128.GB diff --git a/conf/igenomes.config b/conf/igenomes.config deleted file mode 100644 index 0815499..0000000 --- a/conf/igenomes.config +++ /dev/null @@ -1,146 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for iGenomes paths - * ------------------------------------------------- - * Defines reference genomes, using iGenome paths - * Can be used by any config that customises the base - * path using $params.igenomes_base / --igenomes_base - */ - -params { - // illumina iGenomes reference file paths - genomes { - 'GRCh37' { - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" - } - 'GRCm38' { - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - } - 'TAIR10' { - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - gtf = 
"${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" - } - 'EB2' { - bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" - } - 'UMD3.1' { - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" - } - 'WBcel235' { - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" - } - 'CanFam3.1' { - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" - } - 'GRCz10' { - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" 
- gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" - } - 'BDGP6' { - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" - } - 'EquCab2' { - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" - } - 'EB1' { - bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" - } - 'Galgal4' { - bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" - } - 'Gm01' { - bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - 
gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" - } - 'Mmul_1' { - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" - } - 'IRGSP-1.0' { - bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" - } - 'CHIMP2.1.4' { - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" - } - 'Rnor_6.0' { - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" - } - 'R64-1-1' { - bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" - fasta = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" - } - 'EF2' { - bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" - } - 'Sbi1' { - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" - } - 'Sscrofa10.2' { - bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" - } - 'AGPv3' { - bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" - } - } -} diff --git a/conf/test.config b/conf/test.config index 620dd4e..3a5c91d 100644 --- a/conf/test.config +++ b/conf/test.config @@ -4,7 
+4,7 @@ * ------------------------------------------------- * Defines bundled input files and everything required * to run a fast and simple test. Use as follows: - * nextflow run nf-core/methylseq -profile test + * nextflow run nf-core/lncpipe -profile test */ params { @@ -12,5 +12,27 @@ params { max_memory = 6.GB max_time = 48.h // Input data - singleEnd = false + /* + User setting options (mandatory) + */ +// input file and genome reference() + reads = 'Fastq/*_{1,2}.fastq.gz' + fasta = './Genome/chr22.fa' + design = './design.file' + hisat2_index = "./Genome/hisat_index/chr22" + gencode_annotation_gtf = "Genome/gencode.chr22.gtf" + lncipedia_gtf = "./Genome/lncipedia.chr22.gtf" + +/* + User setting options (optional) + */ + + //other setting + + +/* +Don't modify when using docker image +*/ + cpatpath ='/opt/CPAT-1.2.3' + } diff --git a/docker.config b/docker.config deleted file mode 100644 index 45a1a11..0000000 --- a/docker.config +++ /dev/null @@ -1,85 +0,0 @@ - - -params { -/* - User setting options (mandatory) - */ -// input file and genome reference() - - fastq_ext = '*_{1,2}.fq.gz' - fasta_ref = '/data/database/hg38/genome.fa' - design = 'design.file' // or null - hisat2_index = '/data/database/hg38/hisatIndex/grch38_snp_tran/genome_snp_tran' - cpatpath='/opt/CPAT-1.2.3' - //human gtf only - gencode_annotation_gtf = "/data/database/hg38/Annotation/gencode.v24.annotation.gtf" - lncipedia_gtf = "/data/database/hg38/Annotation/lncipedia_4_0_hg38.gtf" // set "null" if you are going to perform analysis on other species - -/* - User setting options (optional) - */ - // tools setting - hisat_strand = 'RF' - - star_index = ''//set if star used - bowtie2_index = ''//set if tophat used - aligner = "hisat" // or "star","tophat" - sam_processor="sambamba"//or "samtools(deprecated)" - qctools ="fastp" // or "afterqc","fastp","fastqc","none" to skip qc step - detools = "edger"//or "deseq2" - quant = "kallisto"// or 'htseq' - //other setting - singleEnd = false - 
unstrand = false - skip_combine = false - lncRep_Output = 'reporter.html' - lncRep_theme = 'npg' - lncRep_cdf_percent = 10 - lncRep_max_lnc_len = 10000 - lncRep_min_expressed_sample = 50 - mem=60 - cpu=30 - /* - for non-human setting - */ - species="human"// mouse , zebrafish, fly - known_coding_gtf="" - known_lncRNA_gtf="" - - -/* -Don't modify -*/ - cpatpath ='/opt/CPAT-1.2.3' - - -} - -/* -Don't modify either -*/ -// Docker options -docker.runOptions = '-u $(id -u):$(id -g)' -docker.enabled = true -process.container = 'bioinformatist/lncpipe:latest' - -// individual process setting -process.cache = 'deep' - -process { - withLabel: para { - maxForks = 6 - } - - withLabel: 'qc' { - maxForks = 6 - } - -} - -manifest { - homePage = 'https//github.com/likelet/LncPipe' - description = 'LncPipe:a Nextflow-based Long non-coding RNA analysis PIPELINE' - mainScript = 'LncRNAanalysisPipe.nf' -} - diff --git a/README_for_non_human_genome.md b/docs/README_for_non_human_genome.md similarity index 100% rename from README_for_non_human_genome.md rename to docs/README_for_non_human_genome.md diff --git a/docs/configuration/reference_genomes.md b/docs/configuration/reference_genomes.md index db991e2..3fe9832 100644 --- a/docs/configuration/reference_genomes.md +++ b/docs/configuration/reference_genomes.md @@ -1,5 +1,7 @@ # nf-core/lncpipe: Reference Genomes Configuration +Unfortunately, LncPipe currently supports only a few organisms (human, mouse, fly and zebrafish), because CPAT, an essential tool included in LncPipe, is only available for these four species. + The nf-core/lncpipe pipeline needs a reference genome for alignment and annotation. These paths can be supplied on the command line at run time (see the [usage docs](../usage.md)), @@ -29,21 +31,3 @@ params { genome = 'YOUR-ID' } ``` - -You can add as many genomes as you like as long as they have unique IDs. 
- -## illumina iGenomes -To make the use of reference genomes easier, illumina has developed a centralised resource called [iGenomes](https://support.illumina.com/sequencing/sequencing_software/igenome.html). -Multiple reference index types are held together with consistent structure for multiple genomes. - -We have put a copy of iGenomes up onto AWS S3 hosting and this pipeline is configured to use this by default. -The hosting fees for AWS iGenomes are currently kindly funded by a grant from Amazon. -The pipeline will automatically download the required reference files when you run the pipeline. -For more information about the AWS iGenomes, see https://ewels.github.io/AWS-iGenomes/ - -Downloading the files takes time and bandwidth, so we recommend making a local copy of the iGenomes resource. -Once downloaded, you can customise the variable `params.igenomes_base` in your custom configuration file to point to the reference location. -For example: -```nextflow -params.igenomes_base = '/path/to/data/igenomes/' -``` diff --git a/docs/img/classification.svg b/docs/img/classification.svg new file mode 100644 index 0000000..b98a5db --- /dev/null +++ b/docs/img/classification.svg @@ -0,0 +1,32980 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Exonic Sense +Intronic Sense +Antisense +Bidirectional +Intergenic + + + + + + + + + + + + + 
+ + + + + + + + + + +Coding Gene +lncRNA + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Coding Gene +lncRNA + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Coding Gene +lncRNA + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Coding Gene +lncRNA + + + + + + + + + +<1kb + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Coding Gene +lncRNA +Coding Gene + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/img/nf-core-lncpipe_logo.png b/docs/img/nf-core-lncpipe_logo.png index 0af5a52..6cbd2e1 100644 Binary files a/docs/img/nf-core-lncpipe_logo.png and b/docs/img/nf-core-lncpipe_logo.png differ diff --git a/docs/img/nf-core-lncpipe_logo.svg b/docs/img/nf-core-lncpipe_logo.svg index 849a7ba..ecaa4e8 100644 --- a/docs/img/nf-core-lncpipe_logo.svg +++ b/docs/img/nf-core-lncpipe_logo.svg @@ -1,141 +1,205 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nf- - core/ - lncpipe - - - - - - - - - - - - + +image/svg+xmlnf- +core/ +lncpipe + diff --git a/docs/img/workflow.png b/docs/img/workflow.png new file mode 100644 index 
0000000..b0f6965 Binary files /dev/null and b/docs/img/workflow.png differ diff --git a/docs/output.md b/docs/output.md index bdccfc9..76c0b0c 100644 --- a/docs/output.md +++ b/docs/output.md @@ -1,23 +1,58 @@ # nf-core/lncpipe: Output -This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. +This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report and LncpipeReporter, that summarizes the result at the end of the pipeline. ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) -and processes data using the following steps: +Plz see a detailed illustration in our [literature](https://linkinghub.elsevier.com/retrieve/pii/S1673-8527(18)30117-6). -* [FastQC](#fastqc) - read quality control -* [MultiQC](#multiqc) - aggregate report, describing results of the whole pipeline +## Result Folder structure -## FastQC -[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your reads. It provides information about the quality score distribution across your reads, the per base sequence content (%T/A/G/C). You get information about adapter contamination and other overrepresented sequences. +`Result` folder under current path(default) or output_folder set by user. A typical structure of `Result` is follows: -For further reading and documentation see the [FastQC help](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). 
+ Result/ + ├── QC + │ ├── N1141_1.clean_fastqc.html + │ ├── N1141_2.clean_fastqc.html + │ ├── N1177_1.clean_fastqc.html + │ └── N1177_2.clean_fastqc.html + ├── Identified_lncRNA + │ ├── all_lncRNA_for_classifier.gtf + │ ├── final_all.fa + │ ├── final_all.gtf + │ ├── lncRNA.fa + │ ├── protein_coding.fa + │ └── protein_coding.final.gtf + ├── LncReporter + │ ├── Differential_Expression_analysis.csv + │ └── Report.html + ├── Quantification + │ ├── kallisto.count.txt + │ └── kallisto.tpm.txt + └── Star_alignment + ├── STAR_N1141 + │ ├── N1141Aligned.sortedByCoord.out.bam + │ ├── N1141Log.final.out + │ ├── N1141Log.out + │ ├── N1141Log.progress.out + │ └── N1141SJ.out.tab + └── STAR_N1177 + ├── N1177Aligned.sortedByCoord.out.bam + ├── N1177Log.final.out + ├── N1177Log.out + ├── N1177Log.progress.out + └── N1177SJ.out.tab -> **NB:** The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. To see how your reads look after trimming, look at the FastQC reports in the `trim_galore` directory. +* `QC` stored the Quality control output generated by FastQC or AfterQC software.
+* `Identified_lncRNA` contains all assembled lncRNAs and their sequences. *all_lncRNA_for_classifier.gtf* includes both novel and known lncRNA features in [GTF format](http://www.ensembl.org/info/website/upload/gff.html); +*lncRNA.fa* contains all lncRNA sequences in FASTA format. *protein_coding.final.gtf* and *protein_coding.fa* hold the protein-coding information extracted from the GENCODE annotation. *final_all.gtf* and *final_all.fa* are the combined files used for further analysis.
+* `*_alignment` (`Star_alignment` in the example above) contains the standard output of the aligner used (hisat, tophat or STAR).
+* `Quantification` contains the abundance estimates produced by kallisto. *kallisto.count.txt* stores the read count matrix and *kallisto.tpm.txt* stores the TPM (Transcripts Per Million) matrix. +* `LncReporter` stores the interactive report file and the differential expression matrix generated by LncPipeReporter, which wraps edgeR. -**Output directory: `results/fastqc`** + +**Output directory: `results/QC`** * `sample_fastqc.html` * FastQC report, containing quality metrics for your untrimmed raw fastq files @@ -25,16 +60,83 @@ For further reading and documentation see the [FastQC help](http://www.bioinform * zip file containing the FastQC report, tab-delimited data file and plot images -## MultiQC -[MultiQC](http://multiqc.info) is a visualisation tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in within the report data directory. +**Output directory: `results/Combined_annotations`** + +* `known.lncRNA.gtf` + * known lncRNA gene annotation file extracted from the provided `GTF` files; it can be kept for further comparison +* `gencode_protein_coding.gtf` + * known protein-coding gene annotation file extracted from the provided `GTF` files +* `*_mod.gtf` (less information) + * reformatted versions of the `GTF` files provided by the user + +**Output directory: `results/*_alignment`** + +* `*_summary.txt` + * alignment summary files generated by aligners such as tophat, hisat2 and STAR.
A typical content are as follow: + ``` + 453727 reads; of these: + 453727 (100.00%) were paired; of these: + 434333 (95.73%) aligned concordantly 0 times + 19339 (4.26%) aligned concordantly exactly 1 time + 55 (0.01%) aligned concordantly >1 times + ---- + 434333 pairs aligned concordantly 0 times; of these: + 279639 (64.38%) aligned discordantly 1 time + ---- + 154694 pairs aligned 0 times concordantly or discordantly; of these: + 309388 mates make up the pairs; of these: + 4095 (1.32%) aligned 0 times + 303301 (98.03%) aligned exactly 1 time + 1992 (0.64%) aligned >1 times + 99.55% overall alignment rate + ``` +* `*sort.bam` + * alignment result in bam format. + +**Output directory: `results/Identified_lncRNA`** + +* `*all_lncRNA_for_classifier.gtf ` + * A final identified lncRNA `GTF` that contains both known lncRNA and novel lncRNAs. It also includes the lncRNA relative position to the nearest genes. + +* `final_all.gtf ` + * A `GTF` that contains both lncRNA and protein coding RNA. It includes the lncRNA relative position to the nearest genes. + +* `lncRNA.fa ` + * All lncRNA sequences in `fasta` format + +* `protein_coding.fa` + * All protein sequences in `fasta` format + +* `final_all.fa` + * All protein + lncRNA sequences in `fasta` format + +* `lncRNA_classification.txt` + * A detailed classification of all lncRNAs according to their coordinate in the chromosome. A detailed illustration of the lncRNA class are shown below + ![image](img/classification.svg) + +* `lncRNA.mapping.file` + * In the output file, we renamed all lncRNA according their nearest gene for both known and new identified genes. This file contains the name map information between changed name and origin name of the known lncRNAs. + + + +**Output directory: `results/LncPipeReports`** + + * LncPipeReporter generated report, plz open the reporter.html directly. 
+ +**Output directory: `results/Merged_assemblies`** + + * `merged.gtf ` + * Initial merged GTF by merge assemblies from individual sample. Can be used as input of `--merged_gtf` parameter + +**Output directory: `results/Quantification`** + +* `kallisto.count.txt ` + * reads count matrix at gene level with each line represent a gene and each column represent a sample. NOTE: The second column recorded the gene type, e.g. `proten_coding` or `known` lncRNA or `novel` lncRNA that were not included in the provided gtf + +* `kallisto.tpm.txt` + * expression matrix at gene level in `TPM` format. A detailed explanation of TPM can be found in [here](https://haroldpimentel.wordpress.com/2014/05/08/what-the-fpkm-a-review-rna-seq-expression-units/) + -The pipeline has special steps which allow the software versions used to be reported in the MultiQC output for future traceability. -**Output directory: `results/multiqc`** -* `Project_multiqc_report.html` - * MultiQC report - a standalone HTML file that can be viewed in your web browser -* `Project_multiqc_data/` - * Directory containing parsed statistics from the different tools used in the pipeline -For more information about how to use MultiQC reports, see http://multiqc.info diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 7e6cbd4..c78883f 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -26,3 +26,28 @@ If you still have an issue with running the pipeline then feel free to contact u Have a look at the [pipeline website](https://github.com/nf-core/lncpipe) to find out how. If you have problems that are related to Nextflow and not our pipeline then check out the [Nextflow gitter channel](https://gitter.im/nextflow-io/nextflow) or the [google group](https://groups.google.com/forum/#!forum/nextflow). + + +## Tips & FAQs + +## Tips + +* :blush:Plz keep the consistency of your genome sequence, index library and annotation files: genome version, chromosome format, gtf coordinated e.g. 
The third-party software may stop for any of the above reasons. +* :confused: Always set your analysis parameters in a config file; different projects should use different configurations for reproducible analysis. To rerun a project, just specify `-c your.config` in your command, which also helps you keep a record of the analysis parameters. +* :open_mouth: Run the analysis in a Docker container; not much more to say. +* :grimacing: Always use the latest version to avoid known bugs. + + +## FAQ +In local mode: +* *1. PLEK throws an error "/data/software/PLEK.1.2/PLEK.py:line12: $'\r': can not find command", how to fix?* +>A: Use the following command, as suggested in the installation section. + + perl -CD -pi -e'tr/\x{feff}//d && s/[\r\n]+/\n/' *.py + +* *2. IOError: [Errno 2] No such file or directory: '/opt/CPAT-1.2.3/dat/Human_Hexamer.tsv'?* +>A: The cpat command requires `Human_Hexamer.tsv` to predict lncRNA coding potential; please check your `cpatpath` parameter. +* *3. When using htseq to quantify transcripts, it throws "Error occured when reading beginning of SAM/BAM file. 'csamtools.AlignedRead' object has no attribute 'reference_start' "* +>A: This is a version conflict between htseq and the BAM file generated by hisat; a possible solution is to install an older version of htseq + + diff --git a/docs/usage.md b/docs/usage.md index 241a7d7..7bdffc9 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -14,6 +14,7 @@ * [`none`](#none) * [`--reads`](#--reads) * [`--singleEnd`](#--singleend) + * [`--unstrand`](#--unstrand) * [Reference Genomes](#reference-genomes) * [`--genome`](#--genome) * [`--fasta`](#--fasta) @@ -64,6 +65,41 @@ results # Finished results (configurable, see below) # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` +## Prepare Input file + +* Reference files for humans + + 1. hisat index built from Genome: + http://cancerbio.info/pub/hg38_hisat_index.tar.gz + + 2.
Genome reference: + ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_27/GRCh38.p10.genome.fa.gz + + 3. GENCODE gene annotation: + ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_27/gencode.v27.annotation.gtf.gz + + 4. LNCipedia gene annotation: + https://lncipedia.org/downloads/lncipedia_5_0_hc_hg38.gtf + + 5. Raw sequence files with the \*.fastq.gz / \*.fq.gz suffix + + 6. `design` file (optional) + +* Reference files for mouse + + 1. hisat index built from Genome + + 2. Genome reference: + ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M16/GRCm38.p5.genome.fa.gz + + 3. GENCODE gene annotation: + ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M16/gencode.vM16.annotation.gtf.gz + + 4. Raw sequence files with the \*.fastq.gz / \*.fq.gz suffix + + 5. `design` file (optional) + + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: @@ -101,6 +137,7 @@ Use this parameter to choose a configuration profile. Profiles can give configur * `test` * A profile with a complete configuration for automated testing * Includes links to test data so needs no other parameters + * NOTE: in lncPipe, the test data must be downloaded separately; it is available at http://cancerbio.info/pub/lncpipe/testdata.tar.gz * `none` * No configuration at all. Useful if you want to build your own config from scratch and want to avoid loading in the default `base` config profile (not recommended). @@ -131,38 +168,24 @@ It is not possible to run a mixture of single-end and paired-end files in one ru ## Reference Genomes -The pipeline config files come bundled with paths to the illumina iGenomes reference index files.
If running with docker or AWS, the configuration is set up to use the [AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/) resource. +LncPipe currently supports the `human` species only; other species will be supported in the near future. -### `--genome` (using iGenomes) -There are 31 different species supported in the iGenomes references. To run the pipeline, you must specify which to use with the `--genome` flag. +We recommend that users adopt the reference files from [GENCODE](https://www.gencodegenes.org/) -You can find the keys to specify the genomes in the [iGenomes config file](../conf/igenomes.config). Common genomes that are supported are: -* Human - * `--genome GRCh37` -* Mouse - * `--genome GRCm38` -* _Drosophila_ - * `--genome BDGP6` -* _S. cerevisiae_ - * `--genome 'R64-1-1'` +## Annotation file -> There are numerous others - check the config file for more. +### `--gencode_annotation_gtf` +An annotation file from the GENCODE database for annotating lncRNAs (required if not set in the config file), e.g. [gencode.v26.annotation.gtf](ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_26/gencode.v26.annotation.gtf.gz) -Note that you can use the same configuration setup to save sets of reference files for your own use, even if they are not part of the iGenomes resource. See the [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for instructions on where to save such a file. +### `--lncipedia_gtf` +An annotation file from the LNCipedia database for annotating lncRNAs (required if not set in the config file), e.g. [lncipedia_4_0_hc_hg38.gtf](http://www.lncipedia.org/downloads/lncipedia_4_0_hc_hg38.gtf) -The syntax for this reference configuration is as follows: +### `--star_index/--bowtie2_index/--hisat2_index` + +> This parameter is *required* when not configured in the nextflow.config file. It specifies the star/tophat/hisat2 (mutually exclusive) index folder built before running [LncPipe](https://github.com/likelet/LncPipe).
+If you are not sure what this is, you can use `--fasta` to specify the reference sequence data instead. The index files will then be built automatically by [LncPipe](https://github.com/likelet/LncPipe). -```nextflow -params { - genomes { - 'GRCh37' { - fasta = '' // Used if no star index given - } - // Any number of additional genomes, key is used with --genome - } -} -``` ### `--fasta` If you prefer, you can specify the full path to your reference genome when you run the pipeline: @@ -171,6 +194,23 @@ If you prefer, you can specify the full path to your reference genome when you r --fasta '[path to Fasta reference]' ``` +### `--design` +> Experimental design file matrix for differential expression analysis. Default: `null` +Format: + + WT:Sample1,Sample2,Sample3 + KO:Sample4,Sample5,Sample6 + +Here `WT` and `KO` are the two experimental conditions, and the sample names after each colon are that condition's replicates, comma-delimited on the same line. + +Each sample name should be the same as the prefix of its fastq files, i.e. the file name with the part matched by `--fastq_ext` trimmed off. + +For example: + + if the fastq files `Sample1_1.fq.gz` and `Sample1_2.fq.gz` come from one sample and your `--fastq_ext` is set to `*_{1,2}.fq.gz`, the sample name +should be `Sample1`. + + ## Job Resources ### Automatic resubmission Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with an error code of `143` (exceeded requested resources) it will automatically resubmit with higher requests (2 x original, then 3 x original). If it still fails after three times then the pipeline is stopped.
diff --git a/environment.yml b/environment.yml index 45d1b28..ea2453f 100644 --- a/environment.yml +++ b/environment.yml @@ -14,3 +14,13 @@ dependencies: - sambamba=0.6.8 - cpat=1.2.3 - htseq=0.9.1 + - cufflinks=2.2.1 + - pandoc=2.3.1 + - bedops=2.4.35 + - afterqc=0.9.7 + - gffcompare=0.10.1 + - sambamba=0.6.8 + - plek=1.2 + - r-lncpipereporter=0.1.1 + - multiqc-bcbio=0.2.6 + diff --git a/LncRNAanalysisPipe.nf b/main.nf old mode 100644 new mode 100755 similarity index 75% rename from LncRNAanalysisPipe.nf rename to main.nf index f387a7d..3460482 --- a/LncRNAanalysisPipe.nf +++ b/main.nf @@ -1,1655 +1,1630 @@ -#!/usr/bin/env nextflow -/* -======================================================================================== - nf-core/lncpipe -======================================================================================== - nf-core/lncpipe Analysis Pipeline. - #### Homepage / Documentation - https://github.com/nf-core/lncpipe - https://github.com/likelet/LncPipe - #### Authors - Qi Zhao @qi_likelet ----------------------------------------------------------------------------------------- -*/ -/* - * LncPipe was implemented by Dr. Qi Zhao from Sun Yat-sen University Cancer Center, China. - * - * - * LncPipe is a free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * See the GNU General Public License for more details. - * - * - */ - -/* - * LncPipe: A nextflow-based lncRNA identification and analysis pipeline from RNA sequencing data - * - * Authors: - * Qi Zhao : design and implement the pipeline. - * Yu Sun : design and implement the analysis report sections. - * Zhixiang Zuo : design the project and perform the testing. 
- */ - -// requirement: -// - fastp/fastqc/AfterQC -// - STAR/tophat2/bowtie2/hisat2/StringTie -// - samtools/sambamba -// - Cufflinks/gffcompare -// - Bedops -// - CPAT -// - PLEK -// - CNCI -// - kallisto [https://pachterlab.github.io/kallisto/starting] - -//pre-defined functions for render command -//======================================================================================= -ANSI_RESET = "\u001B[0m"; -ANSI_BLACK = "\u001B[30m"; -ANSI_RED = "\u001B[31m"; -ANSI_GREEN = "\u001B[32m"; -ANSI_YELLOW = "\u001B[33m"; -ANSI_BLUE = "\u001B[34m"; -ANSI_PURPLE = "\u001B[35m"; -ANSI_CYAN = "\u001B[36m"; -ANSI_WHITE = "\u001B[37m"; - - -def print_red = { str -> ANSI_RED + str + ANSI_RESET } -def print_black = { str -> ANSI_BLACK + str + ANSI_RESET } -def print_green = { str -> ANSI_GREEN + str + ANSI_RESET } -def print_yellow = { str -> ANSI_YELLOW + str + ANSI_RESET } -def print_blue = { str -> ANSI_BLUE + str + ANSI_RESET } -def print_cyan = { str -> ANSI_CYAN + str + ANSI_RESET } -def print_purple = { str -> ANSI_PURPLE + str + ANSI_RESET } -def print_white = { str -> ANSI_WHITE + str + ANSI_RESET } - -//Help information -// Nextflow version -version="v0.2.42" -//======================================================================================= -// Nextflow Version check -if( !nextflow.version.matches('0.26+') ) { - println print_yellow("This workflow requires Nextflow version 0.26 or greater -- You are running version ")+ print_red(nextflow.version) - exit 1 -} -//help information -params.help = null -if (params.help) { - log.info '' - log.info print_purple('------------------------------------------------------------------------') - log.info "LncPipe: a Nextflow-based Long non-coding RNA analysis Pipeline v$version" - log.info "LncPipe integrates several NGS processing tools to identify novel long non-coding RNAs from" - log.info "un-processed RNA sequencing data. 
To run this pipeline, users either need to install required tools manually" - log.info "or use the docker image for LncPipe that comes with all tools pre-installed. (note: docker needs to be installed on your system). More information on usage can be found at https://github.com/likelet/LncPipe ." - log.info "Bugs or new feature requests can be reported by opening issues in our github repository." - log.info print_purple('------------------------------------------------------------------------') - log.info '' - log.info print_yellow('Usage: ') - log.info print_yellow(' The typical command for running the pipeline is as follows (we do not recommend users passing configuration parameters through command line, please modify the config.file instead):\n') + - print_purple(' Nextflow run LncRNAanalysisPipe.nf \n') + - - print_yellow(' General arguments: Input and output setting\n') + - print_cyan(' --input_folder ') + print_green('Path to input data(optional), current path default\n') + - print_cyan(' --fastq_ext <*_fq.gz> ') + print_green('Filename pattern for pairing raw reads, e.g: *_{1,2}.fastq.gz for paired reads\n') + - print_cyan(' --out_folder ') + print_green('The output directory where the results will be saved(optional), current path is default\n') + - print_cyan(' --aligner ') + print_green('Aligner for reads mapping (optional),"hisat"(defalt)/"star"/"tophat"\n') + - print_cyan(' --qctools ') + print_green('Tools for assess reads quality, fastp(default)/afterqc/fastqc/none(skip QC step)\n') + - print_cyan(' --detools ') + print_green('Tools for differential analysis, edger(default)/deseq/noiseq\n') + - print_cyan(' --quant ') + print_green('Tools for estimating abundance of transcript, kallisto(default)/htseq\n') + - '\n' + - print_yellow(' Options: General options for run this pipeline\n') + - print_cyan(' --merged_gtf ') + print_green('Start analysis with assemblies already produced and skip fastqc/alignment step, DEFAOUL NULL\n') + - print_cyan(' --design 
') + print_green('A flat file stored the experimental design information ( required when perform differential expression analysis)\n') + - print_cyan(' --singleEnd ') + print_green('Reads type, True for single ended \n') + - print_cyan(' --unstrand ') + print_green('RNA library construction strategy, specified for \'unstranded\' library \n') + - '\n' + - print_yellow(' References: If not specified in the configuration file or you wish to overwrite any of the references.\n') + - print_cyan(' --fasta ') + print_green('Path to Fasta reference(required)\n') + - print_cyan(' --gencode_annotation_gtf ') + print_green('An annotation file from GENCODE database in GTF format (required)\n') + - print_cyan(' --lncipedia_gtf ') + print_green('An annotation file from LNCipedia database in GTF format (required)\n') + - '\n' + - print_yellow(' LncPipeReporter Options: LncPipeReporter setting \n') + - print_cyan(' --lncRep_Output ') + print_green('Specify report file name, \"report.html\" default.\n') + - print_cyan(' --lncRep_theme ') + print_green('Plot theme setting in interactive plot, \"npg\" default.\n') + - print_cyan(' --lncRep_min_expressed_sample ') + print_green('Minimum expressed gene allowed in each sample, 50 default.\n') + - '\n' + - print_yellow(' Other options: Specify the email and \n') + - print_cyan(' --sam_processor ') + print_green('program to process samfile generated by hisat2 if aligner is hisat2. Default \"sambamba\". 
\n') + - print_cyan(' --mail ') + print_green('email info for reporting status of your LncPipe execution \n') + - - - - log.info '------------------------------------------------------------------------' - log.info print_yellow('Contact information: zhaoqi@sysucc.org.cn') - log.info print_yellow('Copyright (c) 2013-2017, Sun Yat-sen University Cancer Center.') - log.info '------------------------------------------------------------------------' - exit 0 -} - -//check parameters -/* -allowed_params = ["input_folder","fastq_ext","out_folder","aligner","qctools","detools","quant", - "merged_gtf","design","singleEnd","unstrand", - "fasta","gencode_annotation_gtf","lncipedia_gtf", - "lncRep_Output", "lncRep_theme","lncRep_min_expressed_sample", - "sam_processor","mail"] -params.each { entry -> - if (! allowed_params.contains(entry.key)) { - println("The parameter <${entry}.key> is not known"); - System.exit(2); - } -} -*/ - -//default values -params.input_folder = './' -params.out_folder = './' - - -//dose merged_gtf provided -params.merged_gtf = null - - -singleEnd = params.singleEnd ? true : false -skip_combine = params.skip_combine ? true : false -unstrand = params.unstrand ? 
true : false -//Checking parameters -log.info print_purple("You are running LncPipe with the following parameters:") -log.info print_purple("Checking parameters ...") -log.info print_yellow("=====================================") -log.info print_yellow("Species: ") + print_green(params.species) -log.info print_yellow("Fastq file extension: ") + print_green(params.fastq_ext) -log.info print_yellow("Single end : ") + print_green(params.singleEnd) -log.info print_yellow("skip annotation process: ") + print_green(params.skip_combine) -log.info print_yellow("Input folder: ") + print_green(params.input_folder) -log.info print_yellow("Output folder: ") + print_green(params.out_folder) -log.info print_yellow("Genome sequence location: ") + print_green(params.fasta_ref) -log.info print_yellow("STAR index path: ") + print_green(params.star_index) -log.info print_yellow("HISAT2 index path: ") + print_green(params.hisat2_index) -log.info print_yellow("bowtie/tophat index path: ") + print_green(params.bowtie2_index) -log.info print_yellow("GENCODE annotation location: ") + print_green(params.gencode_annotation_gtf) -log.info print_yellow("lncipedia annotation location: ") + print_green(params.lncipedia_gtf) -log.info print_yellow("=====================================") -log.info "\n" - -// run information of system file -//automatic set optimize resource for analysis based on current system resources -ava_mem = (double) (Runtime.getRuntime().freeMemory()) -ava_cpu = Runtime.getRuntime().availableProcessors() -if (params.cpu != null && ava_cpu > params.cpu) { - ava_cpu = params.cpu -} else if (params.cpu != null && ava_cpu < params.cpu) { - print print_yellow("Exceeding the max available processors, \n use default parameter to run pipe. ") -} -if (params.mem != null && ava_mem > params.mem) { - ava_mem = params.mem -} else if (params.mem != null && ava_mem < params.mem) { - print print_yellow("Exceeding the max available memory, \n use default parameter to run pipe. 
") -} -// set individual cpu for fork run -idv_cpu = 40 -int fork_number = ava_cpu / idv_cpu -if (fork_number < 1) { - fork_number = 1 -} - -// read file -fasta_ref = file(params.fasta_ref) -if (!fasta_ref.exists()) exit 1, "Reference genome not found: ${params.fasta_ref}" -if(params.aligner=='star'){ - star_index = file(params.star_index) - if (!star_index.exists()) exit 1, "STAR index not found: ${params.star_index}" -}else if(params.aligner =='hisat'){ - hisat2_index = Channel.fromPath("${params.hisat2_index}*") - .ifEmpty { exit 1, "HISAT2 index not found: ${params.hisat2_index}" } -}else if(params.aligner =='tophat'){ - bowtie2_index = Channel.fromPath("${params.bowtie2_index}*") - .ifEmpty { exit 1, "bowtie2 index for tophat not found: ${params.bowtie2_index}" } -} - -input_folder = file(params.input_folder) - - -/* -*Step 1: Prepare Annotations - */ - -println print_purple("Combining known annotations from GTFs") -if (params.species=="human") { - gencode_annotation_gtf = file(params.gencode_annotation_gtf) - if (!gencode_annotation_gtf.exists()) exit 1, "GENCODE annotation file not found: ${params.gencode_annotation_gtf}" - lncipedia_gtf = file(params.lncipedia_gtf) - if (!lncipedia_gtf.exists()) exit 1, "lncipedia annotation file not found: ${params.lncipedia_gtf}" -//Prepare annotations - annotation_channel = Channel.from(gencode_annotation_gtf, lncipedia_gtf) - annotation_channel.collectFile { file -> ['lncRNA.gtflist', file.name + '\n'] } - .set { LncRNA_gtflist } - process combine_public_annotation { - storeDir { params.out_folder + "/Combined_annotations" } - input: - file lncRNA_gtflistfile from LncRNA_gtflist - file gencode_annotation_gtf - file lncipedia_gtf - output: - file "gencode_protein_coding.gtf" into proteinCodingGTF, proteinCodingGTF_forClass - file "known.lncRNA.gtf" into KnownLncRNAgtf - file "*_mod.gtf" into mod_file_for_rename - - shell: - cufflinks_threads = ava_cpu- 1 - - - if(params.aligner=='hisat'){//fix the gtf format required by 
hisat - ''' - set -o pipefail - touch filenames.txt - for file in *.gtf - do - perl -lpe 's/ ([^"]\\S+) ;/ "$1" ;/g' $file > ${file}_mod.gtf - echo ${file}_mod.gtf >>filenames.txt - - done - - stringtie --merge -o merged_lncRNA.gtf filenames.txt - cat !{gencode_annotation_gtf}_mod.gtf |grep "protein_coding" > gencode_protein_coding.gtf - gffcompare -r gencode_protein_coding.gtf -p !{cufflinks_threads} merged_lncRNA.gtf - awk '$3 =="u"||$3=="x"{print $5}' gffcmp.merged_lncRNA.gtf.tmap |sort|uniq|perl !{baseDir}/bin/extract_gtf_by_name.pl merged_lncRNA.gtf - > merged.filter.gtf - mv merged.filter.gtf known.lncRNA.gtf - - ''' - }else { - - ''' - set -o pipefail - cuffmerge -o merged_lncRNA !{lncRNA_gtflistfile} - cat !{gencode_annotation_gtf} |grep "protein_coding" > gencode_protein_coding.gtf - cuffcompare -o merged_lncRNA -r gencode_protein_coding.gtf -p !{cufflinks_threads} merged_lncRNA/merged.gtf - awk '$3 =="u"||$3=="x"{print $5}' merged_lncRNA/merged_lncRNA.merged.gtf.tmap |sort|uniq|perl !{baseDir}/bin/extract_gtf_by_name.pl merged_lncRNA/merged.gtf - > merged.filter.gtf - mv merged.filter.gtf known.lncRNA.gtf - - ''' - } - } -} -else {// for mouse or other species, user should provide known_protein_coding and known_lncRNA GTF file for analysis - - KnownLncRNAgtf=file(params.known_lncRNA_gtf) - if (!KnownLncRNAgtf.exists()) exit 1, print_red("In non-human mode, known lncRNA GTF annotation file not found: ${params.known_lncRNA_gtf}") - known_coding_gtf=file(params.known_coding_gtf) - if (!known_coding_gtf.exists()) exit 1, print_red("In non-human mode, known protein coding GTF annotation file not found: ${params.known_coding_gtf}") - gencode_annotation_gtf = file(params.gencode_annotation_gtf) - if (!gencode_annotation_gtf.exists()) exit 1, print_red("GENCODE annotation file not found: ${params.gencode_annotation_gtf}") - gencode_annotation_gtf.into{proteinCodingGTF; proteinCodingGTF_forClass} - knownLncRNAgtf.set{knownLncRNAgtf} - -} - - -// whether the merged 
gtf have already produced. -if (!params.merged_gtf) { - /* - * Step 2: Build read aligner (STAR/tophat/HISAT2) index, if not provided - */ - //star_index if not exist - /*if (params.aligner == 'star' && params.star_index == false && fasta_ref) { - process Make_STARindex { - tag fasta_ref - - storeDir { params.out_folder + "/STARIndex" } - - input: - file fasta_ref from fasta_ref - file gencode_annotation_gtf - - output: - file "star_index" into star_index - - shell: - star_threads = ava_cpu- 1 - """ - mkdir star_index - STAR \ - --runMode genomeGenerate \ - --runThreadN ${star_threads} \ - --sjdbGTFfile $gencode_annotation_gtf \ - --sjdbOverhang 149 \ - --genomeDir star_index/ \ - --genomeFastaFiles $fasta_ref - """ - } - } else if (params.aligner == 'star' && params.star_index == false && !fasta_ref) { - println print_red("No reference fasta sequence loaded! please specify ") + print_red("--fasta_ref") + print_red(" with reference.") - - } else if (params.aligner == 'tophat' && params.bowtie2_index == false && !fasta_ref) { - process Make_bowtie2_index { - - tag fasta_ref - storeDir { params.out_folder + "/bowtie2Index" } - - input: - file fasta_ref from fasta_ref - - output: - file "genome_bt2.*" into bowtie2_index - - shell: - """ - bowtie2-build !{fasta_ref} genome_bt2 - """ - } - } else if (params.aligner == 'tophat' && !fasta_ref) { - println print_red("No reference fasta equence loaded! 
please specify ") + print_red("--fasta_ref") + print_red(" with reference.") - } else if (params.aligner == 'hisat' && !fasta_ref) { - process Make_hisat_index { - - tag fasta_ref - - storeDir { params.out_folder + "/hisatIndex" } - - input: - file fasta_ref from fasta_ref - file gencode_annotation_gtf - - output: - file "genome_ht2.*" into hisat2_index - - shell: - hisat2_index_threads = ava_cpu- 1 - """ - #for human genome it will take more than 160GB memory and take really long time (6 more hours), thus we recommand to down pre-build genome from hisat website - extract_splice_sites.py !{gencode_annotation_gtf} >genome_ht2.ss - extract_exons.py !{gencode_annotation_gtf} > genome_ht2.exon - hisat2-build -p !{hisat2_index_threads} --ss genome_ht2.ss --exo genome_ht2.exon !{fasta_ref} genome_ht2 - """ - } - } else if (params.aligner == 'tophat' && params.hisat_index == false && !fasta_ref) { - println print_red("No reference fasta sequence loaded! please specify ") + print_red("--fasta_ref") + print_red(" with reference.") - }*/ - - println print_purple("Analysis from fastq file") - //Match the pairs on two channels - - reads = params.input_folder + params.fastq_ext - - /* - * Step 3: QC (FastQC/AfterQC/Fastp) of raw reads - */ - println print_purple("Perform quality control of raw fastq files ") - if (params.qctools == 'fastqc') { - Channel.fromFilePairs(reads, size: params.singleEnd ? 
1 : 2) - .ifEmpty { - exit 1, print_red("Cannot find any reads matching: ${reads}\nNB: Path needs to be enclosed in quotes!\n") - } - .into { reads_for_fastqc; readPairs_for_discovery;readPairs_for_kallisto} - process Run_fastQC { - tag { fastq_tag } - label 'qc' - - publishDir pattern: "*.html", - path: { params.out_folder + "/Result/QC" }, mode: 'copy', overwrite: true - - input: - set val(samplename), file(fastq_file) from reads_for_fastqc - - output: - file "*.html" into fastqc_for_waiting - shell: - fastq_tag = samplename - fastq_threads = idv_cpu - 1 - ''' - fastqc -t !{fastq_threads} !{fastq_file[0]} !{fastq_file[1]} - ''' - } - } - else if (params.qctools == 'afterqc'){ - Channel.fromFilePairs(reads, size: params.singleEnd ? 1 : 2) - .ifEmpty { - exit 1, print_red("Cannot find any reads matching: ${reads}\nPlz check your fasta_ref string in nextflow.config file \n") - }.set { reads_for_fastqc} - process Run_afterQC { - - tag { fastq_tag } - label 'qc' - publishDir pattern: "QC/*.html", - path: { params.out_folder + "/Result/QC" }, mode: 'copy', overwrite: true - - input: - set val(samplename), file(fastq_file) from reads_for_fastqc - - output: - file "QC/*.html" into fastqc_for_waiting - set val(fastq_tag), file('*.good.fq.gz') into readPairs_for_discovery,readPairs_for_kallisto - shell: - fastq_tag = samplename - fastq_threads = idv_cpu - 1 - if (params.singleEnd) { - ''' - after.py -z -1 !{fastq_file[0]} -g ./ - ''' - } else { - ''' - after.py -z -1 !{fastq_file[0]} -2 !{fastq_file[1]} -g ./ - ''' - } - } - } - else if (params.qctools == 'fastp'){ - Channel.fromFilePairs(reads, size: params.singleEnd ? 
1 : 2) - .ifEmpty { - exit 1, print_red("Cannot find any reads matching: ${reads}\nPlz check your fasta_ref string in nextflow.config file \n") - } - .set { reads_for_fastqc} - process Run_FastP { - - tag { fastq_tag } - label 'qc' - - publishDir pattern: "*.html", - path: { params.out_folder + "/Result/QC" }, mode: 'copy', overwrite: true - - input: - set val(samplename), file(fastq_file) from reads_for_fastqc - - output: - file "*.html" into fastqc_for_waiting - set val(fastq_tag), file('*qc.fq.gz') into readPairs_for_discovery,readPairs_for_kallisto - shell: - fastq_tag = samplename - fastq_threads = idv_cpu - 1 - if (params.singleEnd) { - ''' - fastp -i !{fastq_file[0]} -o !{samplename}.qc.gz -h !{samplename}_fastp.html - - ''' - } else { - ''' - fastp -i !{fastq_file[0]} -I !{fastq_file[1]} -o !{samplename}_1.qc.fq.gz -O !{samplename}_2.qc.fq.gz -h !{samplename}_fastp.html - ''' - } - } - }else{ - Channel.fromFilePairs(reads, size: params.singleEnd ? 1 : 2) - .ifEmpty { - exit 1, print_red("Cannot find any reads matching: ${reads}\nPlz check your fasta_ref string in nextflow.config file \n") - } - .into{readPairs_for_discovery; readPairs_for_kallisto;fastqc_for_waiting} - } - fastqc_for_waiting = fastqc_for_waiting.first() - - /* - * Step 4: Initialize read alignment (STAR/HISAT2/tophat) - */ - if (params.aligner == 'star') { - process fastq_star_alignment_For_discovery { - - tag { file_tag } - - publishDir pattern: "", - path: { params.out_folder + "/Result/Star_alignment" }, mode: 'copy', overwrite: true - - input: - set val(samplename), file(pair) from readPairs_for_discovery - file tempfiles from fastqc_for_waiting // just for waiting - file fasta_ref - file star_index - - output: - set val(file_tag_new), file("${file_tag_new}Aligned.sortedByCoord.out.bam") into mappedReads,forHtseqMappedReads - file "${file_tag_new}Log.final.out" into alignment_logs - shell: - println print_purple("Start mapping with STAR aligner " + samplename) - file_tag = samplename - 
file_tag_new = file_tag - star_threads = ava_cpu - 1 - - if (params.singleEnd) { - println print_purple("Initial reads mapping of " + samplename + " performed by STAR in single-end mode") - """ - STAR --runThreadN !{star_threads} \ - --twopassMode Basic \ - --genomeDir !{star_index} \ - --readFilesIn !{pair} \ - --readFilesCommand zcat \ - --outSAMtype BAM SortedByCoordinate \ - --chimSegmentMin 20 \ - --outFilterIntronMotifs RemoveNoncanonical \ - --outFilterMultimapNmax 20 \ - --alignIntronMin 20 \ - --alignIntronMax 1000000 \ - --alignMatesGapMax 1000000 \ - --outFilterType BySJout \ - --alignSJoverhangMin 8 \ - --alignSJDBoverhangMin 1 \ - --outFileNamePrefix !{file_tag_new} - """ - } else { - println print_purple("Initial reads mapping of " + samplename + " performed by STAR in paired-end mode") - ''' - STAR --runThreadN !{star_threads} \ - --twopassMode Basic --genomeDir !{star_index} \ - --readFilesIn !{pair[0]} !{pair[1]} \ - --readFilesCommand zcat \ - --outSAMtype BAM SortedByCoordinate \ - --chimSegmentMin 20 \ - --outFilterIntronMotifs RemoveNoncanonical \ - --outFilterMultimapNmax 20 \ - --alignIntronMin 20 \ - --alignIntronMax 1000000 \ - --alignMatesGapMax 1000000 \ - --outFilterType BySJout \ - --alignSJoverhangMin 8 \ - --alignSJDBoverhangMin 1 \ - --outFileNamePrefix !{file_tag_new} - ''' - } - } - } - else if (params.aligner == 'tophat') - { - process fastq_tophat_alignment_For_discovery { - - tag { file_tag } - - publishDir pattern: "", - path: { params.out_folder + "/Result/tophat_alignment" }, mode: 'copy', overwrite: true - - input: - set val(samplename), file(pair) from readPairs_for_discovery - file tempfiles from fastqc_for_waiting // just for waiting - file fasta_ref - file bowtie2_index from bowtie2_index.collect() - file gtf from gencode_annotation_gtf - - output: - set val(samplename),file("${file_tag_new}_thout/accepted.bam") into mappedReads,forHtseqMappedReads - file "${file_tag_new}_thout/Alignment_summary.txt" into alignment_logs 
- //align_summary.txt as log file - shell: - println print_purple("Start mapping with tophat2 aligner " + samplename) - file_tag = samplename - file_tag_new = file_tag - tophat_threads = ava_cpu- 1 - index_base = bowtie2_index[0].toString() - ~/.\d.bt2/ - strand_str="fr-firststrand" - if(unstrand){ - strand_str="fr-unstranded" - } - if (params.singleEnd) { - println print_purple("Initial reads mapping of " + samplename + " performed by Tophat in single-end mode") - ''' - tophat -p !{tophat_threads} -G !{gtf} -–no-novel-juncs -o !{samplename}_thout --library-type !{strand_str} !{index_base} !{pair} - - ''' - } else { - println print_purple("Initial reads mapping of " + samplename + " performed by Tophat in paired-end mode") - ''' - tophat -p !{tophat_threads} -G !{gtf} -–no-novel-juncs -o !{samplename}_thout --library-type !{strand_str} !{index_base} !{pair[0]} !{pair[1]} - ''' - } - } - } - else if (params.aligner == 'hisat') { - process fastq_hisat2_alignment_For_discovery { - - tag { file_tag } - label 'para' - publishDir pattern: "", - path: { params.out_folder + "/Result/hisat_alignment" }, mode: 'copy', overwrite: true - - input: - set val(samplename), file(pair) from readPairs_for_discovery - file tempfiles from fastqc_for_waiting // just for waiting - file fasta_ref - file hisat2_id from hisat2_index.collect() - - output: - set val(file_tag_new),file("${file_tag_new}.sort.bam") into hisat_mappedReads,forHtseqMappedReads - file "${file_tag_new}.hisat2_summary.txt" into alignment_logs - //align_summary.txt as log file - shell: - println print_purple("Start mapping with hisat2 aligner " + samplename) - file_tag = samplename - file_tag_new = file_tag - hisat2_threads = ava_cpu- 2 - index_base = hisat2_id[0].toString() - ~/.\d.ht2/ - - if(unstrand){ - if (params.singleEnd) { - println print_purple("Initial reads mapping of " + samplename + " performed by hisat2 in single-end mode") - ''' - mkdir tmp - hisat2 -p !{hisat2_threads} --dta -x !{index_base} -U !{pair} 
-S !{file_tag_new}.sam 2>!{file_tag_new}.hisat2_summary.txt - sambamba view -S -f bam -t !{hisat2_threads} !{file_tag_new}.sam -o temp.bam - sambamba sort -o !{file_tag_new}.sort.bam --tmpdir ./tmp -t !{hisat2_threads} temp.bam - rm !{file_tag_new}.sam - rm temp.bam - - ''' - } else { - println print_purple("Initial reads mapping of " + samplename + " performed by hisat2 in paired-end mode") - ''' - mkdir tmp - hisat2 -p !{hisat2_threads} --dta -x !{index_base} -1 !{pair[0]} -2 !{pair[1]} -S !{file_tag_new}.sam 2> !{file_tag_new}.hisat2_summary.txt - sambamba view -S -f bam -t !{hisat2_threads} !{file_tag_new}.sam -o temp.bam - sambamba sort -o !{file_tag_new}.sort.bam --tmpdir ./tmp -t !{hisat2_threads} temp.bam - rm !{file_tag_new}.sam - ''' - } - }else { - if (params.singleEnd) { - println print_purple("Initial reads mapping of " + samplename + " performed by hisat2 in single-end mode") - ''' - mkdir tmp - hisat2 -p !{hisat2_threads} --dta --rna-strandness !{params.hisat_strand} -x !{index_base} -U !{pair} -S !{file_tag_new}.sam 2>!{file_tag_new}.hisat2_summary.txt - sambamba view -S -f bam -t !{hisat2_threads} !{file_tag_new}.sam -o temp.bam - sambamba sort -o !{file_tag_new}.sort.bam --tmpdir ./tmp -t !{hisat2_threads} temp.bam - rm !{file_tag_new}.sam - rm temp.bam - - ''' - } else { - println print_purple("Initial reads mapping of " + samplename + " performed by hisat2 in paired-end mode") - ''' - mkdir tmp - hisat2 -p !{hisat2_threads} --dta --rna-strandness !{params.hisat_strand} -x !{index_base} -1 !{pair[0]} -2 !{pair[1]} -S !{file_tag_new}.sam 2> !{file_tag_new}.hisat2_summary.txt - sambamba view -S -f bam -t !{hisat2_threads} !{file_tag_new}.sam -o temp.bam - sambamba sort -o !{file_tag_new}.sort.bam --tmpdir ./tmp -t !{hisat2_threads} temp.bam - rm !{file_tag_new}.sam - ''' - } - } - } - } - - /* - * Step 5: Transcript assembly using Stringtie - */ - if(params.aligner == 'hisat'){ - process StringTie_assembly { - - tag { file_tag } - - input: - set 
val(samplename),file(alignment_bam) from hisat_mappedReads - file fasta_ref - file gencode_annotation_gtf - - output: - - file "stringtie_${file_tag_new}_transcripts.gtf" into stringTieoutgtf, StringTieOutGtf_fn - - shell: - file_tag = samplename - file_tag_new = file_tag - stringtie_threads = ava_cpu- 2 - - if(unstrand){ - ''' - #run stringtie - stringtie -p !{stringtie_threads} -G !{gencode_annotation_gtf} -l stringtie_!{file_tag_new} -o stringtie_!{file_tag_new}_transcripts.gtf !{alignment_bam} - ''' - }else{ - ''' - #run stringtie - stringtie -p !{stringtie_threads} -G !{gencode_annotation_gtf} --rf -l stringtie_!{file_tag_new} -o stringtie_!{file_tag_new}_transcripts.gtf !{alignment_bam} - ''' - } - - } -// Create a file 'gtf_filenames' containing the filenames of each post processes cufflinks gtf - stringTieoutgtf.collectFile { file -> ['gtf_filenames.txt', file.name + '\n'] } - .set { GTFfilenames } - /* - * Step 6: Merged GTFs into one - */ - process StringTie_merge_assembled_gtf { - - tag { file_tag } - label 'para' - publishDir pattern: "merged.gtf", - path: { params.out_folder + "/Result/Merged_assemblies" }, mode: 'copy', overwrite: true - - input: - file gtf_filenames from GTFfilenames - file cufflinksgtf_file from StringTieOutGtf_fn.toList() // not used but just send the file in current running folder - file fasta_ref - - - output: - file "merged.gtf" into mergeTranscripts_forCompare, mergeTranscripts_forExtract, mergeTranscripts_forCodeingProtential - shell: - - stringtie_threads = ava_cpu- 1 - - ''' - stringtie --merge -p !{stringtie_threads} -o merged.gtf !{gtf_filenames} - - - ''' - } - } - else{ - process Cufflinks_assembly { - - tag { file_tag } - - input: - set val(file_tag), file(alignment_bam) from mappedReads - file fasta_ref - file gencode_annotation_gtf - - output: - - file "Cufout_${file_tag_new}_transcripts.gtf" into cuflinksoutgtf, cuflinksoutgtf_fn - - shell: - file_tag_new = file_tag - cufflinks_threads = ava_cpu- 1 - 
strand_str="fr-firststrand" - if(unstrand){ - strand_str="fr-unstranded" - } - if (params.aligner == 'tophat') { - ''' - #run cufflinks - - cufflinks -g !{gencode_annotation_gtf} \ - -b !{fasta_ref} \ - --library-type !{strand_str}\ - --max-multiread-fraction 0.25 \ - --3-overhang-tolerance 2000 \ - -o Cufout_!{file_tag_new} \ - -p !{cufflinks_threads} !{alignment_bam} - - mv Cufout_!{file_tag_new}/transcripts.gtf Cufout_!{file_tag_new}_transcripts.gtf - ''' - - } else if (params.aligner == 'star') { - ''' - #run cufflinks - - cufflinks -g !{gencode_annotation_gtf} \ - -b !{fasta_ref} \ - --library-type !{strand_str} \ - --max-multiread-fraction 0.25 \ - --3-overhang-tolerance 2000 \ - -o Cufout_!{file_tag_new} \ - -p !{cufflinks_threads} !{alignment_bam} - - mv Cufout_!{file_tag_new}/transcripts.gtf Cufout_!{file_tag_new}_transcripts.gtf - ''' - - } - - - } - -// Create a file 'gtf_filenames' containing the filenames of each post processes cufflinks gtf - - cuflinksoutgtf.collectFile { file -> ['gtf_filenames.txt', file.name + '\n'] } - .set { GTFfilenames } - - /* - * Step 6: Merged GTFs into one - */ - process cuffmerge_assembled_gtf { - - tag { file_tag } - label 'para' - publishDir pattern: "CUFFMERGE/merged.gtf", - path: { params.out_folder + "/Result/All_assemblies" }, mode: 'copy', overwrite: true - - input: - file gtf_filenames from GTFfilenames - file cufflinksgtf_file from cuflinksoutgtf_fn.toList() // not used but just send the file in current running folder - - file fasta_ref - - - output: - file "CUFFMERGE/merged.gtf" into mergeTranscripts_forCompare, mergeTranscripts_forExtract, mergeTranscripts_forCodeingProtential - shell: - - cufflinks_threads = ava_cpu- 1 - - ''' - mkdir CUFFMERGE - cuffmerge -o CUFFMERGE \ - -s !{fasta_ref} \ - -p !{cufflinks_threads} \ - !{gtf_filenames} - - ''' - } - } - - -} -else { - println print_yellow("Raw reads quality check step was skipped due to provided ") + print_green("--merged_gtf") + print_yellow(" option\n") - 
println print_yellow("Reads mapping step was skipped due to provided ") + print_green("--merged_gtf") + print_yellow(" option\n") - - merged_gtf = file(params.merged_gtf) - Channel.fromPath(merged_gtf) - .ifEmpty { exit 1, "Cannot find merged gtf : ${merged_gtf}" } - .into { - mergeTranscripts_forCompare; mergeTranscripts_forExtract; mergeTranscripts_forCodeingProtential - } - - // add fastq when do quantification - reads = params.input_folder + params.fastq_ext - if (params.qctools == 'fastqc') { - Channel.fromFilePairs(reads, size: params.singleEnd ? 1 : 2) - .ifEmpty { - exit 1, print_red("Fastq file not found, plz check your file path : ${reads}\n") - } - .into { reads_for_fastqc; readPairs_for_discovery;readPairs_for_kallisto} - process Run_fastQC { - tag { fastq_tag } - label 'qc' - - publishDir pattern: "*.html", - path: { params.out_folder + "/Result/QC" }, mode: 'copy', overwrite: true - - input: - set val(samplename), file(fastq_file) from reads_for_fastqc - - output: - file "*.html" into fastqc_for_waiting - shell: - fastq_tag = samplename - fastq_threads = idv_cpu - 1 - ''' - fastqc -t !{fastq_threads} !{fastq_file[0]} !{fastq_file[1]} - ''' - } - } - else if (params.qctools == 'afterqc'){ - Channel.fromFilePairs(reads, size: params.singleEnd ? 
1 : 2) - .ifEmpty { - exit 1, print_red("Fastq file not found : ${reads}\nPlz check your fasta_ref string in nextflow.config file \n") - } - .set { reads_for_fastqc} - process Run_afterQC { - - tag { fastq_tag } - label 'qc' - - publishDir pattern: "QC/*.html", - path: { params.out_folder + "/Result/QC" }, mode: 'copy', overwrite: true - - input: - set val(samplename), file(fastq_file) from reads_for_fastqc - - output: - file "QC/*.html" into fastqc_for_waiting - set val(fastq_tag), file('*.good.fq.gz') into readPairs_for_discovery,readPairs_for_kallisto - shell: - fastq_tag = samplename - fastq_threads = idv_cpu - 1 - if (params.singleEnd) { - ''' - after.py -z -1 !{fastq_file[0]} -g ./ - ''' - } else { - ''' - after.py -z -1 !{fastq_file[0]} -2 !{fastq_file[1]} -g ./ - ''' - } - } - } - else if (params.qctools == 'fastp'){ - Channel.fromFilePairs(reads, size: params.singleEnd ? 1 : 2) - .ifEmpty { - exit 1, print_red("Fastq file not found : ${reads}\nPlz check your fasta_ref string in nextflow.config file \n") - } - .set { reads_for_fastqc} - process Run_FastP { - - tag { fastq_tag } - label 'qc' - - publishDir pattern: "*.html", - path: { params.out_folder + "/Result/QC" }, mode: 'copy', overwrite: true - - input: - set val(samplename), file(fastq_file) from reads_for_fastqc - - output: - file "*.html" into fastqc_for_waiting - set val(fastq_tag), file('*qc.fq.gz') into readPairs_for_discovery,readPairs_for_kallisto - shell: - fastq_tag = samplename - fastq_threads = idv_cpu - 1 - if (params.singleEnd) { - ''' - fastp -i !{fastq_file[0]} -o !{samplename}.qc.gz -h !{samplename}_fastp.html - - ''' - } else { - ''' - fastp -i !{fastq_file[0]} -I !{fastq_file[1]} -o !{samplename}_1.qc.fq.gz -O !{samplename}_2.qc.fq.gz -h !{samplename}_fastp.html - ''' - } - } - } - else{ - Channel.fromFilePairs(reads, size: params.singleEnd ? 
1 : 2) - .ifEmpty { - exit 1, print_red("Cannot find any reads matching: ${reads}\nPlz check your fasta_ref string in nextflow.config file \n") - } - .into{readPairs_for_discovery; readPairs_for_kallisto;fastqc_for_waiting} - } - fastqc_for_waiting2 = fastqc_for_waiting.first() - -} - -/* -*Step 7: Compare assembled gtf with known annotations (GENCODE) -*/ - process Merge_assembled_gtf_with_GENCODE { - - tag { file_tag } - input: - file mergeGtfFile from mergeTranscripts_forCompare - file gencode_annotation_gtf - - output: - file "merged_lncRNA.merged.gtf.tmap" into comparedGTF_tmap - shell: - - gffcompare_threads = ava_cpu- 1 - ''' - #!/bin/sh - gffcompare -r !{gencode_annotation_gtf} -p !{gffcompare_threads} !{mergeGtfFile} -o merged_lncRNA - ''' - } - - - -/* -*Step 8: Filter GTFs to distinguish novel lncRNAs -*/ -process Identify_novel_lncRNA_with_criterions { - - input: - file comparedTmap from comparedGTF_tmap - file fasta_ref - file mergedGTF from mergeTranscripts_forExtract - - output: - file "novel.gtf.tmap" into noveltmap - file "novel.longRNA.fa" into novelLncRnaFasta - file "novel.longRNA.exoncount.txt" into novelLncRnaExonCount - - shell: - ''' - # filtering novel lncRNA based on cuffmerged trascripts - set -o pipefail - awk '$3 =="x"||$3=="u"||$3=="i"{print $0}' !{comparedTmap} > novel.gtf.tmap - # excluding length smaller than 200 nt - awk '$10 >200{print}' novel.gtf.tmap > novel.longRNA.gtf.tmap - # extract gtf - awk '{print $5}' novel.longRNA.gtf.tmap |perl !{baseDir}/bin/extract_gtf_by_name.pl !{mergedGTF} - >novel.longRNA.gtf - awk '{if($3=="exon"){print $0}}' novel.longRNA.gtf > novel.longRNA.format.gtf - perl !{baseDir}/bin/get_exoncount.pl novel.longRNA.format.gtf > novel.longRNA.exoncount.txt - # gtf2gff3 - #check whether required - # get fasta from gtf - gffread novel.longRNA.gtf -g !{fasta_ref} -w novel.longRNA.fa -W - ''' -} - -/* -*Step 9: Predict coding potential abilities using CPAT and PLEK (CNCI functionality coming soon!) 
-*/ -novelLncRnaFasta.into { novelLncRnaFasta_for_PLEK; novelLncRnaFasta_for_CPAT; } - -process Predict_coding_abilities_by_PLEK { - - // as PLEK can not return valid exit status even run smoothly, we manually set the exit status into 0 to promote analysis - validExitStatus 0, 1, 2 - input: - file novel_lncRNA_fasta from novelLncRnaFasta_for_PLEK - output: - file "novel.longRNA.PLEK.out" into novel_longRNA_PLEK_result - shell: - plek_threads = ava_cpu- 1 - ''' - PLEK.py -fasta !{novel_lncRNA_fasta} \ - -out novel.longRNA.PLEK.out \ - -thread !{plek_threads} - exit 0 - ''' - -} -process Predict_coding_abilities_by_CPAT { - input: - file novel_lncRNA_fasta from novelLncRnaFasta_for_CPAT - output: - file "novel.longRNA.CPAT.out" into novel_longRNA_CPAT_result - shell: - if(params.species=="human"){ - ''' - cpat.py -g !{novel_lncRNA_fasta} \ - -x !{params.cpatpath}/dat/Human_Hexamer.tsv \ - -d !{params.cpatpath}/dat/Human_logitModel.RData \ - -o novel.longRNA.CPAT.out - ''' - }else if (params.species=="mouse"){ - ''' - cpat.py -g !{novel_lncRNA_fasta} \ - -x !{params.cpatpath}/dat/Mouse_Hexamer.tsv \ - -d !{params.cpatpath}/dat/Mouse_logitModel.RData \ - -o novel.longRNA.CPAT.out - ''' - - }else if (params.species=="zebrafish"){ - ''' - cpat.py -g !{novel_lncRNA_fasta} \ - -x !{params.cpatpath}/dat/zebrafish_Hexamer.tsv \ - -d !{params.cpatpath}/dat/zebrafish_logitModel.RData \ - -o novel.longRNA.CPAT.out - ''' - }else { - ''' - cpat.py -g !{novel_lncRNA_fasta} \ - -x !{params.cpatpath}/dat/fly_Hexamer.tsv \ - -d !{params.cpatpath}/dat/fly_logitModel.RData \ - -o novel.longRNA.CPAT.out - ''' - } - -} - - -/* -*Step 9: Merged and filter lncRNAs based on coding potential (CPAT/PLEK) -*/ -process Filter_lncRNA_by_coding_potential_result { - input: - file novel_longRNA_PLEK_ from novel_longRNA_PLEK_result - file novel_longRNA_CPAT_ from novel_longRNA_CPAT_result - file longRNA_novel_exoncount from novelLncRnaExonCount - file cuffmergegtf from 
mergeTranscripts_forCodeingProtential - file gencode_annotation_gtf - file fasta_ref - - output: - file "novel.longRNA.stringent.gtf" into Novel_longRNA_stringent_gtf // not used - file "novel.lncRNA.stringent.gtf" into novel_lncRNA_stringent_gtf - file "novel.TUCP.stringent.gtf" into novel_TUCP_stringent_gtf // not used - - shell: - ''' - set -o pipefail - #merged transcripts - perl !{baseDir}/bin/integrate_novel_transcripts.pl > novel.longRNA.txt - awk '$4 >1{print $1}' novel.longRNA.txt|perl !{baseDir}/bin/extract_gtf_by_name.pl !{cuffmergegtf} - > novel.longRNA.stringent.gtf - # retain lncRNA only by coding ability - awk '$4 >1&&$5=="lncRNA"{print $1}' novel.longRNA.txt|perl !{baseDir}/bin/extract_gtf_by_name.pl !{cuffmergegtf} - > novel.lncRNA.stringent.gtf - awk '$4 >1&&$5=="TUCP"{print $1}' novel.longRNA.txt|perl !{baseDir}/bin/extract_gtf_by_name.pl !{cuffmergegtf} - > novel.TUCP.stringent.gtf - ''' -} - -/* -*Step 10: Further filtered lncRNAs with known criterion -*/ -process Summary_renaming_and_classification { - publishDir "${params.out_folder}/Result/Identified_lncRNA", mode: 'copy' - - - input: - file knowlncRNAgtf from KnownLncRNAgtf - file gencode_protein_coding_gtf from proteinCodingGTF - file novel_lncRNA_stringent_Gtf from novel_lncRNA_stringent_gtf - file fasta_ref - file mod_file_for_rename - - output: -// file "lncRNA.final.v2.gtf" into finalLncRNA_gtf -// file "lncRNA.final.v2.map" into finalLncRNA_map - file "protein_coding.final.gtf" into final_protein_coding_gtf - file "all_lncRNA_for_classifier.gtf" into finalLncRNA_for_class_gtf - file "final_all.gtf" into finalGTF_for_quantification_gtf, finalGTF_for_annotate_gtf - file "final_all.fa" into finalFasta_for_quantification_gtf - file "protein_coding.fa" into final_coding_gene_for_CPAT_fa - file "lncRNA.fa" into final_lncRNA_for_CPAT_fa - file "lncRNA_classification.txt" into lncRNA_classification - file "lncRNA.mapping.file" into rename_mapping_file - //file "lncRNA.final.CPAT.out" into 
lncRNA_CPAT_statistic - //file "protein_coding.final.CPAT.out" into protein_coding_CPAT_statistic - - shell: - - cufflinks_threads = ava_cpu- 1 - - ''' - set -o pipefail - gffcompare -G -o filter \ - -r !{knowlncRNAgtf} \ - -p !{cufflinks_threads} !{novel_lncRNA_stringent_Gtf} - awk '$3 =="u"||$3=="x"{print $5}' filter.novel.lncRNA.stringent.gtf.tmap |sort|uniq| \ - perl !{baseDir}/bin/extract_gtf_by_name.pl !{novel_lncRNA_stringent_Gtf} - > novel.lncRNA.stringent.filter.gtf - - #rename lncRNAs according to neighbouring protein coding genes - awk '$3 =="gene"{print }' !{gencode_protein_coding_gtf} | perl -F'\\t' -lane '$F[8]=~/gene_id "(.*?)";/ && print join qq{\\t},@F[0,3,4],$1,@F[5,6,1,2,7,8,9]' - | \ - sort-bed - > gencode.protein_coding.gene.bed - gtf2bed < novel.lncRNA.stringent.filter.gtf |sort-bed - > novel.lncRNA.stringent.filter.bed - gtf2bed < !{knowlncRNAgtf} |sort-bed - > known.lncRNA.bed - perl !{baseDir}/bin/rename_lncRNA_2.pl - # mv lncRNA.final.v2.gtf all_lncRNA_for_classifier.gtf - grep -v NA-1-1 lncRNA.final.v2.gtf > all_lncRNA_for_classifier.gtf - perl !{baseDir}/bin/rename_proteincoding.pl !{gencode_protein_coding_gtf}> protein_coding.final.gtf - cat all_lncRNA_for_classifier.gtf protein_coding.final.gtf > final_all.gtf - gffread final_all.gtf -g !{fasta_ref} -w final_all.fa -W - gffread all_lncRNA_for_classifier.gtf -g !{fasta_ref} -w lncRNA.fa -W - gffread protein_coding.final.gtf -g !{fasta_ref} -w protein_coding.fa -W - #classification - perl !{baseDir}/bin/lincRNA_classification.pl all_lncRNA_for_classifier.gtf !{gencode_protein_coding_gtf} lncRNA_classification.txt - - - ''' -} - -/* -*Step 11: Rerun CPAT to evaluate the results -*/ -//evaluate lncRNA -process Rerun_CPAT_to_evaluate_lncRNA { - input: - file lncRNA_final_cpat_fasta from final_lncRNA_for_CPAT_fa - output: - file "lncRNA.final.CPAT.out" into final_lncRNA_CPAT_result - shell: - - if(params.species=="human"){ - ''' - cpat.py -g !{lncRNA_final_cpat_fasta} \ - -x 
!{params.cpatpath}/dat/Human_Hexamer.tsv \ - -d !{params.cpatpath}/dat/Human_logitModel.RData \ - -o lncRNA.final.CPAT.out - ''' - }else if (params.species=="mouse"){ - ''' - cpat.py -g !{lncRNA_final_cpat_fasta} \ - -x !{params.cpatpath}/dat/Mouse_Hexamer.tsv \ - -d !{params.cpatpath}/dat/Mouse_logitModel.RData \ - -o lncRNA.final.CPAT.out - ''' - - }else if (params.species=="zebrafish"){ - ''' - cpat.py -g !{lncRNA_final_cpat_fasta} \ - -x !{params.cpatpath}/dat/zebrafish_Hexamer.tsv \ - -d !{params.cpatpath}/dat/zebrafish_logitModel.RData \ - -o lncRNA.final.CPAT.out - ''' - }else { - ''' - cpat.py -g !{lncRNA_final_cpat_fasta} \ - -x !{params.cpatpath}/dat/fly_Hexamer.tsv \ - -d !{params.cpatpath}/dat/fly_logitModel.RData \ - -o lncRNA.final.CPAT.out - ''' - } -} -//evaluate coding -process Rerun_CPAT_to_evaluate_coding { - input: - file final_coding_gene_for_CPAT from final_coding_gene_for_CPAT_fa - output: - file "protein_coding.final.CPAT.out" into final_coding_gene_CPAT_result - shell: - ''' - cpat.py -g !{final_coding_gene_for_CPAT} \ - -x !{params.cpatpath}/dat/Human_Hexamer.tsv \ - -d !{params.cpatpath}/dat/Human_logitModel.RData \ - -o protein_coding.final.CPAT.out - ''' -} -//summary result -process Secondary_basic_statistic { - - input: - file protein_coding_final_gtf from final_protein_coding_gtf - file all_lncRNA_for_classifier_gtf from finalLncRNA_for_class_gtf - file lncRNA_cds from final_lncRNA_CPAT_result - file coding_gene_cds from final_coding_gene_CPAT_result - file lncRNA_class from lncRNA_classification - output: - file "basic_charac.txt" into statistic_result - - shell: - ''' - #!/usr/bin/perl -w - #since CPAT arbitrarily transforms gene names into upper case, we apply 'uc' function to keep the genenames' consistency. 
- use strict; - open OUT,">basic_charac.txt" or die; - - open FH,"all_lncRNA_for_classifier.gtf" or die; - - my %class; - my %g2t; - my %trans_len; - my %exon_num; - while(){ - chomp; - my @field=split "\t"; - $_=~/gene_id "(.+?)"/; - my $gid=$1; - $_=~/transcript_id "(.+?)"/; - my $tid=uc($1); - $class{$tid}=$field[1]; - $g2t{$tid}=$gid; - my $len=$field[4]-$field[3]; - $trans_len{$tid}=(exists $trans_len{$tid})?$trans_len{$tid}+$len:$len; - $exon_num{$tid}=(exists $exon_num{$tid})?$exon_num{$tid}+1:1; - } - open FH,"protein_coding.final.gtf" or die; - - while(){ - chomp; - my @field=split "\t"; - $_=~/gene_id "(.+?)"/; - my $gid=uc($1); - $_=~/transcript_id "(.+?)"/; - my $tid=$1; - $class{$tid}="protein_coding"; - $g2t{$tid}=$gid; - my $len=$field[4]-$field[3]; - $trans_len{$tid}=(exists $trans_len{$tid})?$trans_len{$tid}+$len:$len; - $exon_num{$tid}=(exists $exon_num{$tid})?$exon_num{$tid}+1:1; - } - - my %lin_class; - open IN,"lncRNA_classification.txt" or die; #change the file name - while(){ - chomp; - my @data = split /\\t/,$_; - $lin_class{$data[0]} = $data[1]; - } - open FH,"lncRNA.final.CPAT.out" or die; - - ; - - while(){ - chomp; - my @field=split "\t"; - my $tid=uc($field[0]); - my $class; - if (defined($lin_class{$tid})){ - $class = $lin_class{$tid}; - }else{ - $class = 'NA'; - } - print OUT $g2t{$tid}."\t".$tid."\t".$class{$tid}."\t".$field[5]."\t".$trans_len{$tid}."\t".$exon_num{$tid}."\t".$class."\n"; - } - - open FH,"protein_coding.final.CPAT.out" or die; - - ; - - while(){ - chomp; - my @field=split "\t"; - my $tid=uc($field[0]); - my $class; - if (defined($lin_class{$tid})){ - $class = $lin_class{$tid}; - }else{ - $class = 'protein_coding'; - } - print OUT $g2t{$tid}."\t".$tid."\t".$class{$tid}."\t".$field[5]."\t".$trans_len{$tid}."\t".$exon_num{$tid}."\t".$class."\n"; - } - - ''' -} - - - -//Keep the channel as constant variable to be used several times in quantification analysis - -//The following code is designed for use if the merged_gtf 
have already been generated previously. -if(!params.merged_gtf){ - /* -*Step 11: Quantification step (Kallisto/Htseq) -*/ - if(params.quant=="htseq"){ - process Run_htseq_for_quantification{ - tag { file_tag } - input: - set val(samplename),file(bamfile) from forHtseqMappedReads - file final_gtf from finalGTF_for_quantification_gtf - - output: - file "${file_tag_new}.htseq.count " into htseq_tcv_collection - - shell: - - file_tag = samplename - file_tag_new = file_tag - if(params.unstrand){ - ''' - sambamba view !{bamfile} > !{samplename}.sam # resolved error caused by bam and htseq version conflicts - htseq-count -t exon -i gene_id -s no -r pos -f sam !{samplename}.sam !{final_gtf} > !{samplename}.htseq.count - rm !{samplename}.sam - ''' - }else { - ''' - sambamba view !{bamfile} > !{samplename}.sam # resolved error caused by bam and htseq version conflicts - htseq-count -t exon -i gene_id -r pos -f sam !{samplename}.sam !{final_gtf} > !{samplename}.htseq.count - rm !{samplename}.sam - ''' - } - - - - } - }else{ - process Build_kallisto_index_of_GTF_for_quantification { - - input: - file transript_fasta from finalFasta_for_quantification_gtf - - output: - file "transcripts.idx" into final_kallisto_index - - shell: - ''' - #index kallisto reference - kallisto index -i transcripts.idx !{transript_fasta} - - ''' - } - constant_kallisto_index = final_kallisto_index.first() - process Run_kallisto_for_quantification { - - - tag { file_tag } - label 'para' - - input: - file kallistoIndex from constant_kallisto_index - set val(samplename), file(pair) from readPairs_for_kallisto - - output: - file "${file_tag_new}_abundance.tsv" into kallisto_tcv_collection - - shell: - file_tag = samplename - file_tag_new = file_tag - kallisto_threads = ava_cpu- 1 - if (params.singleEnd) { - println print_purple("Quantification by kallisto in single end mode") - ''' - #quantification by kallisto in single end mode - kallisto quant -i !{kallistoIndex} -o !{file_tag_new}_kallisto -t 
!{kallisto_threads} -b 100 --single -l 180 -s 20 !{pair} - mv !{file_tag_new}_kallisto/abundance.tsv !{file_tag_new}_abundance.tsv - ''' - - - } else { - println print_purple("quantification by kallisto in paired end mode") - ''' - #quantification by kallisto - kallisto quant -i !{kallistoIndex} -o !{file_tag_new}_kallisto -t !{kallisto_threads} -b 100 !{pair[0]} !{pair[1]} - mv !{file_tag_new}_kallisto/abundance.tsv !{file_tag_new}_abundance.tsv - ''' - } - } - } - -}else{ - /* -*Step 11: Quantification step (Kallisto/Htseq) -*/ - if(params.quant=="htseq"){ - exit 0, print_red("htseq can not be applicable without mapping step, plz set quant tool using `kallisto`") - }else { - process Build_kallisto_index_of_GTF_for_quantification { - - - input: - file transript_fasta from finalFasta_for_quantification_gtf - - output: - file "transcripts.idx" into final_kallisto_index - - shell: - ''' - #index kallisto reference - kallisto index -i transcripts.idx !{transript_fasta} - - ''' - } - constant_kallisto_index = final_kallisto_index.first() - process Run_kallisto_for_quantification { - - - tag { file_tag } - label 'para' - - input: - file kallistoIndex from constant_kallisto_index - set val(samplename), file(pair) from readPairs_for_kallisto - file tempfiles from fastqc_for_waiting2 - output: - file "${file_tag_new}_abundance.tsv" into kallisto_tcv_collection - - shell: - file_tag = samplename - file_tag_new = file_tag - kallisto_threads = ava_cpu - 1 - if (params.singleEnd) { - println print_purple("Quantification by kallisto in single end mode") - ''' - #quantification by kallisto in single end mode - kallisto quant -i !{kallistoIndex} -o !{file_tag_new}_kallisto -t !{kallisto_threads} -b 100 --single -l 180 -s 20 !{pair} - mv !{file_tag_new}_kallisto/abundance.tsv !{file_tag_new}_abundance.tsv - - ''' - - - } else { - println print_purple("Quantification by kallisto in paired end mode") - ''' - #quantification by kallisto - kallisto quant -i !{kallistoIndex} -o 
!{file_tag_new}_kallisto -t !{kallisto_threads} -b 100 !{pair[0]} !{pair[1]} - mv !{file_tag_new}_kallisto/abundance.tsv !{file_tag_new}_abundance.tsv - ''' - } - } - } -} - - -/* -*Step 12: Generate count matrix for differential expression analysis -*/ - -if(params.quant=="htseq"){ - process Get_HTseq_matrix { - tag { file_tag } - publishDir pattern: "htseq*.txt", - path: "${params.out_folder}/Result/Quantification/", mode: 'copy' - input: - file abundance_tsv_matrix from htseq_tcv_collection.collect() - file annotated_gtf from finalGTF_for_annotate_gtf - output: - file "htseq.count.txt" into expression_matrixfile_count - - shell: - file_tag = "htseq" - ''' - perl !{baseDir}/bin/get_map_table.pl final_all.gtf > map.file - R CMD BATCH !{baseDir}/bin/get_htseq_matrix.R - ''' - } -}else{ - process Get_kallisto_matrix { - tag { file_tag } - publishDir pattern: "kallisto*.txt", - path: "${params.out_folder}/Result/Quantification/", mode: 'copy' - input: - file abundance_tsv_matrix from kallisto_tcv_collection.collect() - file annotated_gtf from finalGTF_for_annotate_gtf - output: - file "kallisto.count.txt" into expression_matrixfile_count - file "kallisto.tpm.txt" into expression_matrixfile_tpm - - shell: - file_tag = "Kallisto" - ''' - perl !{baseDir}/bin/get_map_table.pl --gtf_file=final_all.gtf > map.file - R CMD BATCH !{baseDir}/bin/get_kallisto_matrix.R - ''' - } -} - -/* -Step 13: Perform Differential Expression analysis and generate report - */ - -// Initialize parameter for lncPipeReporter -lncRep_Output = params.lncRep_Output -lncRep_theme = params.lncRep_theme -lncRep_cdf_percent = params.lncRep_cdf_percent -lncRep_max_lnc_len = params.lncRep_max_lnc_len -lncRep_min_expressed_sample = params.lncRep_min_expressed_sample -detools = params.detools -design=params.design -if(design!=null){ - design = file(params.design) - if (!design.exists()) exit 1, "Design file not found, plz check your design path: ${params.design}" - - if(!params.merged_gtf) { - process 
Run_LncPipeReporter { - tag { file_tag } - publishDir pattern: "*", - path: "${params.out_folder}/Result/", mode: 'move' - input: - //alignmet log - file design - file alignmetlogs from alignment_logs.collect() - //gtf statistics - file basic_charac from statistic_result - //Expression matrix - file kallisto_count_matrix from expression_matrixfile_count - - output: - file "*" into final_output - shell: - file_tag = "Generating report ..." - """ - Rscript -e "library(LncPipeReporter);run_reporter(input='.', output = 'reporter.html',output_dir='./LncPipeReports',de.method=\'${detools}\',theme = 'npg',cdf.percent = ${lncRep_cdf_percent},max.lncrna.len = ${lncRep_max_lnc_len},min.expressed.sample = ${lncRep_min_expressed_sample}, ask = FALSE)" - """ - } - }else{ - process Run_LncPipeReporter { - tag { file_tag } - publishDir pattern: "*", - path: "${params.out_folder}/Result/", mode: 'move' - input: - //alignment log - file design - //gtf statistics - file basic_charac from statistic_result - //Expression matrix - file kallisto_count_matrix from expression_matrixfile_count - - output: - file "*" into final_output - shell: - file_tag = "Generating report ..." - """ - Rscript -e "library(LncPipeReporter);run_reporter(input='.', output = 'reporter.html',output_dir='./LncPipeReports',de.method=\'${detools}\',theme = 'npg',cdf.percent = ${lncRep_cdf_percent},max.lncrna.len = ${lncRep_max_lnc_len},min.expressed.sample = ${lncRep_min_expressed_sample}, ask = FALSE)" - """ - } - } - -}else{ - if(!params.merged_gtf) { - process Run_LncPipeReporter_without_Design { - tag { file_tag } - publishDir pattern: "*", - path: "${params.out_folder}/Result/", mode: 'move' - input: - //alignmet log - file alignmetlogs from alignment_logs.collect() - //gtf statistics - file basic_charac from statistic_result - //Expression matrix - file kallisto_count_matrix from expression_matrixfile_count - - output: - file "*" into final_output - shell: - file_tag = "Generating report ..." 
- """ - Rscript -e "library(LncPipeReporter);run_reporter(input='.', output = 'reporter.html',output_dir='./LncPipeReports',de.method=\'${detools}\',theme = 'npg',cdf.percent = ${lncRep_cdf_percent},max.lncrna.len = ${lncRep_max_lnc_len},min.expressed.sample = ${lncRep_min_expressed_sample}, ask = FALSE)" - """ - } - }else{ - process Run_LncPipeReporter_without_Design { - tag { file_tag } - publishDir pattern: "*", - path: "${params.out_folder}/Result/", mode: 'move' - input: - //alignment log - //gtf statistics - file basic_charac from statistic_result - //Expression matrix - file kallisto_count_matrix from expression_matrixfile_count - - output: - file "*" into final_output - shell: - file_tag = "Generating report ..." - """ - Rscript -e "library(LncPipeReporter);run_reporter(input='.', output = 'reporter.html',output_dir='./LncPipeReports',de.method=\'${detools}\',theme = 'npg',cdf.percent = ${lncRep_cdf_percent},max.lncrna.len = ${lncRep_max_lnc_len},min.expressed.sample = ${lncRep_min_expressed_sample}, ask = FALSE)" - """ - } - } -} - - - -//pipeline log -if(workflow.success) { - workflow.onComplete { - - log.info print_green("LncPipe Pipeline Complete!") - - //email information - if (params.mail) { - recipient = params.mail - def subject = 'My LncPipe execution' - - ['mail', '-s', subject, recipient].execute() << - """ - - LncPipe execution summary - --------------------------- - Your command line: ${workflow.commandLine} - Completed at: ${workflow.complete} - Duration : ${workflow.duration} - Success : ${workflow.success} - workDir : ${workflow.workDir} - exit status : ${workflow.exitStatus} - Error report: ${workflow.errorReport ?: '-'} - - """ - } - - - } -} -workflow.onError { - println print_yellow("Oops... Pipeline execution stopped with the following message: ")+print_red(workflow.errorMessage) -} - +#!/usr/bin/env nextflow + +/* + * LncPipe was implemented by Dr. Qi Zhao from Sun Yat-sen University Cancer Center, China. 
+ * + * + * LncPipe is a free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * See the GNU General Public License for more details. + * + * + */ + +/* + * LncPipe: A nextflow-based lncRNA identification and analysis pipeline from RNA sequencing data + * + * Authors: + * Qi Zhao : design and implement the pipeline. + * Yu Sun : design and implement the analysis report sections. + * Zhixiang Zuo : design the project and perform the testing. + */ + + + +//pre-defined functions for render command +//======================================================================================= +ANSI_RESET = "\u001B[0m"; +ANSI_BLACK = "\u001B[30m"; +ANSI_RED = "\u001B[31m"; +ANSI_GREEN = "\u001B[32m"; +ANSI_YELLOW = "\u001B[33m"; +ANSI_BLUE = "\u001B[34m"; +ANSI_PURPLE = "\u001B[35m"; +ANSI_CYAN = "\u001B[36m"; +ANSI_WHITE = "\u001B[37m"; + + +def print_red = { str -> ANSI_RED + str + ANSI_RESET } +def print_black = { str -> ANSI_BLACK + str + ANSI_RESET } +def print_green = { str -> ANSI_GREEN + str + ANSI_RESET } +def print_yellow = { str -> ANSI_YELLOW + str + ANSI_RESET } +def print_blue = { str -> ANSI_BLUE + str + ANSI_RESET } +def print_cyan = { str -> ANSI_CYAN + str + ANSI_RESET } +def print_purple = { str -> ANSI_PURPLE + str + ANSI_RESET } +def print_white = { str -> ANSI_WHITE + str + ANSI_RESET } + +//Help information +// Nextflow version +version="v0.2.44" +//======================================================================================= +// Nextflow Version check +if( !nextflow.version.matches('0.30+') ) { + println print_yellow("This workflow requires Nextflow version 0.26 or greater -- You are running version ")+ print_red(nextflow.version) + +} +//help information +params.help = null +if (params.help) { + log.info '' + log.info 
print_purple('------------------------------------------------------------------------') + log.info "LncPipe: a Nextflow-based Long non-coding RNA analysis Pipeline v$version" + log.info "LncPipe integrates several NGS processing tools to identify novel long non-coding RNAs from" + log.info "un-processed RNA sequencing data. To run this pipeline, users either need to install required tools manually" + log.info "or use the docker image for LncPipe that comes with all tools pre-installed. (note: docker needs to be installed on your system). More information on usage can be found at https://github.com/likelet/LncPipe ." + log.info "Bugs or new feature requests can be reported by opening issues in our github repository." + log.info print_purple('------------------------------------------------------------------------') + log.info '' + log.info print_yellow('Usage: ') + log.info print_yellow(' The typical command for running the pipeline is as follows (we do not recommend users passing configuration parameters through command line, please modify the config.file instead):\n') + + print_purple(' Nextflow run LncRNAanalysisPipe.nf \n') + + + print_yellow(' General arguments: Input and output setting\n') + + print_cyan(' --inputdir ') + print_green('Path to input data(optional), current path default\n') + + print_cyan(' --reads <*_fq.gz> ') + print_green('Filename pattern for pairing raw reads, e.g: *_{1,2}.fastq.gz for paired reads\n') + + print_cyan(' --out_folder ') + print_green('The output directory where the results will be saved(optional), current path is default\n') + + print_cyan(' --aligner ') + print_green('Aligner for reads mapping (optional),"hisat"(defalt)/"star"/"tophat"\n') + + print_cyan(' --qctools ') + print_green('Tools for assess reads quality, fastp(default)/afterqc/fastqc/none(skip QC step)\n') + + print_cyan(' --detools ') + print_green('Tools for differential analysis, edger(default)/deseq/noiseq\n') + + print_cyan(' --quant ') + print_green('Tools 
for estimating abundance of transcript, kallisto(default)/htseq\n') + + '\n' + + print_yellow(' Options: General options for run this pipeline\n') + + print_cyan(' --merged_gtf ') + print_green('Start analysis with assemblies already produced and skip fastqc/alignment step, DEFAOUL NULL\n') + + print_cyan(' --design ') + print_green('A flat file stored the experimental design information ( required when perform differential expression analysis)\n') + + print_cyan(' --singleEnd ') + print_green('Reads type, True for single ended \n') + + print_cyan(' --unstrand ') + print_green('RNA library construction strategy, specified for \'unstranded\' library \n') + + '\n' + + print_yellow(' References: If not specified in the configuration file or you wish to overwrite any of the references.\n') + + print_cyan(' --fasta ') + print_green('Path to Fasta reference(required)\n') + + print_cyan(' --gencode_annotation_gtf ') + print_green('An annotation file from GENCODE database in GTF format (required)\n') + + print_cyan(' --lncipedia_gtf ') + print_green('An annotation file from LNCipedia database in GTF format (required)\n') + + '\n' + + print_yellow(' LncPipeReporter Options: LncPipeReporter setting \n') + + print_cyan(' --lncRep_Output ') + print_green('Specify report file name, \"report.html\" default.\n') + + print_cyan(' --lncRep_theme ') + print_green('Plot theme setting in interactive plot, \"npg\" default.\n') + + print_cyan(' --lncRep_min_expressed_sample ') + print_green('Minimum expressed gene allowed in each sample, 50 default.\n') + + '\n' + + print_yellow(' Other options: Specify the email and \n') + + print_cyan(' --sam_processor ') + print_green('program to process samfile generated by hisat2 if aligner is hisat2. Default \"sambamba\". 
\n') + + print_cyan(' --mail ') + print_green('email info for reporting status of your LncPipe execution \n') + + + + + log.info '------------------------------------------------------------------------' + log.info print_yellow('Contact information: zhaoqi@sysucc.org.cn') + log.info print_yellow('Copyright (c) 2013-2017, Sun Yat-sen University Cancer Center.') + log.info '------------------------------------------------------------------------' + exit 0 +} + +//check parameters +/* +allowed_params = ["inputdir","reads","out_folder","aligner","qctools","detools","quant", + "merged_gtf","design","singleEnd","unstrand", + "fasta","gencode_annotation_gtf","lncipedia_gtf", + "lncRep_Output", "lncRep_theme","lncRep_min_expressed_sample", + "sam_processor","mail"] +params.each { entry -> + if (! allowed_params.contains(entry.key)) { + println("The parameter <${entry}.key> is not known"); + System.exit(2); + } +} +*/ + +//default values +params.inputdir = '' +params.outdir = './' +params.multiqc_config = "$baseDir/assets/multiqc_config.yaml" // for generate qc and alignment result +params.merged_gtf = null// dose merged_gtf provided +singleEnd = params.singleEnd ? true : false +skip_combine = params.skip_combine ? true : false +unstrand = params.unstrand ? 
true : false +params.mail=false + +//Checking parameters +log.info print_purple("You are running LncPipe with the following parameters:") +log.info print_purple("Checking parameters ...") +log.info print_yellow("=====================================") +log.info print_yellow("Species: ") + print_green(params.species) +log.info print_yellow("Fastq file extension: ") + print_green(params.reads) +log.info print_yellow("Design file: ") + print_green(params.design) +log.info print_yellow("Single end : ") + print_green(params.singleEnd) +log.info print_yellow("skip annotation process: ") + print_green(params.skip_combine) +log.info print_yellow("Input folder: ") + print_green(params.inputdir) +log.info print_yellow("Output folder: ") + print_green(params.outdir) +log.info print_yellow("Genome sequence location: ") + print_green(params.fasta) +log.info print_yellow("STAR index path: ") + print_green(params.star_index) +log.info print_yellow("HISAT2 index path: ") + print_green(params.hisat2_index) +log.info print_yellow("bowtie/tophat index path: ") + print_green(params.bowtie2_index) +log.info print_yellow("GENCODE annotation location: ") + print_green(params.gencode_annotation_gtf) +log.info print_yellow("lncipedia annotation location: ") + print_green(params.lncipedia_gtf) +log.info print_yellow("=====================================") +log.info "\n" + +// run information of system file +//automatic set optimize resource for analysis based on current system resources + + +// read file +fasta = file(params.fasta) +if (!fasta.exists()) exit 1, "Reference genome not found: ${params.fasta}" +if(params.aligner=='star'){ + star_index = file(params.star_index) + if (!star_index.exists()) exit 1, "STAR index not found: ${params.star_index}" +}else if(params.aligner =='hisat'){ + hisat2_index = Channel.fromPath("${params.hisat2_index}*") + .ifEmpty { exit 1, "HISAT2 index not found: ${params.hisat2_index}" } +}else if(params.aligner =='tophat'){ + bowtie2_index = 
Channel.fromPath("${params.bowtie2_index}*") + .ifEmpty { exit 1, "bowtie2 index for tophat not found: ${params.bowtie2_index}" } +} + +inputdir = params.inputdir +multiqc_config = file(params.multiqc_config) + +/* +*Step 1: Prepare Annotations + */ + +println print_purple("Combining known annotations from GTFs") +if (params.species=="human") { + gencode_annotation_gtf = file(params.gencode_annotation_gtf) + if (!gencode_annotation_gtf.exists()) exit 1, "GENCODE annotation file not found: ${params.gencode_annotation_gtf}" + lncipedia_gtf = file(params.lncipedia_gtf) + if (!lncipedia_gtf.exists()) exit 1, "lncipedia annotation file not found: ${params.lncipedia_gtf}" +//Prepare annotations + annotation_channel = Channel.from(gencode_annotation_gtf, lncipedia_gtf) + annotation_channel.collectFile { file -> ['lncRNA.gtflist', file.name + '\n'] } + .set { LncRNA_gtflist } + process combine_public_annotation { + storeDir { params.outdir + "/Combined_annotations" } + input: + file lncRNA_gtflistfile from LncRNA_gtflist + file gencode_annotation_gtf + file lncipedia_gtf + output: + file "gencode_protein_coding.gtf" into proteinCodingGTF, proteinCodingGTF_forClass + file "known.lncRNA.gtf" into KnownLncRNAgtf + file "*_mod.gtf" into mod_file_for_rename + + shell: + + + if(params.aligner=='hisat'){//fix the gtf format required by hisat + ''' + set -o pipefail + touch filenames.txt + + perl -lpe 's/ ([^"]\\S+) ;/ "$1" ;/g' !{gencode_annotation_gtf} > gencode_annotation_gtf_mod.gtf + perl -lpe 's/ ([^"]\\S+) ;/ "$1" ;/g' !{lncipedia_gtf} > lncipedia_mod.gtf + + echo gencode_annotation_gtf_mod.gtf >>filenames.txt + echo lncipedia_mod.gtf >>filenames.txt + + + stringtie --merge -o merged_lncRNA.gtf filenames.txt + cat gencode_annotation_gtf_mod.gtf |grep "protein_coding" > gencode_protein_coding.gtf + gffcompare -r gencode_protein_coding.gtf -p !{task.cpus} merged_lncRNA.gtf + awk '$3 =="u"||$3=="x"{print $5}' gffcmp.merged_lncRNA.gtf.tmap |sort|uniq|perl 
!{baseDir}/bin/extract_gtf_by_name.pl merged_lncRNA.gtf - > merged.filter.gtf + mv merged.filter.gtf known.lncRNA.gtf + + ''' + }else { + + ''' + set -o pipefail + + cuffmerge -o merged_lncRNA !{lncRNA_gtflistfile} + cat !{gencode_annotation_gtf} |grep "protein_coding" > gencode_protein_coding.gtf + cuffcompare -o merged_lncRNA -r gencode_protein_coding.gtf -p !{task.cpus} merged_lncRNA/merged.gtf + awk '$3 =="u"||$3=="x"{print $5}' merged_lncRNA/merged_lncRNA.merged.gtf.tmap |sort|uniq|perl !{baseDir}/bin/extract_gtf_by_name.pl merged_lncRNA/merged.gtf - > merged.filter.gtf + mv merged.filter.gtf known.lncRNA.gtf + + ''' + } + } +} +else {// for mouse or other species, user should provide known_protein_coding and known_lncRNA GTF file for analysis + + KnownLncRNAgtf=file(params.known_lncRNA_gtf) + if (!KnownLncRNAgtf.exists()) exit 1, print_red("In non-human mode, known lncRNA GTF annotation file not found: ${params.known_lncRNA_gtf}") + known_coding_gtf=file(params.known_coding_gtf) + if (!known_coding_gtf.exists()) exit 1, print_red("In non-human mode, known protein coding GTF annotation file not found: ${params.known_coding_gtf}") + gencode_annotation_gtf = file(params.gencode_annotation_gtf) + if (!gencode_annotation_gtf.exists()) exit 1, print_red("GENCODE annotation file not found: ${params.gencode_annotation_gtf}") + gencode_annotation_gtf.into{proteinCodingGTF; proteinCodingGTF_forClass} + knownLncRNAgtf.set{knownLncRNAgtf} + +} + + +// whether the merged gtf have already produced. 
+if (!params.merged_gtf) { + /* + * Step 2: Build read aligner (STAR/tophat/HISAT2) index, if not provided + */ + //star_index if not exist + /*if (params.aligner == 'star' && params.star_index == false && fasta) { + process Make_STARindex { + tag fasta + + storeDir { params.outdir + "/STARIndex" } + + input: + file fasta from fasta + file gencode_annotation_gtf + + output: + file "star_index" into star_index + + shell: + star_threads = ava_cpu- 1 + """ + mkdir star_index + STAR \ + --runMode genomeGenerate \ + --runThreadN ${star_threads} \ + --sjdbGTFfile $gencode_annotation_gtf \ + --sjdbOverhang 149 \ + --genomeDir star_index/ \ + --genomeFastaFiles $fasta + """ + } + } else if (params.aligner == 'star' && params.star_index == false && !fasta) { + println print_red("No reference fasta sequence loaded! please specify ") + print_red("--fasta") + print_red(" with reference.") + + } else if (params.aligner == 'tophat' && params.bowtie2_index == false && !fasta) { + process Make_bowtie2_index { + + tag fasta + storeDir { params.outdir + "/bowtie2Index" } + + input: + file fasta from fasta + + output: + file "genome_bt2.*" into bowtie2_index + + shell: + """ + bowtie2-build !{fasta} genome_bt2 + """ + } + } else if (params.aligner == 'tophat' && !fasta) { + println print_red("No reference fasta equence loaded! 
please specify ") + print_red("--fasta") + print_red(" with reference.") + } else if (params.aligner == 'hisat' && !fasta) { + process Make_hisat_index { + + tag fasta + + storeDir { params.outdir + "/hisatIndex" } + + input: + file fasta from fasta + file gencode_annotation_gtf + + output: + file "genome_ht2.*" into hisat2_index + + shell: + hisat2_index_threads = ava_cpu- 1 + """ + #for human genome it will take more than 160GB memory and take really long time (6 more hours), thus we recommand to down pre-build genome from hisat website + extract_splice_sites.py !{gencode_annotation_gtf} >genome_ht2.ss + extract_exons.py !{gencode_annotation_gtf} > genome_ht2.exon + hisat2-build -p !{hisat2_index_threads} --ss genome_ht2.ss --exo genome_ht2.exon !{fasta} genome_ht2 + """ + } + } else if (params.aligner == 'tophat' && params.hisat_index == false && !fasta) { + println print_red("No reference fasta sequence loaded! please specify ") + print_red("--fasta") + print_red(" with reference.") + }*/ + + println print_purple("Analysis from fastq file") + //Match the pairs on two channels + + reads = params.inputdir + params.reads + + /* + * Step 3: QC (FastQC/AfterQC/Fastp) of raw reads + */ + println print_purple("Perform quality control of raw fastq files ") + if (params.qctools == 'fastqc') { + Channel.fromFilePairs(reads, size: params.singleEnd ? 
1 : 2) + .ifEmpty { + exit 1, print_red("Cannot find any reads matching: ${reads}\nNB: Path needs to be enclosed in quotes!\n") + } + .into { reads_for_fastqc; readPairs_for_discovery;readPairs_for_kallisto} + process Run_fastQC { + tag { fastq_tag } + label 'qc' + + publishDir pattern: "*.html", + path: { params.outdir + "/QC" }, mode: 'copy', overwrite: true + + input: + set val(samplename), file(fastq_file) from reads_for_fastqc + + output: + file "*.html" into fastqc_for_waiting + shell: + fastq_tag = samplename + ''' + fastqc -t !{task.cpus} !{fastq_file[0]} !{fastq_file[1]} + ''' + } + } + else if (params.qctools == 'afterqc'){ + Channel.fromFilePairs(reads, size: params.singleEnd ? 1 : 2) + .ifEmpty { + exit 1, print_red("Cannot find any reads matching: ${reads}\nPlz check your fasta string in nextflow.config file \n") + }.set { reads_for_fastqc} + process Run_afterQC { + + tag { fastq_tag } + label 'qc' + publishDir pattern: "QC/*.html", + path: { params.outdir + "/QC" }, mode: 'copy', overwrite: true + + input: + set val(samplename), file(fastq_file) from reads_for_fastqc + + output: + file "QC/*.html" into fastqc_for_waiting + set val(fastq_tag), file('*.good.fq.gz') into readPairs_for_discovery,readPairs_for_kallisto + shell: + fastq_tag = samplename + if (params.singleEnd) { + ''' + after.py -z -1 !{fastq_file[0]} -g ./ + ''' + } else { + ''' + after.py -z -1 !{fastq_file[0]} -2 !{fastq_file[1]} -g ./ + ''' + } + } + } + else if (params.qctools == 'fastp'){ + Channel.fromFilePairs(reads, size: params.singleEnd ? 
1 : 2) + .ifEmpty { + exit 1, print_red("Cannot find any reads matching: ${reads}\nPlz check your fasta string in nextflow.config file \n") + } + .set { reads_for_fastqc} + process Run_FastP { + + tag { fastq_tag } + label 'qc' + + publishDir pattern: "*.html", + path: { params.outdir + "/QC" }, mode: 'copy', overwrite: true + + input: + set val(samplename), file(fastq_file) from reads_for_fastqc + + output: + file "*.html" into fastqc_for_waiting + set val(fastq_tag), file('*qc.fq.gz') into readPairs_for_discovery,readPairs_for_kallisto + shell: + fastq_tag = samplename + // fastp output names in both branches must match the '*qc.fq.gz' output glob declared above + if (params.singleEnd) { + ''' + fastp -i !{fastq_file[0]} -o !{samplename}.qc.fq.gz -h !{samplename}_fastp.html + + ''' + } else { + ''' + fastp -i !{fastq_file[0]} -I !{fastq_file[1]} -o !{samplename}_1.qc.fq.gz -O !{samplename}_2.qc.fq.gz -h !{samplename}_fastp.html + ''' + } + } + }else{ + Channel.fromFilePairs(reads, size: params.singleEnd ? 1 : 2) + .ifEmpty { + exit 1, print_red("Cannot find any reads matching: ${reads}\nPlz check your fasta string in nextflow.config file \n") + } + .into{readPairs_for_discovery; readPairs_for_kallisto;fastqc_for_waiting} + } + fastqc_for_waiting = fastqc_for_waiting.first() + + /* + * Step 4: Initialize read alignment (STAR/HISAT2/tophat) + */ + if (params.aligner == 'star') { + process fastq_star_alignment_For_discovery { + + tag { file_tag } + + publishDir pattern: "", + path: { params.outdir + "/Star_alignment" }, mode: 'copy', overwrite: true + + input: + set val(samplename), file(pair) from readPairs_for_discovery + file tempfiles from fastqc_for_waiting // just for waiting + file fasta + file star_index + + output: + set val(file_tag_new), file("${file_tag_new}Aligned.sortedByCoord.out.bam") into mappedReads,forHtseqMappedReads + file "${file_tag_new}Log.final.out" into alignment_logs + shell: + println print_purple("Start mapping with STAR aligner " + samplename) + file_tag = samplename + file_tag_new = file_tag + + if (params.singleEnd) { + println
print_purple("Initial reads mapping of " + samplename + " performed by STAR in single-end mode") + """ + STAR --runThreadN !{task.cpus} \ + --twopassMode Basic \ + --genomeDir !{star_index} \ + --readFilesIn !{pair} \ + --readFilesCommand zcat \ + --outSAMtype BAM SortedByCoordinate \ + --chimSegmentMin 20 \ + --outFilterIntronMotifs RemoveNoncanonical \ + --outFilterMultimapNmax 20 \ + --alignIntronMin 20 \ + --alignIntronMax 1000000 \ + --alignMatesGapMax 1000000 \ + --outFilterType BySJout \ + --alignSJoverhangMin 8 \ + --alignSJDBoverhangMin 1 \ + --outFileNamePrefix !{file_tag_new} + """ + } else { + println print_purple("Initial reads mapping of " + samplename + " performed by STAR in paired-end mode") + ''' + STAR --runThreadN !{task.cpus} \ + --twopassMode Basic --genomeDir !{star_index} \ + --readFilesIn !{pair[0]} !{pair[1]} \ + --readFilesCommand zcat \ + --outSAMtype BAM SortedByCoordinate \ + --chimSegmentMin 20 \ + --outFilterIntronMotifs RemoveNoncanonical \ + --outFilterMultimapNmax 20 \ + --alignIntronMin 20 \ + --alignIntronMax 1000000 \ + --alignMatesGapMax 1000000 \ + --outFilterType BySJout \ + --alignSJoverhangMin 8 \ + --alignSJDBoverhangMin 1 \ + --outFileNamePrefix !{file_tag_new} + ''' + } + } + } + else if (params.aligner == 'tophat') + { + process fastq_tophat_alignment_For_discovery { + + tag { file_tag } + + publishDir pattern: "", + path: { params.outdir + "/tophat_alignment" }, mode: 'copy', overwrite: true + + input: + set val(samplename), file(pair) from readPairs_for_discovery + file tempfiles from fastqc_for_waiting // just for waiting + file fasta + file bowtie2_index from bowtie2_index.collect() + file gtf from gencode_annotation_gtf + + output: + set val(samplename),file("${file_tag_new}_thout/accepted_hits.bam") into mappedReads,forHtseqMappedReads + file "${file_tag_new}_thout/align_summary.txt" into alignment_logs + // TopHat2 writes accepted_hits.bam and align_summary.txt into the -o directory; the declared outputs must use those names + shell: + println print_purple("Start mapping with tophat2 aligner " +
samplename) + file_tag = samplename + file_tag_new = file_tag + index_base = bowtie2_index[0].toString() - ~/.\d.bt2/ + strand_str="fr-firststrand" + if(unstrand){ + strand_str="fr-unstranded" + } + if (params.singleEnd) { + println print_purple("Initial reads mapping of " + samplename + " performed by Tophat in single-end mode") + ''' + tophat -p !{task.cpus} -G !{gtf} --no-novel-juncs -o !{samplename}_thout --library-type !{strand_str} !{index_base} !{pair} + + ''' + } else { + println print_purple("Initial reads mapping of " + samplename + " performed by Tophat in paired-end mode") + ''' + tophat -p !{task.cpus} -G !{gtf} --no-novel-juncs -o !{samplename}_thout --library-type !{strand_str} !{index_base} !{pair[0]} !{pair[1]} + ''' + } + } + } + else if (params.aligner == 'hisat') { + process fastq_hisat2_alignment_For_discovery { + + tag { file_tag } + label 'para' + publishDir pattern: "", + path: { params.outdir + "/hisat_alignment" }, mode: 'copy', overwrite: true + + input: + set val(samplename), file(pair) from readPairs_for_discovery + file tempfiles from fastqc_for_waiting // just for waiting + file fasta + file hisat2_id from hisat2_index.collect() + + output: + set val(file_tag_new),file("${file_tag_new}.sort.bam") into hisat_mappedReads,forHtseqMappedReads + file "${file_tag_new}.hisat2_summary.txt" into alignment_logs + //align_summary.txt as log file + shell: + println print_purple("Start mapping with hisat2 aligner " + samplename) + file_tag = samplename + file_tag_new = file_tag + index_base = hisat2_id[0].toString() - ~/.\d.ht2/ + + if(unstrand){ + if (params.singleEnd) { + println print_purple("Initial reads mapping of " + samplename + " performed by hisat2 in single-end mode") + ''' + mkdir tmp + hisat2 -p !{task.cpus} --dta -x !{index_base} -U !{pair} -S !{file_tag_new}.sam 2>!{file_tag_new}.hisat2_summary.txt + sambamba view -S -f bam -t !{task.cpus} !{file_tag_new}.sam -o temp.bam + sambamba sort -o !{file_tag_new}.sort.bam --tmpdir ./tmp -t
!{task.cpus} temp.bam + rm !{file_tag_new}.sam + rm temp.bam + + ''' + } else { + println print_purple("Initial reads mapping of " + samplename + " performed by hisat2 in paired-end mode") + // sambamba thread counts come from task.cpus, the same value passed to hisat2 -p + ''' + mkdir tmp + hisat2 -p !{task.cpus} --dta -x !{index_base} -1 !{pair[0]} -2 !{pair[1]} -S !{file_tag_new}.sam 2> !{file_tag_new}.hisat2_summary.txt + sambamba view -S -f bam -t !{task.cpus} !{file_tag_new}.sam -o temp.bam + sambamba sort -o !{file_tag_new}.sort.bam --tmpdir ./tmp -t !{task.cpus} temp.bam + rm !{file_tag_new}.sam + ''' + } + }else { + if (params.singleEnd) { + println print_purple("Initial reads mapping of " + samplename + " performed by hisat2 in single-end mode") + ''' + mkdir tmp + hisat2 -p !{task.cpus} --dta --rna-strandness !{params.hisat_strand} -x !{index_base} -U !{pair} -S !{file_tag_new}.sam 2>!{file_tag_new}.hisat2_summary.txt + sambamba view -S -f bam -t !{task.cpus} !{file_tag_new}.sam -o temp.bam + sambamba sort -o !{file_tag_new}.sort.bam --tmpdir ./tmp -t !{task.cpus} temp.bam + rm !{file_tag_new}.sam + rm temp.bam + + ''' + } else { + println print_purple("Initial reads mapping of " + samplename + " performed by hisat2 in paired-end mode") + ''' + mkdir tmp + hisat2 -p !{task.cpus} --dta --rna-strandness !{params.hisat_strand} -x !{index_base} -1 !{pair[0]} -2 !{pair[1]} -S !{file_tag_new}.sam 2> !{file_tag_new}.hisat2_summary.txt + sambamba view -S -f bam -t !{task.cpus} !{file_tag_new}.sam -o temp.bam + sambamba sort -o !{file_tag_new}.sort.bam --tmpdir ./tmp -t !{task.cpus} temp.bam + rm !{file_tag_new}.sam + ''' + } + } + } + } + + /* + * Step 5: Transcript assembly using Stringtie + */ + if(params.aligner == 'hisat'){ + process StringTie_assembly { + + tag { file_tag } + + input: + set val(samplename),file(alignment_bam) from hisat_mappedReads + file fasta + file gencode_annotation_gtf + + output: + + file "stringtie_${file_tag_new}_transcripts.gtf" into stringTieoutgtf, StringTieOutGtf_fn + + shell: + file_tag =
samplename + file_tag_new = file_tag + + if(unstrand){ + ''' + #run stringtie + stringtie -p !{task.cpus} -G !{gencode_annotation_gtf} -l stringtie_!{file_tag_new} -o stringtie_!{file_tag_new}_transcripts.gtf !{alignment_bam} + ''' + }else{ + ''' + #run stringtie + stringtie -p !{task.cpus} -G !{gencode_annotation_gtf} --rf -l stringtie_!{file_tag_new} -o stringtie_!{file_tag_new}_transcripts.gtf !{alignment_bam} + ''' + } + + } +// Create a file 'gtf_filenames' containing the filenames of each post processes cufflinks gtf + stringTieoutgtf.collectFile { file -> ['gtf_filenames.txt', file.name + '\n'] } + .set { GTFfilenames } + /* + * Step 6: Merged GTFs into one + */ + process StringTie_merge_assembled_gtf { + + tag { file_tag } + label 'para' + publishDir pattern: "merged.gtf", + path: { params.outdir + "/Merged_assemblies" }, mode: 'copy', overwrite: true + + input: + file gtf_filenames from GTFfilenames + file cufflinksgtf_file from StringTieOutGtf_fn.toList() // not used but just send the file in current running folder + file fasta + + + output: + file "merged.gtf" into mergeTranscripts_forCompare, mergeTranscripts_forExtract, mergeTranscripts_forCodeingProtential + shell: + + + ''' + stringtie --merge -p !{task.cpus} -o merged.gtf !{gtf_filenames} + + + ''' + } + } + else{ + process Cufflinks_assembly { + + tag { file_tag } + + input: + set val(file_tag), file(alignment_bam) from mappedReads + file fasta + file gencode_annotation_gtf + + output: + + file "Cufout_${file_tag_new}_transcripts.gtf" into cuflinksoutgtf, cuflinksoutgtf_fn + + shell: + file_tag_new = file_tag + strand_str="fr-firststrand" + if(unstrand){ + strand_str="fr-unstranded" + } + if (params.aligner == 'tophat') { + ''' + #run cufflinks + + cufflinks -g !{gencode_annotation_gtf} \ + -b !{fasta} \ + --library-type !{strand_str}\ + --max-multiread-fraction 0.25 \ + --3-overhang-tolerance 2000 \ + -o Cufout_!{file_tag_new} \ + -p !{task.cpus} !{alignment_bam} + + mv 
Cufout_!{file_tag_new}/transcripts.gtf Cufout_!{file_tag_new}_transcripts.gtf + ''' + + } else if (params.aligner == 'star') { + ''' + #run cufflinks + + cufflinks -g !{gencode_annotation_gtf} \ + -b !{fasta} \ + --library-type !{strand_str} \ + --max-multiread-fraction 0.25 \ + --3-overhang-tolerance 2000 \ + -o Cufout_!{file_tag_new} \ + -p !{task.cpus} !{alignment_bam} + + mv Cufout_!{file_tag_new}/transcripts.gtf Cufout_!{file_tag_new}_transcripts.gtf + ''' + + } + + + } + +// Create a file 'gtf_filenames' containing the filenames of each post processes cufflinks gtf + + cuflinksoutgtf.collectFile { file -> ['gtf_filenames.txt', file.name + '\n'] } + .set { GTFfilenames } + + /* + * Step 6: Merged GTFs into one + */ + process cuffmerge_assembled_gtf { + + tag { file_tag } + label 'para' + publishDir pattern: "CUFFMERGE/merged.gtf", + path: { params.outdir + "/All_assemblies" }, mode: 'copy', overwrite: true + + input: + file gtf_filenames from GTFfilenames + file cufflinksgtf_file from cuflinksoutgtf_fn.toList() // not used but just send the file in current running folder + + file fasta + + + output: + file "CUFFMERGE/merged.gtf" into mergeTranscripts_forCompare, mergeTranscripts_forExtract, mergeTranscripts_forCodeingProtential + shell: + + ''' + mkdir CUFFMERGE + cuffmerge -o CUFFMERGE \ + -s !{fasta} \ + -p !{task.cpus} \ + !{gtf_filenames} + + ''' + } + } + + +} +else { + println print_yellow("Raw reads quality check step was skipped due to provided ") + print_green("--merged_gtf") + print_yellow(" option\n") + println print_yellow("Reads mapping step was skipped due to provided ") + print_green("--merged_gtf") + print_yellow(" option\n") + + merged_gtf = file(params.merged_gtf) + Channel.fromPath(merged_gtf) + .ifEmpty { exit 1, "Cannot find merged gtf : ${merged_gtf}" } + .into { + mergeTranscripts_forCompare; mergeTranscripts_forExtract; mergeTranscripts_forCodeingProtential + } + + // add fastq when do quantification + reads = params.inputdir + 
params.reads + if (params.qctools == 'fastqc') { + Channel.fromFilePairs(reads, size: params.singleEnd ? 1 : 2) + .ifEmpty { + exit 1, print_red("Fastq file not found, plz check your file path : ${reads}\n") + } + .into { reads_for_fastqc; readPairs_for_discovery;readPairs_for_kallisto} + process Run_fastQC_2 { + tag { fastq_tag } + label 'qc' + + publishDir pattern: "*.html", + path: { params.outdir + "/QC" }, mode: 'copy', overwrite: true + + input: + set val(samplename), file(fastq_file) from reads_for_fastqc + + output: + file "*.html" into fastqc_for_waiting + shell: + fastq_tag = samplename + ''' + fastqc -t !{task.cpus} !{fastq_file[0]} !{fastq_file[1]} + ''' + } + } + else if (params.qctools == 'afterqc'){ + Channel.fromFilePairs(reads, size: params.singleEnd ? 1 : 2) + .ifEmpty { + exit 1, print_red("Fastq file not found : ${reads}\nPlz check your reads string in nextflow.config file \n") + } + .set { reads_for_fastqc} + process Run_afterQC_2 { + + tag { fastq_tag } + label 'qc' + + publishDir pattern: "QC/*.html", + path: { params.outdir + "/QC" }, mode: 'copy', overwrite: true + + input: + set val(samplename), file(fastq_file) from reads_for_fastqc + + output: + file "QC/*.html" into fastqc_for_waiting + set val(fastq_tag), file('*.good.fq.gz') into readPairs_for_discovery,readPairs_for_kallisto + shell: + fastq_tag = samplename + if (params.singleEnd) { + ''' + after.py -z -1 !{fastq_file[0]} -g ./ + ''' + } else { + ''' + after.py -z -1 !{fastq_file[0]} -2 !{fastq_file[1]} -g ./ + ''' + } + } + } + else if (params.qctools == 'fastp'){ + Channel.fromFilePairs(reads, size: params.singleEnd ? 
1 : 2) + .ifEmpty { + exit 1, print_red("Fastq file not found : ${reads}\nPlz check your reads string in nextflow.config file \n") + } + .set { reads_for_fastqc} + process Run_FastP_2 { + + tag { fastq_tag } + label 'qc' + + publishDir pattern: "*.html", + path: { params.outdir + "/QC" }, mode: 'copy', overwrite: true + + input: + set val(samplename), file(fastq_file) from reads_for_fastqc + + output: + file "*.html" into fastqc_for_waiting + set val(fastq_tag), file('*qc.fq.gz') into readPairs_for_discovery,readPairs_for_kallisto + shell: + fastq_tag = samplename + // fastp output names in both branches must match the '*qc.fq.gz' output glob declared above + if (params.singleEnd) { + ''' + fastp -i !{fastq_file[0]} -o !{samplename}.qc.fq.gz -h !{samplename}_fastp.html + + ''' + } else { + ''' + fastp -i !{fastq_file[0]} -I !{fastq_file[1]} -o !{samplename}_1.qc.fq.gz -O !{samplename}_2.qc.fq.gz -h !{samplename}_fastp.html + ''' + } + } + } + else{ + Channel.fromFilePairs(reads, size: params.singleEnd ? 1 : 2) + .ifEmpty { + exit 1, print_red("Cannot find any reads matching: ${reads}\nPlz check your reads string in nextflow.config file \n") + } + .into{readPairs_for_discovery; readPairs_for_kallisto;fastqc_for_waiting} + } + fastqc_for_waiting2 = fastqc_for_waiting.first() + +} + +/* +*Step 7: Compare assembled gtf with known annotations (GENCODE) +*/ +process Merge_assembled_gtf_with_GENCODE { + + tag { file_tag } + input: + file mergeGtfFile from mergeTranscripts_forCompare + file gencode_annotation_gtf + + output: + file "merged_lncRNA.merged.gtf.tmap" into comparedGTF_tmap + shell: + + ''' + #!/bin/sh + gffcompare -r !{gencode_annotation_gtf} -p !{task.cpus} !{mergeGtfFile} -o merged_lncRNA + ''' +} + + + +/* +*Step 8: Filter GTFs to distinguish novel lncRNAs +*/ +process Identify_novel_lncRNA_with_criterions { + + input: + file comparedTmap from comparedGTF_tmap + file fasta + file mergedGTF from mergeTranscripts_forExtract + + output: + file "novel.gtf.tmap" into noveltmap + file "novel.longRNA.fa" into novelLncRnaFasta + file
"novel.longRNA.exoncount.txt" into novelLncRnaExonCount + + shell: + ''' + # filtering novel lncRNA based on cuffmerged trascripts + awk '$3 =="x"||$3=="u"||$3=="i"{print $0}' !{comparedTmap} > novel.gtf.tmap + # excluding length smaller than 200 nt + awk '$10 >200{print}' novel.gtf.tmap > novel.longRNA.gtf.tmap + # extract gtf + awk '{print $5}' novel.longRNA.gtf.tmap |perl !{baseDir}/bin/extract_gtf_by_name.pl !{mergedGTF} - >novel.longRNA.gtf + awk '{if($3=="exon"){print $0}}' novel.longRNA.gtf > novel.longRNA.format.gtf + perl !{baseDir}/bin/get_exoncount.pl novel.longRNA.format.gtf > novel.longRNA.exoncount.txt + # gtf2gff3 + #check whether required + # get fasta from gtf + gffread novel.longRNA.gtf -g !{fasta} -w novel.longRNA.fa -W + ''' +} + +/* +*Step 9: Predict coding potential abilities using CPAT and PLEK (CNCI functionality coming soon!) +*/ +novelLncRnaFasta.into { novelLncRnaFasta_for_PLEK; novelLncRnaFasta_for_CPAT; } + +process Predict_coding_abilities_by_PLEK { + + // as PLEK can not return valid exit status even run smoothly, we manually set the exit status into 0 to promote analysis + validExitStatus 0, 1, 2 + input: + file novel_lncRNA_fasta from novelLncRnaFasta_for_PLEK + output: + file "novel.longRNA.PLEK.out" into novel_longRNA_PLEK_result + shell: + ''' + PLEK.py -fasta !{novel_lncRNA_fasta} \ + -out novel.longRNA.PLEK.out \ + -thread !{task.cpus} + exit 0 + ''' + +} +process Predict_coding_abilities_by_CPAT { + input: + file novel_lncRNA_fasta from novelLncRnaFasta_for_CPAT + output: + file "novel.longRNA.CPAT.out" into novel_longRNA_CPAT_result + shell: + if(params.species=="human"){ + ''' + cpat.py -g !{novel_lncRNA_fasta} \ + -x !{baseDir}/bin/cpat_model/Human_Hexamer.tsv \ + -d !{baseDir}/bin/cpat_model/Human_logitModel.RData \ + -o novel.longRNA.CPAT.out + ''' + }else if (params.species=="mouse"){ + ''' + cpat.py -g !{novel_lncRNA_fasta} \ + -x !{baseDir}/bin/cpat_model/Mouse_Hexamer.tsv \ + -d 
!{baseDir}/bin/cpat_model/Mouse_logitModel.RData \ + -o novel.longRNA.CPAT.out + ''' + + }else if (params.species=="zebrafish"){ + ''' + cpat.py -g !{novel_lncRNA_fasta} \ + -x !{baseDir}/bin/cpat_model/zebrafish_Hexamer.tsv \ + -d !{baseDir}/bin/cpat_model/zebrafish_logitModel.RData \ + -o novel.longRNA.CPAT.out + ''' + }else { + ''' + cpat.py -g !{novel_lncRNA_fasta} \ + -x !{baseDir}/bin/cpat_model/fly_Hexamer.tsv \ + -d !{baseDir}/bin/cpat_model/fly_logitModel.RData \ + -o novel.longRNA.CPAT.out + ''' + } + +} + + +/* +*Step 9: Merged and filter lncRNAs based on coding potential (CPAT/PLEK) +*/ +process Filter_lncRNA_by_coding_potential_result { + input: + file novel_longRNA_PLEK_ from novel_longRNA_PLEK_result + file novel_longRNA_CPAT_ from novel_longRNA_CPAT_result + file longRNA_novel_exoncount from novelLncRnaExonCount + file cuffmergegtf from mergeTranscripts_forCodeingProtential + file gencode_annotation_gtf + file fasta + + output: + file "novel.longRNA.stringent.gtf" into Novel_longRNA_stringent_gtf // not used + file "novel.lncRNA.stringent.gtf" into novel_lncRNA_stringent_gtf + file "novel.TUCP.stringent.gtf" into novel_TUCP_stringent_gtf // not used + + shell: + ''' + #merged transcripts + perl !{baseDir}/bin/integrate_novel_transcripts.pl > novel.longRNA.txt + awk '$4 >1{print $1}' novel.longRNA.txt|perl !{baseDir}/bin/extract_gtf_by_name.pl !{cuffmergegtf} - > novel.longRNA.stringent.gtf + # retain lncRNA only by coding ability + awk '$4 >1&&$5=="lncRNA"{print $1}' novel.longRNA.txt|perl !{baseDir}/bin/extract_gtf_by_name.pl !{cuffmergegtf} - > novel.lncRNA.stringent.gtf + awk '$4 >1&&$5=="TUCP"{print $1}' novel.longRNA.txt|perl !{baseDir}/bin/extract_gtf_by_name.pl !{cuffmergegtf} - > novel.TUCP.stringent.gtf + ''' +} + +/* +*Step 10: Further filtered lncRNAs with known criterion +*/ +process Summary_renaming_and_classification { + publishDir "${params.outdir}/Identified_lncRNA", mode: 'copy' + + + input: + file knowlncRNAgtf from KnownLncRNAgtf 
+ file gencode_protein_coding_gtf from proteinCodingGTF + file novel_lncRNA_stringent_Gtf from novel_lncRNA_stringent_gtf + file fasta + file mod_file_for_rename + + output: +// file "lncRNA.final.v2.gtf" into finalLncRNA_gtf +// file "lncRNA.final.v2.map" into finalLncRNA_map + file "protein_coding.final.gtf" into final_protein_coding_gtf + file "all_lncRNA_for_classifier.gtf" into finalLncRNA_for_class_gtf + file "final_all.gtf" into finalGTF_for_quantification_gtf, finalGTF_for_annotate_gtf + file "final_all.fa" into finalFasta_for_quantification_gtf + file "protein_coding.fa" into final_coding_gene_for_CPAT_fa + file "lncRNA.fa" into final_lncRNA_for_CPAT_fa + file "lncRNA_classification.txt" into lncRNA_classification + file "lncRNA.mapping.file" into rename_mapping_file + //file "lncRNA.final.CPAT.out" into lncRNA_CPAT_statistic + //file "protein_coding.final.CPAT.out" into protein_coding_CPAT_statistic + + shell: + + + if(params.species=="human"){ + ''' + gffcompare -G -o filter \ + -r !{knowlncRNAgtf} \ + -p !{task.cpus} !{novel_lncRNA_stringent_Gtf} + awk '$3 =="u"||$3=="x"{print $5}' filter.novel.lncRNA.stringent.gtf.tmap |sort|uniq| \ + perl !{baseDir}/bin/extract_gtf_by_name.pl !{novel_lncRNA_stringent_Gtf} - > novel.lncRNA.stringent.filter.gtf + + #rename lncRNAs according to neighbouring protein coding genes + awk '$3 =="gene"{print }' !{gencode_protein_coding_gtf} | perl -F'\\t' -lane '$F[8]=~/gene_id "(.*?)";/ && print join qq{\\t},@F[0,3,4],$1,@F[5,6,1,2,7,8,9]' - | \ + sort-bed - > gencode.protein_coding.gene.bed + gtf2bed < novel.lncRNA.stringent.filter.gtf |sort-bed - > novel.lncRNA.stringent.filter.bed + gtf2bed < !{knowlncRNAgtf} |sort-bed - > known.lncRNA.bed + + perl !{baseDir}/bin/rename_lncRNA_2.pl gencode_annotation_gtf_mod.gtf lncipedia_mod.gtf + # mv lncRNA.final.v2.gtf all_lncRNA_for_classifier.gtf + grep -v NA-1-1 lncRNA.final.v2.gtf > all_lncRNA_for_classifier.gtf + perl !{baseDir}/bin/rename_proteincoding.pl 
!{gencode_protein_coding_gtf}> protein_coding.final.gtf + cat all_lncRNA_for_classifier.gtf protein_coding.final.gtf > final_all.gtf + gffread final_all.gtf -g !{fasta} -w final_all.fa -W + gffread all_lncRNA_for_classifier.gtf -g !{fasta} -w lncRNA.fa -W + gffread protein_coding.final.gtf -g !{fasta} -w protein_coding.fa -W + #classification + perl !{baseDir}/bin/lincRNA_classification.pl all_lncRNA_for_classifier.gtf !{gencode_protein_coding_gtf} lncRNA_classification.txt + + + ''' + }else{ + ''' + gffcompare -G -o filter \ + -r !{knowlncRNAgtf} \ + -p !{task.cpus} !{novel_lncRNA_stringent_Gtf} + awk '$3 =="u"||$3=="x"{print $5}' filter.novel.lncRNA.stringent.gtf.tmap |sort|uniq| \ + perl !{baseDir}/bin/extract_gtf_by_name.pl !{novel_lncRNA_stringent_Gtf} - > novel.lncRNA.stringent.filter.gtf + + #rename lncRNAs according to neighbouring protein coding genes + awk '$3 =="gene"{print }' !{gencode_protein_coding_gtf} | perl -F'\\t' -lane '$F[8]=~/gene_id "(.*?)";/ && print join qq{\\t},@F[0,3,4],$1,@F[5,6,1,2,7,8,9]' - | \ + sort-bed - > gencode.protein_coding.gene.bed + gtf2bed < novel.lncRNA.stringent.filter.gtf |sort-bed - > novel.lncRNA.stringent.filter.bed + gtf2bed < !{knowlncRNAgtf} |sort-bed - > known.lncRNA.bed + perl !{baseDir}/bin/rename_lncRNA_2.pl non_human_mod.gtf + # mv lncRNA.final.v2.gtf all_lncRNA_for_classifier.gtf + grep -v NA-1-1 lncRNA.final.v2.gtf > all_lncRNA_for_classifier.gtf + perl !{baseDir}/bin/rename_proteincoding.pl !{gencode_protein_coding_gtf}> protein_coding.final.gtf + cat all_lncRNA_for_classifier.gtf protein_coding.final.gtf > final_all.gtf + gffread final_all.gtf -g !{fasta} -w final_all.fa -W + gffread all_lncRNA_for_classifier.gtf -g !{fasta} -w lncRNA.fa -W + gffread protein_coding.final.gtf -g !{fasta} -w protein_coding.fa -W + #classification + perl !{baseDir}/bin/lincRNA_classification.pl all_lncRNA_for_classifier.gtf !{gencode_protein_coding_gtf} lncRNA_classification.txt + + + ''' + } + +} + +/* +*Step 11: Rerun CPAT to 
evaluate the results +*/ +//evaluate lncRNA +process Rerun_CPAT_to_evaluate_lncRNA { + input: + file lncRNA_final_cpat_fasta from final_lncRNA_for_CPAT_fa + output: + file "lncRNA.final.CPAT.out" into final_lncRNA_CPAT_result + shell: + + if(params.species=="human"){ + ''' + cpat.py -g !{lncRNA_final_cpat_fasta} \ + -x !{baseDir}/bin/cpat_model/Human_Hexamer.tsv \ + -d !{baseDir}/bin/cpat_model/Human_logitModel.RData \ + -o lncRNA.final.CPAT.out + ''' + }else if (params.species=="mouse"){ + ''' + cpat.py -g !{lncRNA_final_cpat_fasta} \ + -x !{baseDir}/bin/cpat_model/Mouse_Hexamer.tsv \ + -d !{baseDir}/bin/cpat_model/Mouse_logitModel.RData \ + -o lncRNA.final.CPAT.out + ''' + + }else if (params.species=="zebrafish"){ + ''' + cpat.py -g !{lncRNA_final_cpat_fasta} \ + -x !{baseDir}/bin/cpat_model/zebrafish_Hexamer.tsv \ + -d !{baseDir}/bin/cpat_model/zebrafish_logitModel.RData \ + -o lncRNA.final.CPAT.out + ''' + }else { + ''' + cpat.py -g !{lncRNA_final_cpat_fasta} \ + -x !{baseDir}/bin/cpat_model/fly_Hexamer.tsv \ + -d !{baseDir}/bin/cpat_model/fly_logitModel.RData \ + -o lncRNA.final.CPAT.out + ''' + } +} +//evaluate coding +process Rerun_CPAT_to_evaluate_coding { + input: + file final_coding_gene_for_CPAT from final_coding_gene_for_CPAT_fa + output: + file "protein_coding.final.CPAT.out" into final_coding_gene_CPAT_result + shell: + ''' + cpat.py -g !{final_coding_gene_for_CPAT} \ + -x !{baseDir}/bin/cpat_model/Human_Hexamer.tsv \ + -d !{baseDir}/bin/cpat_model/Human_logitModel.RData \ + -o protein_coding.final.CPAT.out + ''' +} +//summary result +process Secondary_basic_statistic { + + input: + file protein_coding_final_gtf from final_protein_coding_gtf + file all_lncRNA_for_classifier_gtf from finalLncRNA_for_class_gtf + file lncRNA_cds from final_lncRNA_CPAT_result + file coding_gene_cds from final_coding_gene_CPAT_result + file lncRNA_class from lncRNA_classification + output: + file "basic_charac.txt" into statistic_result + + shell: + ''' + #!/usr/bin/perl 
-w + #since CPAT arbitrarily transforms gene names into upper case, we apply 'uc' function to keep the genenames' consistency. + use strict; + open OUT,">basic_charac.txt" or die; + + open FH,"all_lncRNA_for_classifier.gtf" or die; + + my %class; + my %g2t; + my %trans_len; + my %exon_num; + while(){ + chomp; + my @field=split "\t"; + $_=~/gene_id "(.+?)"/; + my $gid=$1; + $_=~/transcript_id "(.+?)"/; + my $tid=uc($1); + $class{$tid}=$field[1]; + $g2t{$tid}=$gid; + my $len=$field[4]-$field[3]; + $trans_len{$tid}=(exists $trans_len{$tid})?$trans_len{$tid}+$len:$len; + $exon_num{$tid}=(exists $exon_num{$tid})?$exon_num{$tid}+1:1; + } + open FH,"protein_coding.final.gtf" or die; + + while(){ + chomp; + my @field=split "\t"; + $_=~/gene_id "(.+?)"/; + my $gid=uc($1); + $_=~/transcript_id "(.+?)"/; + my $tid=$1; + $class{$tid}="protein_coding"; + $g2t{$tid}=$gid; + my $len=$field[4]-$field[3]; + $trans_len{$tid}=(exists $trans_len{$tid})?$trans_len{$tid}+$len:$len; + $exon_num{$tid}=(exists $exon_num{$tid})?$exon_num{$tid}+1:1; + } + + my %lin_class; + open IN,"lncRNA_classification.txt" or die; #change the file name + while(){ + chomp; + my @data = split /\\t/,$_; + $lin_class{$data[0]} = $data[1]; + } + open FH,"lncRNA.final.CPAT.out" or die; + + ; + + while(){ + chomp; + my @field=split "\t"; + my $tid=uc($field[0]); + my $class; + if (defined($lin_class{$tid})){ + $class = $lin_class{$tid}; + }else{ + $class = 'NA'; + } + print OUT $g2t{$tid}."\t".$tid."\t".$class{$tid}."\t".$field[5]."\t".$trans_len{$tid}."\t".$exon_num{$tid}."\t".$class."\n"; + } + + open FH,"protein_coding.final.CPAT.out" or die; + + ; + + while(){ + chomp; + my @field=split "\t"; + my $tid=uc($field[0]); + my $class; + if (defined($lin_class{$tid})){ + $class = $lin_class{$tid}; + }else{ + $class = 'protein_coding'; + } + print OUT $g2t{$tid}."\t".$tid."\t".$class{$tid}."\t".$field[5]."\t".$trans_len{$tid}."\t".$exon_num{$tid}."\t".$class."\n"; + } + + ''' +} + + + +//Keep the channel as 
constant variable to be used several times in quantification analysis + +//The following code is designed for use if the merged_gtf have already been generated previously. +if(!params.merged_gtf){ + /* +*Step 11: Quantification step (Kallisto/Htseq) +*/ + if(params.quant=="htseq"){ + process Run_htseq_for_quantification{ + tag { file_tag } + input: + set val(samplename),file(bamfile) from forHtseqMappedReads + file final_gtf from finalGTF_for_quantification_gtf + + output: + file "${file_tag_new}.htseq.count " into htseq_tcv_collection + + shell: + + file_tag = samplename + file_tag_new = file_tag + if(params.unstrand){ + ''' + sambamba view !{bamfile} > !{samplename}.sam # resolved error caused by bam and htseq version conflicts + htseq-count -t exon -i gene_id -s no -r pos -f sam !{samplename}.sam !{final_gtf} > !{samplename}.htseq.count + rm !{samplename}.sam + ''' + }else { + ''' + sambamba view !{bamfile} > !{samplename}.sam # resolved error caused by bam and htseq version conflicts + htseq-count -t exon -i gene_id -r pos -f sam !{samplename}.sam !{final_gtf} > !{samplename}.htseq.count + rm !{samplename}.sam + ''' + } + + + + } + }else{ + process Build_kallisto_index_of_GTF_for_quantification { + + input: + file transript_fasta from finalFasta_for_quantification_gtf + + output: + file "transcripts.idx" into final_kallisto_index + + shell: + ''' + #index kallisto reference + kallisto index -i transcripts.idx !{transript_fasta} + + ''' + } + constant_kallisto_index = final_kallisto_index.first() + process Run_kallisto_for_quantification { + + + tag { file_tag } + label 'para' + + input: + file kallistoIndex from constant_kallisto_index + set val(samplename), file(pair) from readPairs_for_kallisto + + output: + file "${file_tag_new}_abundance.tsv" into kallisto_tcv_collection + + shell: + file_tag = samplename + file_tag_new = file_tag + if (params.singleEnd) { + println print_purple("Quantification by kallisto in single end mode") + ''' + #quantification by 
kallisto in single end mode + kallisto quant -i !{kallistoIndex} -o !{file_tag_new}_kallisto -t !{task.cpus} -b 100 --single -l 180 -s 20 !{pair} + mv !{file_tag_new}_kallisto/abundance.tsv !{file_tag_new}_abundance.tsv + ''' + + + } else { + println print_purple("quantification by kallisto in paired end mode") + ''' + #quantification by kallisto + kallisto quant -i !{kallistoIndex} -o !{file_tag_new}_kallisto -t !{task.cpus} -b 100 !{pair[0]} !{pair[1]} + mv !{file_tag_new}_kallisto/abundance.tsv !{file_tag_new}_abundance.tsv + ''' + } + } + } + +}else{ + /* +*Step 11: Quantification step (Kallisto/Htseq) +*/ + if(params.quant=="htseq"){ + exit 0, print_red("htseq can not be applicable without mapping step, plz set quant tool using `kallisto`") + }else { + process Build_kallisto_index_of_GTF_for_quantification_2 { + + + input: + file transript_fasta from finalFasta_for_quantification_gtf + + output: + file "transcripts.idx" into final_kallisto_index + + shell: + ''' + #index kallisto reference + kallisto index -i transcripts.idx !{transript_fasta} + + ''' + } + constant_kallisto_index = final_kallisto_index.first() + process Run_kallisto_for_quantification_2 { + + + tag { file_tag } + label 'para' + + input: + file kallistoIndex from constant_kallisto_index + set val(samplename), file(pair) from readPairs_for_kallisto + file tempfiles from fastqc_for_waiting2 + output: + file "${file_tag_new}_abundance.tsv" into kallisto_tcv_collection + + shell: + file_tag = samplename + file_tag_new = file_tag + if (params.singleEnd) { + println print_purple("Quantification by kallisto in single end mode") + ''' + #quantification by kallisto in single end mode + kallisto quant -i !{kallistoIndex} -o !{file_tag_new}_kallisto -t !{task.cpus} -b 100 --single -l 180 -s 20 !{pair} + mv !{file_tag_new}_kallisto/abundance.tsv !{file_tag_new}_abundance.tsv + + ''' + + + } else { + println print_purple("Quantification by kallisto in paired end mode") + ''' + #quantification by kallisto + 
kallisto quant -i !{kallistoIndex} -o !{file_tag_new}_kallisto -t !{task.cpus} -b 100 !{pair[0]} !{pair[1]} + mv !{file_tag_new}_kallisto/abundance.tsv !{file_tag_new}_abundance.tsv + ''' + } + } + } +} + + +/* +*Step 12: Generate count matrix for differential expression analysis +*/ + +if(params.quant=="htseq"){ + process Get_HTseq_matrix { + tag { file_tag } + publishDir pattern: "htseq*.txt", + path: "${params.outdir}/Quantification/", mode: 'copy' + input: + file abundance_tsv_matrix from htseq_tcv_collection.collect() + file annotated_gtf from finalGTF_for_annotate_gtf + output: + file "htseq.count.txt" into expression_matrixfile_count + + shell: + file_tag = "htseq" + ''' + perl !{baseDir}/bin/get_map_table.pl final_all.gtf > map.file + R CMD BATCH !{baseDir}/bin/get_htseq_matrix.R + ''' + } +}else{ + process Get_kallisto_matrix { + tag { file_tag } + publishDir pattern: "kallisto*.txt", + path: "${params.outdir}/Quantification/", mode: 'copy' + input: + file abundance_tsv_matrix from kallisto_tcv_collection.collect() + file annotated_gtf from finalGTF_for_annotate_gtf + output: + file "kallisto.count.txt" into expression_matrixfile_count + file "kallisto.tpm.txt" into expression_matrixfile_tpm + + shell: + file_tag = "Kallisto" + ''' + perl !{baseDir}/bin/get_map_table.pl --gtf_file=final_all.gtf > map.file + R CMD BATCH !{baseDir}/bin/get_kallisto_matrix.R + ''' + } +} + +/* +Step 13: Perform Differential Expression analysis and generate report + */ + +// Initialize parameter for lncPipeReporter +lncRep_Output = params.lncRep_Output +lncRep_theme = params.lncRep_theme +lncRep_cdf_percent = params.lncRep_cdf_percent +lncRep_max_lnc_len = params.lncRep_max_lnc_len +lncRep_min_expressed_sample = params.lncRep_min_expressed_sample +detools = params.detools +design= params.design +if(design!=null){ + design = file(params.design) + if (!design.exists()) exit 1, "Design file not found, plz check your design file path: ${params.design}" + + if(!params.merged_gtf) { 
+ process Run_LncPipeReporter { + tag { file_tag } + publishDir pattern: "LncPipeReports", + path: "${params.outdir}/", mode: 'copy' + input: + //alignmet log + file design + file alignmetlogs from alignment_logs.collect() + //gtf statistics + file basic_charac from statistic_result + //Expression matrix + file kallisto_count_matrix from expression_matrixfile_count + + output: + file "LncPipeReports" into final_output + shell: + file_tag = "Generating report ..." + """ + Rscript -e "library(LncPipeReporter);run_reporter(input='.', output = 'reporter.html',output_dir='./LncPipeReports',de.method=\'${detools}\',theme = 'npg',cdf.percent = ${lncRep_cdf_percent},max.lncrna.len = ${lncRep_max_lnc_len},min.expressed.sample = ${lncRep_min_expressed_sample}, ask = FALSE)" + """ + } + }else{ + process Run_LncPipeReporter_2 { + tag { file_tag } + publishDir pattern: "LncPipeReports", + path: "${params.outdir}/", mode: 'copy' + input: + //alignment log + file design + //gtf statistics + file basic_charac from statistic_result + //Expression matrix + file kallisto_count_matrix from expression_matrixfile_count + + output: + file "LncPipeReports" into final_output + shell: + file_tag = "Generating report ..." 
+ """ + perl -F':|,' -lanE'BEGIN{say qq{SampleID\tcondition}} $del = shift @F; say qq{$_\t$del} for @F' ${design} > design.matrix + Rscript -e "library(LncPipeReporter);run_reporter(input='.', output = 'reporter.html',output_dir='./LncPipeReports',de.method=\'${detools}\',theme = 'npg',cdf.percent = ${lncRep_cdf_percent},max.lncrna.len = ${lncRep_max_lnc_len},min.expressed.sample = ${lncRep_min_expressed_sample}, ask = FALSE)" + """ + } + } + +}else{ + if(!params.design) { + process Run_LncPipeReporter_without_Design { + tag { file_tag } + publishDir pattern: "LncPipeReports", + path: "${params.outdir}/", mode: 'copy' + input: + //alignmet log + file alignmetlogs from alignment_logs.collect() + //gtf statistics + file basic_charac from statistic_result + //Expression matrix + file kallisto_count_matrix from expression_matrixfile_count + + output: + file "LncPipeReports" into final_output + shell: + file_tag = "Generating report ..." + """ + Rscript -e "library(LncPipeReporter);run_reporter(input='.', output = 'reporter.html',output_dir='./LncPipeReports',de.method=\'${detools}\',theme = 'npg',cdf.percent = ${lncRep_cdf_percent},max.lncrna.len = ${lncRep_max_lnc_len},min.expressed.sample = ${lncRep_min_expressed_sample}, ask = FALSE)" + """ + } + }else{ + process Run_LncPipeReporter_without_Design_2 { + tag { file_tag } + publishDir pattern: "LncPipeReports", + path: "${params.outdir}/", mode: 'copy' + input: + //alignment log + //gtf statistics + file basic_charac from statistic_result + //Expression matrix + file kallisto_count_matrix from expression_matrixfile_count + + output: + file "*" into final_output + shell: + file_tag = "Generating report ..." 
+ + """ + Rscript -e "library(LncPipeReporter);run_reporter(input='.', output = 'reporter.html',output_dir='./LncPipeReports',de.method=\'${detools}\',theme = 'npg',cdf.percent = ${lncRep_cdf_percent},max.lncrna.len = ${lncRep_max_lnc_len},min.expressed.sample = ${lncRep_min_expressed_sample}, ask = FALSE)" + """ + + + } + } +} + + + +//pipeline log + +workflow.onComplete { + + log.info print_green("LncPipe Pipeline Complete!") + + //email information + if (params.mail) { + recipient = params.mail + def subject = 'My LncPipe execution' + + ['mail', '-s', subject, recipient].execute() << + """ + + LncPipe execution summary + --------------------------- + Your command line: ${workflow.commandLine} + Completed at: ${workflow.complete} + Duration : ${workflow.duration} + Success : ${workflow.success} + workDir : ${workflow.workDir} + exit status : ${workflow.exitStatus} + Error report: ${workflow.errorReport ?: '-'} + + """ + } + + +} + +workflow.onError { + println print_yellow("Oops... Pipeline execution stopped with the following message: ")+print_red(workflow.errorMessage) +} + diff --git a/mkdoc/docs/about.md b/mkdoc/docs/about.md deleted file mode 100644 index e69de29..0000000 diff --git a/mkdoc/docs/index.md b/mkdoc/docs/index.md deleted file mode 120000 index fe84005..0000000 --- a/mkdoc/docs/index.md +++ /dev/null @@ -1 +0,0 @@ -../../README.md \ No newline at end of file diff --git a/mkdoc/mkdocs.yml b/mkdoc/mkdocs.yml deleted file mode 100644 index 69ecf01..0000000 --- a/mkdoc/mkdocs.yml +++ /dev/null @@ -1,5 +0,0 @@ -site_name: LncPipe -pages: - - Home: index.md - - About: about.md -theme: yeti diff --git a/nextflow.config b/nextflow.config index a1020f1..0580d9a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,29 +1,29 @@ +params { + container = 'nfcore/lncpipe:dev' + help = false + clusterOptions = false + species="human"// mouse , zebrafish, fly + outdir = './Results' + tracedir = "${params.outdir}/pipeline_info" -params { -/* - User setting 
options (mandatory) - */ // input file and genome reference() - species="human"// mouse , zebrafish, fly - fastq_ext = '*_{1,2}.fq.gz' - fasta_ref = '/data/database/hg38/genome.fa' + reads = '*_{1,2}.fq.gz' + fasta = '/data/database/hg38/genome.fa' design = 'design.file' // or null hisat2_index = '/data/database/hg38/hisatIndex/grch38_snp_tran/genome_snp_tran' - cpatpath='/opt/CPAT-1.2.3' //human gtf only gencode_annotation_gtf = "/data/database/hg38/Annotation/gencode.v24.annotation.gtf" lncipedia_gtf = "/data/database/hg38/Annotation/lncipedia_4_0_hg38.gtf" // set "null" if you are going to perform analysis on other species // additional options for non-human species known_coding_gtf="" known_lncRNA_gtf="" - //for test - cpatpath = '/home/zhaoqi/software/CPAT/CPAT-1.2.2/' /* - User setting options (optional) - */ + User setting options (optional) +*/ + // tools settings hisat_strand = 'RF' star_index = ''//set if star used @@ -43,24 +43,11 @@ params { lncRep_cdf_percent = 10 lncRep_max_lnc_len = 10000 lncRep_min_expressed_sample = 50 - mem = 60 - cpus = 30 } // individual process setting process.cache = 'deep' -process { - - - withLabel: para { - maxForks = 6 - } - - withLabel: 'qc' { - maxForks = 6 - } -} /* * ------------------------------------------------- * nf-core/lncpipe Nextflow config file @@ -72,20 +59,7 @@ process { */ // Global default params, used in configs -params { - container = 'nfcore/lncpipe:latest' // Container slug. Stable releases should specify release tag! 
- - help = false - reads = "data/*{1,2}.fastq.gz" - singleEnd = false - outdir = './results' - igenomes_base = "./iGenomes" - tracedir = "${params.outdir}/pipeline_info" - clusterOptions = false - awsqueue = false - awsregion = 'eu-west-1' -} profiles { @@ -101,11 +75,7 @@ profiles { singularity.enabled = true process.container = {"shub://${params.container.replace('nfcore', 'nf-core')}"} } - awsbatch { - includeConfig 'conf/base.config' - includeConfig 'conf/awsbatch.config' - includeConfig 'conf/igenomes.config' - } + test { includeConfig 'conf/base.config' includeConfig 'conf/test.config' @@ -139,10 +109,9 @@ dag { manifest { name = 'nf-core/lncpipe' - author = 'Project author name (use a comma to separate multiple names).' + author = 'Qi Zhao, Yu Sun, Zhixiang Zuo' homePage = 'https://github.com/nf-core/lncpipe' description = 'LncPipe:a Nextflow-based Long non-coding RNA analysis PIPELINE' - mainScript = 'LncRNAanalysisPipe.nf' nextflowVersion = '>=0.32.0' version = '1.0dev' } diff --git a/singularity.config b/singularity.config deleted file mode 100644 index 7d144cd..0000000 --- a/singularity.config +++ /dev/null @@ -1,84 +0,0 @@ - - -params { -/* - User setting options (mandatory) - */ -// input file and genome reference() - - fastq_ext = '*_{1,2}.fq.gz' - fasta_ref = '/data/database/hg38/genome.fa' - design = 'design.file' // or null - hisat2_index = '/data/database/hg38/hisatIndex/grch38_snp_tran/genome_snp_tran' - cpatpath='/opt/CPAT-1.2.3' - //human gtf only - gencode_annotation_gtf = "/data/database/hg38/Annotation/gencode.v24.annotation.gtf" - lncipedia_gtf = "/data/database/hg38/Annotation/lncipedia_4_0_hg38.gtf" // set "null" if you are going to perform analysis on other species - -/* - User setting options (optional) - */ - // tools setting - hisat_strand = 'RF' - star_index = ''//set if star used - bowtie2_index = ''//set if tophat used - aligner = "hisat" // or "star","tophat" - sam_processor="sambamba"//or "samtools(deprecated)" - qctools ="fastp" 
// or "afterqc","fastp","fastqc","none" to skip qc step - detools = "edger"//or "deseq2" - quant = "kallisto"// or 'htseq' - //other setting - singleEnd = false - unstrand = false - skip_combine = false - lncRep_Output = 'reporter.html' - lncRep_theme = 'npg' - lncRep_cdf_percent = 10 - lncRep_max_lnc_len = 10000 - lncRep_min_expressed_sample = 50 - mem=60 - cpu=30 - /* - for non-human setting - */ - species="human"// mouse , zebrafish, fly - known_coding_gtf="" - known_lncRNA_gtf="" - - -/* -Don't modify -*/ - cpatpath ='/opt/CPAT-1.2.3' - - -} - -/* -Don't modify either -*/ -// Docker options - -singularity.enabled = true -process.container = './lncPipe.image' -singularity.autoMounts = true -// individual process setting -process.cache = 'deep' - -process { - withLabel: para { - maxForks = 6 - } - - withLabel: 'qc' { - maxForks = 6 - } - -} - -manifest { - homePage = 'https//github.com/likelet/LncPipe' - description = 'LncPipe:a Nextflow-based Long non-coding RNA analysis PIPELINE' - mainScript = 'LncRNAanalysisPipe.nf' -} -