Skip to content

Commit 505de67

Browse files
committed
Merge branch 'release/1.0.0' into main
TA repeat mean FPBM coverage calculation code
2 parents 207faee + 5a5d7ae commit 505de67

20 files changed

+15982
-2
lines changed

.circleci/config.yml

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
version: 2.1
2+
3+
orbs:
4+
# The python orb contains a set of prepackaged CircleCI configuration you can use repeatedly in your configuration files
5+
# Orb commands and jobs help you with common scripting around a language/tool
6+
# so you don't have to copy and paste it everywhere.
7+
# See the orb documentation here: https://circleci.com/developer/orbs/orb/circleci/python
8+
python: circleci/python@1.2
9+
10+
workflows:
11+
sample: # This is the name of the workflow, feel free to change it to better match your workflow.
12+
# Inside the workflow, you define the jobs you want to run.
13+
# For more details on extending your workflow, see the configuration docs: https://circleci.com/docs/2.0/configuration-reference/#workflows
14+
jobs:
15+
- build-and-test
16+
17+
18+
jobs:
19+
build-and-test: # This is the name of the job, feel free to change it to better match what you're trying to do!
20+
# These next lines define a Docker executor: https://circleci.com/docs/2.0/executor-types/
21+
# You can specify an image from Dockerhub or use one of the convenience images from CircleCI's Developer Hub
22+
# A list of CircleCI Docker convenience images is available here: https://circleci.com/developer/images/image/cimg/python
23+
# The executor is the environment in which the steps below will be executed - the image below provides a Python 3.8 container
24+
# Change the version below to your required version of python
25+
docker:
26+
- image: cimg/python:3.8
27+
# Checkout the code as the first step. This is a dedicated CircleCI step.
28+
# The python orb's install-packages step will install the dependencies from a Pipfile via Pipenv by default.
29+
# Here we're making sure we just use the system-wide pip. By default it uses the project root's requirements.txt.
30+
# Then run your tests!
31+
# CircleCI will report the results back to your VCS provider.
32+
steps:
33+
- checkout
34+
- python/install-packages:
35+
pkg-manager: pip
36+
# app-dir: ~/project/package-directory/ # If your requirements.txt isn't in the root directory.
37+
# pip-dependency-file: test-requirements.txt # if you have a different name for your requirements file, maybe one that combines your runtime and test requirements.
38+
- run:
39+
name: Run tests
40+
# This assumes pytest is installed via the install-packages step above
41+
command: pytest

.dockerignore

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
/LICENCE
2+
/INSTALL
3+
/CHANGES.md
4+
/.gitignore
5+
/.git
6+
/.travis.yml
7+
/env
8+
/dist

.gitignore

+28
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ share/python-wheels/
2626
.installed.cfg
2727
*.egg
2828
MANIFEST
29+
out*
30+
test.txt
31+
tmpout
32+
tests/test_*/*.vcf
33+
*.log
2934

3035
# PyInstaller
3136
# Usually these files are written by a python script from a template
@@ -127,3 +132,26 @@ dmypy.json
127132

128133
# Pyre type checker
129134
.pyre/
135+
env
136+
**/__pycache__/
137+
**/*.pyc
138+
*.py[cod]
139+
*$py.class
140+
.coverage
141+
cover/
142+
htmlcov
143+
/.eggs
144+
*.egg
145+
/build
146+
/dist
147+
*.log
148+
/results.tsv
149+
/.cache*
150+
/distribute*
151+
/.idea
152+
/tmp*
153+
.nfs*
154+
.idea
155+
.pytest_cache*
156+
annotateVcf.egg-info/
157+
farm5.sh

CHANGES.md

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
2+
## 1.0.0
3+
* First release to calculate TA repeats FPBM values using samtools bedcoverage data
4+

Dockerfile

+81
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# ---- Build stage ----------------------------------------------------------
# Installs analyse_ta (and its Python dependencies) under $CGP_OPT/python-lib.
FROM ubuntu:20.04 AS builder
USER root

# Release tag of analyse_ta downloaded from GitHub in the pip3 step below.
ENV ANALYSE_TA_VER '1.0.0'

# install system tools
RUN apt-get -yq update
RUN apt-get install -yq --no-install-recommends \
    locales \
    g++ \
    make \
    gcc \
    pkg-config \
    python3 python3-dev python3-pip python3-setuptools \
    zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev \
    git

# Installation prefix; its bin/ and python-lib/ directories are put on
# PATH and PYTHONPATH so packages installed with --prefix are found.
ENV CGP_OPT /opt/wtsi-cgp
RUN mkdir $CGP_OPT
ENV PATH $CGP_OPT/bin:$CGP_OPT/python-lib/bin:$PATH
ENV PYTHONPATH $CGP_OPT/python-lib/lib/python3.8/site-packages

# Generate and select the en_US.UTF-8 locale.
RUN locale-gen en_US.UTF-8
RUN update-locale LANG=en_US.UTF-8
ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8

# install analyse_ta
RUN pip3 install --upgrade setuptools

# First install the tagged release archive from GitHub into the prefix ...
RUN pip3 install --install-option="--prefix=$CGP_OPT/python-lib" https://github.com/cancerit/analyse_ta/archive/${ANALYSE_TA_VER}.tar.gz

# ... then copy the build context into the image, build an sdist from it and
# install that sdist into the same prefix.
COPY . .

RUN python3 setup.py sdist
RUN pip3 install --install-option="--prefix=$CGP_OPT/python-lib" dist/$(ls -1 dist/)
39+
40+
# ---- Runtime stage --------------------------------------------------------
# Final image: system runtime packages plus the $CGP_OPT tree copied from the
# builder stage. Runs as the unprivileged "ubuntu" user.
FROM ubuntu:20.04

LABEL uk.ac.sanger.cgp="Cancer Genome Project, Wellcome Sanger Institute" \
      version="1.0.0" \
      description="Tool to perform TA repeat bed coverage analysis"

### security upgrades and cleanup
RUN apt-get -yq update
# python3-setuptools (not the Python 2 "python-setuptools" package) so the
# image stays Python 3 only, consistent with the builder stage.
RUN apt-get install -yq --no-install-recommends \
    apt-transport-https \
    locales \
    ca-certificates \
    time \
    unattended-upgrades \
    python3 \
    python3-setuptools \
    python3-pip \
    zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev && \
    unattended-upgrade -d -v
RUN apt-get autoremove -yq

RUN locale-gen en_US.UTF-8
RUN update-locale LANG=en_US.UTF-8
# Same prefix, PATH and PYTHONPATH as the builder stage so the copied
# scripts and site-packages are found.
ENV CGP_OPT /opt/wtsi-cgp
ENV PATH $CGP_OPT/bin:$CGP_OPT/python-lib/bin:$PATH
ENV PYTHONPATH $CGP_OPT/python-lib/lib/python3.8/site-packages
RUN pip3 install --upgrade setuptools
# Use $CGP_OPT here: it is the only prefix variable defined in this stage,
# and an undefined variable would expand to an empty string (giving "/lib").
ENV LD_LIBRARY_PATH $CGP_OPT/lib
ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
RUN mkdir -p $CGP_OPT
# Bring over everything the builder stage installed under the prefix.
COPY --from=builder $CGP_OPT $CGP_OPT

## USER CONFIGURATION

RUN adduser --disabled-password --gecos '' ubuntu && chsh -s /bin/bash && mkdir -p /home/ubuntu

USER ubuntu

WORKDIR /home/ubuntu

CMD ["/bin/bash"]

README.md

+138-2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,138 @@
1-
# ta_analyser
2-
script to analyse ta repeats coverage calculated using bedtools
1+
# analyse_ta
2+
[![cancerit](https://circleci.com/gh/cancerit/analyse_ta.svg?style=svg)](https://circleci.com/gh/cancerit/analyse_ta)
3+
4+
This project hosts a script to calculate mean FPBM (fragments per base per million) values for TA repeats using samtools bedcov output.
5+
For a detailed description of the method used to calculate the FPBM values, please refer to the [Nature] article.
6+
7+
8+
`samtools bedcov analyse_ta/data/liftover_broken_ta_sorted_fai.bed.gz test.bam >test_br.bedcov
9+
samtools bedcov analyse_ta/data/liftover_non_broken_ta_sorted_fai.bed.gz test.bam >test_nbr.bedcov`
10+
11+
<!-- TOC depthFrom:2 depthTo:6 withLinks:1 updateOnSave:1 orderedList:0 -->
12+
13+
- [Design](#design)
14+
- [Tools](#tools)
15+
- [analyse_ta](#analyse_ta)
16+
- [inputFormat](#inputformat)
17+
- [outputFormat](#outputformat)
18+
- [INSTALL](#install)
19+
- [Package Dependencies](#package-dependencies)
20+
- [Development environment](#development-environment)
21+
- [Development Dependencies](#development-dependencies)
22+
- [Setup VirtualEnv](#setup-virtualenv)
23+
- [Cutting a release](#cutting-a-release)
24+
- [Install via `.whl` (wheel)](#install-via-whl-wheel)
25+
- [Reference](#reference)
26+
27+
<!-- /TOC -->
28+
29+
## Design
30+
31+
Uses pandas>=1.3.1
32+
33+
## Tools
34+
35+
`analyse_ta` has multiple command line options, listed with `analyse_ta --help`.
36+
37+
### analyse_ta
38+
Takes samtools bed coverage files for broken and non-broken TA repeat intervals as input, plus an optional sample_name parameter.
39+
40+
Various exceptions can occur for malformed input files.
41+
42+
### inputFormat
43+
44+
* ```test_br.bedcov``` bed coverage file for broken TA repeats generated using [samtools]
45+
* ```test_nbr.bedcov``` bed coverage file for non-broken TA repeats generated using [samtools]
46+
* ```test_sample``` sample name to be printed with results
47+
48+
### outputFormat
49+
50+
* ```test_sample 6.12 15.8``` command line output <sample_name> <mean_fpbm_broken> <mean_fpbm_non_broken>.
51+
52+
## INSTALL
53+
Installing via `pip install`. Simply execute with the path to the compiled 'whl' found on the [release page][analyse_ta-releases]:
54+
55+
```bash
56+
python3 setup.py sdist bdist_wheel
57+
pip install analyse_ta.X.X.X-py3-none-any.whl
58+
```
59+
60+
Release `.whl` files are generated as part of the release process and can be found on the [release page][analyse_ta-releases]
61+
62+
## Development environment
63+
64+
This project uses git pre-commit hooks. As these will execute on your system it
65+
is entirely up to you if you activate them.
66+
67+
If you want tests, coverage reports and lint-ing to automatically execute before
68+
a commit you can activate them by running:
69+
70+
```
71+
git config core.hooksPath git-hooks
72+
```
73+
74+
Only a test failure will block a commit, lint-ing is not enforced (but please consider
75+
following the guidance).
76+
77+
You can run the same checks manually without a commit by executing the following
78+
in the base of the clone:
79+
80+
```bash
81+
./run_tests.sh
82+
```
83+
84+
### Development Dependencies
85+
86+
pytest
87+
radon
88+
pytest-cov
89+
90+
#### Setup VirtualEnv
91+
92+
```
93+
cd $PROJECTROOT
94+
hash virtualenv || pip3 install virtualenv
95+
virtualenv -p python3 env
96+
source env/bin/activate
97+
python setup.py develop # so bin scripts can find module
98+
```
99+
100+
For testing/coverage (`./run_tests.sh`)
101+
102+
```
103+
source env/bin/activate # if not already in env
104+
pip install pytest
105+
pip install radon
106+
pip install pytest-cov
107+
```
108+
109+
__Also see__ [Package Dependencies](#package-dependencies)
110+
111+
### Cutting a release
112+
113+
__Make sure the version is incremented__ in `./setup.py`
114+
115+
### Install via `.whl` (wheel)
116+
117+
Generate `.whl`
118+
119+
```bash
120+
source env/bin/activate # if not already
121+
python setup.py bdist_wheel -d dist
122+
```
123+
124+
Install .whl
125+
126+
```bash
127+
# this creates a wheel archive which can be copied to a deployment location, e.g.
128+
scp dist/analyse_ta.X.X.X-py3-none-any.whl user@host:~/wheels
129+
# on host
130+
pip install --find-links=~/wheels analyse_ta
131+
```
132+
133+
### Reference
134+
<!--refs-->
135+
[Nature]:https://www.nature.com/articles/s41586-020-2769-8
136+
[samtools]: http://www.htslib.org
137+
[analyse_ta-releases]: https://github.com/cancerit/analyse_ta/releases
138+

analyse_ta/__init__.py

Whitespace-only changes.

analyse_ta/commandline.py

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import analyse_ta.process_bedcov as processcov
2+
import sys
3+
import os
4+
import argparse
5+
import pkg_resources
6+
# load config and reference files....
7+
8+
version = pkg_resources.require("analyse_ta")[0].version
9+
10+
11+
def main():  # pragma: no cover <--
    """Command-line entry point for ``analyse_ta``.

    Parses the command line, hands the parsed options to
    ``processcov.processBedCov`` as keyword arguments (``file_br``,
    ``file_nbr``, ``sample_name`` and ``verbose``) and prints the
    ``results`` attribute of the returned object.

    Exits with status 1 after printing the help text when the program is
    invoked without any arguments, and exits with an error message when
    either input file argument is empty.
    """
    optParser = argparse.ArgumentParser(prog='analyse_ta',
                                        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Pop the default "optional arguments" group so that the custom
    # "required arguments" group is listed first in --help; it is
    # re-appended once all options have been declared.
    optional = optParser._action_groups.pop()
    required = optParser.add_argument_group('required arguments')

    required.add_argument("-br", "--file_br", type=str, dest="file_br", required=True,
                          default=None, help="broken ta repeat bed coverage file")
    required.add_argument("-nbr", "--file_nbr", type=str, dest="file_nbr", required=True,
                          default=None, help="non broken ta repeat bed coverage file")

    optional.add_argument("-s", "--sample_name", type=str, dest="sample_name", required=False,
                          default='test_sample', help="sample name")
    optional.add_argument("-v", "--version", action='version', version='%(prog)s ' + version)
    optional.add_argument("-q", "--quiet", action="store_false", dest="verbose", required=False, default=True)

    optParser._action_groups.append(optional)
    # sys.argv always contains the program name, so "no arguments given"
    # means a length of 1 (a length of 0 can never occur).
    if len(sys.argv) == 1:
        optParser.print_help()
        sys.exit(1)
    opts = optParser.parse_args()
    # argparse already enforces presence; this additionally rejects empty
    # strings passed as file names.
    if not opts.file_nbr or not opts.file_br:
        sys.exit('\nERROR Arguments required\n\tPlease run: analyse_ta.py --help\n')
    # vars function returns __dict__ of Namespace instance
    processed = processcov.processBedCov(**vars(opts))
    print(processed.results)
39+
40+
41+
if __name__ == '__main__':
42+
main()
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)