-
Notifications
You must be signed in to change notification settings - Fork 0
/
config-HiC+Phase.yaml
105 lines (89 loc) · 4.07 KB
/
config-HiC+Phase.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# Output directory: either an absolute path, or a path relative to the
# Snakefile. Relative paths used for the files below should be given
# relative to this working directory.
workdir: 'GM12878-HiC/'
# Sample sheet listing cell type, experimental group, replicate number,
# read (forward/reverse) and the path to each FASTQ file.
# NOTE(review): this was described as a CSV file, but the path below has a
# .tsv extension - confirm the expected delimiter.
data: '../config/samples.tsv'
# BED file of genomic regions on which to perform HiC analysis.
# These may be whole chromosomes for normal HiC, or specific capture regions
# for region capture HiC.
regions: '../config/regions.bed'
# FASTA references to align data against. A reference must be specified for
# each cell type defined in config['data'].
# NOTE(review): this key was 'S2Rplus', which did not match the GM12878 cell
# type used to key config['unphasedVCF'] - confirm against the sample sheet.
genome:
  GM12878: ../genome/GRCh38.fa.gz
build: GRCh38
# Set True to run the phasing and haplotype assembly pipeline.
phase: True
# Paths to unphased variant calls, keyed by cell type.
unphasedVCF:
  GM12878: ../genome/GM12878-unphasedVariants.vcf
# Bin sizes at which to analyse HiC data at different resolutions.
# The base bin size defines the base resolution; all subsequent bin sizes
# must be wholly divisible by the base bin size, e.g. [1000, 1500] is invalid.
resolution:
  base: 1000
  bins: [3000]
# Parameters for cutadapt - see https://cutadapt.readthedocs.io/en/stable/guide.html
cutadapt:
  forwardAdapter: AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
  reverseAdapter: AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
  overlap: 6
  errorRate: 0.1
  minimumLength: 20
  qualityCutoff: 20
  # NOTE(review): GCcontent is assumed to belong to this section - confirm.
  GCcontent: 43
# Restriction sequences, listed in order of protocol usage. The cut site is
# denoted using the '^' symbol. Ensure restriction enzyme names are given as
# strings.
restrictionSeqs:
  A:
    DpnII: '^GATC'
# Parameters for building the HiC matrix.
HiCParams:
  minBins: 50
  minDistance: 300
  maxLibraryInsertSize: 1000
  minMappingQuality: 15
  keepSelfLigation: False
  keepSelfCircles: False
  nofill: False
  makeBam: False
  compartmentScore: False  # Run Cscore compartment analysis.
  threads: 4  # Threads for building HiC matrix.
  multiplicativeValue: 10000  # HiC counts are normalised to this value.
  microC: False  # Treat data as microC (experimental).
# Parameters for differential comparisons between matrices.
compareMatrices:
  colourmap: bwr  # Matplotlib colour map for differential comparisons.
  zThreshold: 2  # Minimum Z-score for defining a diffTAD / ASTAD.
  vMax: 1  # Absolute value of colour scale, vMin set to -vMax.
  allPairs: False  # Compute all pairwise comparisons (e.g. A vs. B, and B vs. A).
  minSum: 0  # Remove interactions with fewer than this number of raw interactions.
# Parameters controlling HiC plots.
plotParams:
  distanceNorm: False  # Plot obs/exp matrix instead of log counts.
  plain: True  # Plot additional figure without overlaid TAD / loop annotations.
  raw: False  # Plot raw HiC matrix (without KR normalisation).
  colourmap: Purples  # Matplotlib colourmap to use for HiC matrices.
  coordinates: ../config/plot_coordinates.bed  # BED file of regions to plot.
  viewpoints: ../config/viewpoints.bed
  # Written without a '_' digit separator: '150_000' is only read as an
  # integer by YAML 1.1 parsers and becomes a string under YAML 1.2.
  viewpointRange: 150000
  plotRep: True  # Plot individual replicates in addition to merged data.
  vLines: ../config/vlines.bed  # Coordinates to add vLines to plots.
  # Plot config['regions'] in addition to config['plotParams']['coordinates'].
  includeRegions: True
  filetype: svg  # Plot filetype.
# Gene annotation to include with HiC plots.
# NOTE(review): assumed to be a top-level section - confirm it is not meant
# to be nested under another key.
Genes:
  gff3: ../genome/Homo_sapiens.GRCh38.104.gff3.gz  # Path to GFF3 gene annotation.
  typeKey: biotype  # Attribute key describing gene type.
  label: Name  # Attribute key to label on plot.
  geneID: gene_id  # Attribute key identifying gene ID.
# Assorted BigWig annotations to include with HiC plots, keyed by track name.
bigWig:
  CTCF: ../genome/GRCh38-CTCF.bigWig
# Quality control settings.
# NOTE(review): all keys below are assumed to belong to the QC section -
# confirm against the pipeline's expected schema.
QC:
  runQC: True
  flipSNP: False
  QCsample: 100000
  fastqScreen: ../config/fastq_screen.config
  multiQCconfig: ../config/multiqc_config.yaml
  runHiCRep: True  # Run HiCRep - may take a while for high res datasets.
  HiCRep_bin: 3000