diff --git a/.test/config/config.yaml b/.test/config/config.yaml
index f4a0a21..0dc0ce5 100644
--- a/.test/config/config.yaml
+++ b/.test/config/config.yaml
@@ -28,7 +28,7 @@ small_variants: DV.vcf.gz
 #panel: '/cluster/work/pausch/alex/eQTL_GWAS/pangenie_panel.vcfwave.vcf'
 fastq: ''
 
-HiFi:
+HiFi_samples:
   sample1: hifi_1.fq.gz
   sample2: hifi_2.fq.gz
 
@@ -55,6 +55,12 @@ covariates:
     Testis: 'eQTL.covar'
   sQTL:
     Testis: 'sQTL.covar'
+mol_QTLs:
+  eQTL:
+    Testis: 'eQTL.TPM'
+  sQTL:
+    Testis: 'sQTL.clusters'
+
 
 permutations: 2500
 window: 1000000 #1 Mb
diff --git a/.test/eQTL.TPM b/.test/eQTL.TPM
new file mode 100644
index 0000000..e69de29
diff --git a/.test/sQTL.clusters b/.test/sQTL.clusters
new file mode 100644
index 0000000..e69de29
diff --git a/README.md b/README.md
index a221dc7..0b49244 100644
--- a/README.md
+++ b/README.md
@@ -6,9 +6,30 @@ Structural variants are known to play a large role in expression and splicing QT
 However, confidently calling stuctural variants in a sufficiently large population for association mapping is hard.
 Here we use [PanGenie](https://github.com/eblerjana/pangenie) to genotype a larger cohort (100s) of short reads using an accurate pangenome panel from haplotype-resolved assemblies (10s).
 
-## Usage
+### Usage
 
+There are broadly three phases
+  - Pangenome panel (creating & genotyping)
+  - Variant analysis (statistics, linkage disequibrium, SV overlap, etc.) 
+  - Association mapping of e/sQTL
+
+Running with
+```
+snakemake --configfile config/config.yaml
+```
+Will produce execute the following DAG
+
+![workflow](https://github.com/AnimalGenomicsETH/pangenome_molQTL/assets/29678761/bb0c73ca-fc31-4319-95e2-485da93f655a)
+
+which produces the major output files (e.g., accuracy comparison of PanGenie vs DeepVariant, SV overlap with Jasmine, conditional QTL analysis with QTLtools, etc.), which can then be independently analysed further.
 
 ### Citation
 
 The preprint associated with this work can be found [here](https://www.biorxiv.org/content/10.1101/2023.06.21.545879v1).
+> **Pangenome genotyped structural variation improves molecular phenotype mapping in cattle**
+> 
+> Alexander S. Leonard, Xena M. Mapel, Hubert Pausch
+
+### Note
+Many of the parameters are tuned to run for our data and on the ETH Euler cluster, using for example a forked version of the LSF snakemake [profile](https://github.com/AnimalGenomicsETH/snakemake_lsf), so it may take some modifying to work smoothly in different contexts. Many tools are assumed to be available in $PATH, but all are freely available.
+
diff --git a/Snakefile b/Snakefile
index b5f5e05..ac3e0d4 100644
--- a/Snakefile
+++ b/Snakefile
@@ -2,14 +2,20 @@
 include: 'snakepit/pangenie_panel.smk'
 include: 'snakepit/pangenie_genotyping.smk'
 include: 'snakepit/variant_calling.smk'
+include: 'snakepit/variant_accuracy.smk'
 include: 'snakepit/association_mapping.smk'
 include: 'snakepit/LD.smk'
 
 rule all:
     input:
+        ## Pangenome panel creation and genotyping
         'pangenie_panel/multisample-vcfs/graph-filtered.vcf',
         'pangenie_panel/samples.all.pangenie_genotyping_DV.vcf.gz',
+        ## Variant comparison between pangenome panel, short reads, and HiFi reads
         'jasmine.vcf',
-        'SVs/sizes.gz',
+        expand('SVs/{metric}.gz',metric=('sizes','support','GTs')),
+        expand('SNPs/{metric}.csv',metric=('F1','isec')),
+        ## Linkage disequilibrium analysis of SVs
         expand('LD/samples.SV.{r2}.{window}.tags.list',r2=list(range(2,10))+[99],window=(10,100,1000)),
+        ## Molecular QTL mapping
         expand('QTL/{QTL}/Testis_{variants}/conditionals.01.txt.gz',QTL=('eQTL','sQTL'),variants=('PanGenie','SR'))
diff --git a/region_coverage.sh b/region_coverage.sh
deleted file mode 100644
index 1dae455..0000000
--- a/region_coverage.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-samtools bedcov <(echo -e "14\t1559308\t1605491") $(awk 'NR>205 {print "/cluster/work/pausch/inputs/bam/BTA_eQTL/"$2".bam"}' ../config/large_cohort.yaml)
-for i in $(awk 'NR>205 {print $2}' ../config/large_cohort.yaml); do echo "\"${i}\":$(awk '/total/ {print $1*150/2700000000.0}' /cluster/work/pausch/inputs/bam/BTA_eQTL/${i}.stats)"; done |tr '\n' ','
-bcftools query -f '{["%SAMPLE":"%GT",]\n' -r 14:1559308 variants.normed.vcf.gz
diff --git a/snakepit/SNPs.smk b/snakepit/SNPs.smk
deleted file mode 100644
index d13ac84..0000000
--- a/snakepit/SNPs.smk
+++ /dev/null
@@ -1,50 +0,0 @@
-rule all:
-    input:
-        expand('{asm}.ARS.vcf',asm=config['assemblies'])
-
-rule minimap2_align:
-    input:
-        '/cluster/work/pausch/alex/assembly/SV_graph/fasta/{asm}.fasta'
-    output:
-        '{asm}.ARS.paf'
-    threads: 8
-    resources:
-        mem_mb = 5000,
-        walltime = '2:00'
-    shell:
-        'minimap2 -cx asm5 -t {threads} --cs {config[reference]} {input} > {output}'
-
-rule paf_variants:
-    input:
-        '{asm}.ARS.paf'
-    output:
-        '{asm}.ARS.vcf'
-    resources:
-        mem_mb = 10000,
-        walltime = '30'
-    shell:
-        'sort {input} -k6,6 -k8,8n | paftools.js call -f {config[reference]} - > {output}'
-
-
-rule prep_vcf:
-    input:
-        '{asm}.ARS.vcf'
-    output:
-        multiext('{asm}.ARS.snp.vcf.gz','','.tbi')
-    shell:
-        '''
-        bcftools reheader -s <(echo {wildcards.asm}) {input} | bcftools view -O z -o {output[0]} -v snps - 
-        tabix -p vcf {output[0]}
-        '''
-
-rule bcftools_merge:
-    input:
-        expand('{asm}.ARS.snp.vcf.gz',asm=config['assemblies'])
-    output:
-        multiext('merged.ARS.snp.vcf.gz','','.tbi')
-    threads: 2
-    resources:
-        mem_mb = 2000,
-        walltime = '30'
-    shell:
-        'bcftools merge --threads 2 -O z -o {output[0]} {input}; tabix -p vcf {output[0]}'
diff --git a/snakepit/association_mapping.smk b/snakepit/association_mapping.smk
index 085990e..661f647 100644
--- a/snakepit/association_mapping.smk
+++ b/snakepit/association_mapping.smk
@@ -7,19 +7,6 @@ wildcard_constraints:
     MAF = r'\d+',
     vcf = r'(eQTL|gwas)/\S+'
 
-rule concat_genes:
-    input:
-        lambda wildcards: expand('/cluster/work/pausch/xena/eQTL/gene_counts/{tissue_code}/QTLtools/{chromosome}_gene_counts.gz',chromosome=range(1,30),tissue_code={'Testis':'testis','Epididymis_head':'epi_h','Vas_deferens':'vas_d'}[wildcards.tissue]) if wildcards.QTL=='eQTL' else expand('/cluster/work/pausch/xena/eQTL/sQTL/{tissue_code}/removed/phenotypes/leafcutter.clus.{chromosome}.bed.gz',chromosome=range(1,30),tissue_code={'Testis':'testis','Epididymis_head':'epi_h','Vas_deferens':'vas_d'}[wildcards.tissue])
-    output:
-        'aligned_genes/{QTL}.{tissue}.bed.gz',
-        'aligned_genes/{QTL}.{tissue}.bed.gz.tbi' 
-    localrule: True
-    shell:
-        '''
-        zcat {input} | sort -k1,1n -k2,2n | uniq | bgzip -@ 2 -c > {output[0]}
-        tabix -p bed {output[0]}
-        '''
-
 rule normalise_vcf:
     input:
         lambda wildcards: expand(rules.merge_with_population_SR.output,pangenie_mode='genotyping',allow_missing=True) if wildcards.variants == 'PanGenie' else config['small_variants']
@@ -60,7 +47,7 @@ rule qtltools_parallel:
     input:
         vcf = rules.normalise_vcf.output,
         exclude = rules.exclude_MAF.output,
-        bed = rules.concat_genes.output,
+        bed = lambda wildcards: config['mol_QTLs'][wildcards.QTL][wildcards.tissue],
         cov = lambda wildcards: config['covariates'][wildcards.QTL][wildcards.tissue],
         mapping = lambda wildcards: 'QTL/{QTL}/{tissue}_{variants}/permutations_all.{MAF}.thresholds.txt' if wildcards._pass == 'conditionals' else []
     output:
@@ -101,7 +88,7 @@ rule qtltools_FDR:
         'r/4.2.2'
     shell:
         '''
-        Rscript /cluster/work/pausch/alex/software/qtltools/scripts/qtltools_runFDR_cis.R {input} 0.05 {params.out}
+        Rscript qtltools_runFDR_cis.R {input} 0.05 {params.out}
         '''
 
 rule LD:
diff --git a/snakepit/igv_validate.smk b/snakepit/igv_validate.smk
deleted file mode 100644
index 6b6d0d4..0000000
--- a/snakepit/igv_validate.smk
+++ /dev/null
@@ -1,41 +0,0 @@
-
-rule slice_ARS:
-    input:
-        bam = '/cluster/work/pausch/inputs/bam/BTA_UCD12/{sample}.bam',
-        asm = '/cluster/work/pausch/alex/assembly/BSWCHEM120151536851/hifiasmv152_100/hap2.scaffolds.fasta'
-    output:
-        bam = get_dir('igv','{sample}.TBX3.ARS.bam'),
-        asm = get_dir('igv','{sample}.TBX3X.BSW.bam')
-    threads: 4
-    resources:
-        mem_mb = 5000,
-        disk_scratch = 10,
-        walltime = '1:00'
-    shell:
-        '''
-        samtools view -@ {threads} -o {output.bam} {input.bam} 17:60000000-61000000
-        samtools fastq -@ {threads} {output.bam} | paste - - - -  | sort -k1,1 -S 3G  | tr '\t' '\n' > $TMPDIR/reads.fq
-        minimap2 -t {threads} --cs -axsr {input.asm} <(seqtk seq -1 $TMPDIR/reads.fq) <(seqtk seq -2 $TMPDIR/reads.fq) | samtools sort - -m 1000M -@ {threads} -o {output.asm}
-        samtools index -@ {threads} {output.bam}
-        samtools index -@ {threads} {output.asm}
-        '''
-
-rule slice_BSW:
-    input:
-        fastq = expand('/cluster/work/pausch/inputs/fastq/BTA/{{sample}}_R{N}.fastq.gz',N=(1,2)),
-        asm = '/cluster/work/pausch/alex/assembly/BSWCHEM120151536851/hifiasmv152_100/hap2.scaffolds.fasta'
-    output:
-        full = multiext(get_dir('igv','{sample}.BSW.bam'),'','.bai'),
-        region = multiext(get_dir('igv','{sample}.TBX3.BSW.bam'),'','.bai')
-    threads: 24
-    resources:
-        mem_mb = 5000,
-        disk_scratch=100
-    shell:
-        '''
-            minimap2 -t {threads} --cs -axsr {input.asm} {input.fastq} | samtools sort - -m 1000M -@ {threads} -T $TMPDIR -o {output.full[0]}
-            samtools index -@ {threads} {output.full[0]}
-            samtools view -@ {threads} -o {output.region[0]} {output.full[0]} 17:68200000-68700000
-            samtools index -@ {threads} {output.region[0]}
-        '''
-
diff --git a/snakepit/variant_accuracy.smk b/snakepit/variant_accuracy.smk
index eac1d9c..5bb23dc 100644
--- a/snakepit/variant_accuracy.smk
+++ b/snakepit/variant_accuracy.smk
@@ -1,85 +1,14 @@
 from pathlib import PurePath
 
 ## Add reference path for happy container
-workflow.singularity_args = f'-B $TMPDIR -B {PurePath(config["reference"]).parent}'
-
-rule all:
-    input:
-        #expand('SVs/{sample}.denovo.sniffles.vcf.gz',sample=config['samples']),
-        #'SVs/samples.denovo.sniffles.vcf.gz',
-        'SNPs/F1.csv',
-        'SNPs/isec.csv'
-
-rule sniffles_call:
-    input:
-        bam = lambda wildcards: config['SV_samples'][wildcards.sample]#'/nfs/nas12.ethz.ch/fs1201/green_groups_tg_public/data/long-read_alignments/PacBio_CCS/eQTL/{sample}.mm2.cram'
-    output:
-        vcf = temp(multiext('SVs/{sample}.denovo.sniffles.vcf.gz','','.tbi')),
-        snf = temp('SVs/{sample}.denovo.sniffles.snf')
-    threads: 4
-    resources:
-        mem_mb = 2500
-    conda:
-        'sniffles'
-    shell:
-        '''
-        sniffles --input {input.bam} --reference {config[reference]} --sample-id {wildcards.sample} --phase --minsvlen 50 --threads {threads} --vcf {output.vcf[0]} --snf {output.snf}
-        '''
-
-rule sniffles_merge:
-    input:
-        snfs = expand(rules.sniffles_call.output['snf'],sample=config['SV_samples'],allow_missing=True)
-    output:
-        vcf = multiext('SVs/samples.{call}.sniffles.vcf.gz','','.tbi')
-    threads: 2
-    resources:
-        mem_mb = 2500
-    conda:
-        'sniffles'
-    shell:
-        '''
-        sniffles --input {input.snfs} --reference {config[reference]} --threads {threads} --vcf {output.vcf[0]}
-        '''
-
-rule sniffles_genotype:
-    input:
-        bam = '/cluster/work/pausch/alex/CCS_eQTL_cohort/ARS/{sample}.mm2.cram',
-        vcf = lambda wildcards: '' if wildcards.call == 'denovo' else '/cluster/work/pausch/alex/eQTL_GWAS/SV_ACCURACY/genotyping.vcf.gz' #'/cluster/work/pausch/alex/eQTL_GWAS/SV_ACCURACY/pangenie.vcf'
-    output:
-        vcf = temp(multiext('SVs/{sample}.{call}.sniffles.vcf.gz','','.tbi'))
-    params:
-    threads: 4
-    resources:
-        mem_mb = 1500
-    conda:
-        'sniffles'
-    shell:
-        '''
-        sniffles --input {input.bam} --genotype-vcf {input.vcf} --reference {config[reference]} --sample-id {wildcards.sample} --minsvlen 50 --threads {threads} --vcf {output.vcf}
-        '''
-
-rule bcftools_stuff:
-    input:
-        'SVs/samples.denovo.sniffles.vcf.gz'
-    output:
-        sizes = 'SVs/sizes.gz',
-        support = 'SVs/support.gz',
-        GTs = 'SVs/GTs.gz'
-    localrule: True
-    shell:
-        '''
-        bcftools query -e 'INFO/SVTYPE=="BND"' -f '%INFO/SVLEN\\n' {input} | pigz -p2 -c > {output.sizes}
-        bcftools query -e 'INFO/SVTYPE=="BND"' -f '%INFO/SUPP_VEC\\n' {input} | mawk ' {{ print gsub(1,2,$1) }} ' | pigz -p2 -c > {output.support}
-        bcftools query -e 'INFO/SVTYPE=="BND"' -f '[%GT ]\\n' {input} | sed 's"|" "g' | sed 's"/" "g' | sed 's/\./nan/g' | pigz -p2 -c > {output.GTs}
-        '''
-
+workflow._singularity_args = f'-B $TMPDIR -B {PurePath(config["reference"]).parent}'
 
 def get_variants(variant,caller):
     input_dict = {
-              'SNPs_PG':'/cluster/work/pausch/alex/eQTL_GWAS/pangenie_8_wave/samples.all.pangenie_genotyping.vcf.gz',
-              'SNPs_DV':'/cluster/work/pausch/alex/eQTL_GWAS/variants/DV-SR/cohort.autosomes.WGS.imputed.vcf.gz',
-              'SVs_PG': '/cluster/work/pausch/alex/eQTL_GWAS/pangenie_panel.vcfwave.vcf.gz',
-              'SVs_Sniffles': 'SVs/samples.denovo.sniffles.vcf.gz'
+              'SNPs_PG':'variant_calling/panel.small.vcf.gz',
+              'SNPs_DV':config['small_variants'],
+              'SVs_PG': 'variant_calling/panel.SV.vcf',
+              'SVs_Sniffles': 'variant_calling/samples.denovo.sniffles.vcf.gz'
               }
     return input_dict[f'{variant}_{caller}']
 
@@ -130,7 +59,7 @@ rule happy:
         others = temp(multiext('SNPs/{sample}','.bcf','.bcf.csi','.extended.csv','.roc.all.csv.gz','.runinfo.json'))
     params:
         _dir = lambda wildcards, output: PurePath(output.csv).with_suffix('').with_suffix('')
-    container: '/cluster/work/pausch/alex/software/images/hap.py_latest.sif'
+    container: 'docker://pkrusche/hap.py'
     threads: 2
     resources:
         mem_mb = 5000,
@@ -175,8 +104,7 @@ rule jasmine:
         pigz -dc {input.vcfs[0]} > $TMPDIR/SVs/{wildcards.sample}.PG.vcf
         pigz -dc {input.vcfs[1]} > $TMPDIR/SVs/{wildcards.sample}.Sniffles.vcf
 
-        java -Xmx6048m -jar /cluster/work/pausch/alex/software/Jasmine/jasmine.jar \
-        --comma_filelist file_list={params._input} threads={threads} out_file=/dev/stdout out_dir=$TMPDIR \
+        jasmine --comma_filelist file_list={params._input} threads={threads} out_file=/dev/stdout out_dir=$TMPDIR \
         genome_file={config[reference]} --pre_normalize --ignore_strand --allow_intrasample --ignore_type \
         max_dist_linear=1 max_dist=1000 > {output}
         #|\
@@ -185,7 +113,7 @@ rule jasmine:
 
 rule gather_jasmine:
     input:
-        expand(rules.jasmine.output[0],sample=config['SV_samples'])
+        expand(rules.jasmine.output[0],sample=config['HiFi_samples'])
     output:
         'SVs/jasmine.csv'
     localrule: True
diff --git a/snakepit/variant_calling.smk b/snakepit/variant_calling.smk
index e5d9d3f..5bdc34e 100644
--- a/snakepit/variant_calling.smk
+++ b/snakepit/variant_calling.smk
@@ -1,6 +1,6 @@
 rule minimap2_align:
     input:
-        lambda wildcards: config['HiFi'][wildcards.sample]
+        lambda wildcards: config['HiFi_samples'][wildcards.sample]
     output:
         bam = temp('alignments/{sample}.HiFi.bam'),
         csi = temp('alignments/{sample}.HiFi.bam.csi')
@@ -32,7 +32,7 @@ rule sniffles_call:
 
 rule sniffles_merge:
     input:
-        snfs = expand('variant_calling/{sample}.sniffles.snf',sample=config['HiFi'])
+        snfs = expand('variant_calling/{sample}.sniffles.snf',sample=config['HiFi_samples'])
     output:
         vcf = 'variant_calling/samples.sniffles.vcf.gz'
     threads: 2
@@ -119,23 +119,6 @@ rule jasmine_intersect:
 #jasmine --comma_filelist file_list=smoove_SV/All_filter_type.vcf,eQTL_GWAS/variants/variant_calling/panel.SV.vcf threads=1 out_file=SR_LR.vcf out_dir=$TMPDIR spec_reads=0 genome_file=REF_DATA/ARS-UCD1.2_Btau5.0.1Y.fa min_seq_id=0 --pre_normalize --ignore_strand --allow_intrasample max_dist_linear=1 --normalize_type --dup_to_ins
 #bcftools query -f '%CHROM\t%POS\t%INFO/END\t%INFO/SVTYPE\t0\t+\t%POS\t%INFO/END\t%INFO/SUPP_VEC\n' SR_LR.vcf | grep -vE "(INV|TRA)"  | sed s'/10$/50,200,50/g' | sed s'/11$/250,100,150/g' | sed s'/01$/50,20,250/g' >> SR_LR.bed 
 
-rule bcftools_isec:
-    input:
-        read = 'DV_small.vcf.gz',#'DV-{read}/cohort.autosomes.WGS.vcf.gz',
-        asm = 'variant_calling/panel.small.vcf.gz',
-        other = 'panel.raw.small.vcf.gz'
-    output:
-        isec = 'isec_{read}_{strictness}.txt',
-        _count = 'isec_{read}_{strictness}.count'
-    threads: 2
-    resources:
-        mem_mb = 3000
-    shell:
-        '''
-        bcftools isec --threads {threads} -n +1 -o {output.isec} -c {wildcards.strictness} {input}
-        cut -f 5 {output.isec} | sort | uniq -c > {output._count}
-        '''
-
 ## Assembly rules:
 rule minimap2_align_asm:
     input: