From be04faf07894d2b9e4ae94a06be06aa2049a389c Mon Sep 17 00:00:00 2001 From: Haibao Tang Date: Sun, 28 Apr 2024 15:15:21 -0700 Subject: [PATCH] Deprecate alfalfa.py and pistachio.py --- jcvi/projects/alfalfa.py | 62 ---------------------------- jcvi/projects/misc.py | 1 - jcvi/projects/pistachio.py | 83 -------------------------------------- 3 files changed, 146 deletions(-) delete mode 100644 jcvi/projects/alfalfa.py delete mode 100644 jcvi/projects/pistachio.py diff --git a/jcvi/projects/alfalfa.py b/jcvi/projects/alfalfa.py deleted file mode 100644 index 3a67b7c3..00000000 --- a/jcvi/projects/alfalfa.py +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Random collection of scripts associated with alfalfa assembly. -""" - -import sys - -from jcvi.formats.bed import Bed, fastaFromBed -from jcvi.graphics.mummerplot import main as mummerplot_main -from jcvi.apps.base import OptionParser, ActionDispatcher, sh - - -def main(): - - actions = (("nucmer", "select specific chromosome region based on MTR mapping"),) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def nucmer(args): - """ - %prog nucmer mappings.bed MTR.fasta assembly.fasta chr1 3 - - Select specific chromosome region based on MTR mapping. The above command - will extract chr1:2,000,001-3,000,000. - """ - p = OptionParser(nucmer.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 5: - sys.exit(not p.print_help()) - - mapbed, mtrfasta, asmfasta, chr, idx = args - idx = int(idx) - m1 = 1000000 - bedfile = "sample.bed" - bed = Bed() - bed.add("\t".join(str(x) for x in (chr, (idx - 1) * m1, idx * m1))) - bed.print_to_file(bedfile) - - cmd = "intersectBed -a {0} -b {1} -nonamecheck -sorted | cut -f4".format( - mapbed, bedfile - ) - idsfile = "query.ids" - sh(cmd, outfile=idsfile) - - sfasta = fastaFromBed(bedfile, mtrfasta) - qfasta = "query.fasta" - cmd = "faSomeRecords {0} {1} {2}".format(asmfasta, idsfile, qfasta) - sh(cmd) - - cmd = "nucmer {0} {1}".format(sfasta, qfasta) - sh(cmd) - - mummerplot_main(["out.delta", "--refcov=0"]) - sh("mv out.pdf {0}.{1}.pdf".format(chr, idx)) - - -if __name__ == "__main__": - main() diff --git a/jcvi/projects/misc.py b/jcvi/projects/misc.py index 0a287434..2f4b8114 100644 --- a/jcvi/projects/misc.py +++ b/jcvi/projects/misc.py @@ -33,7 +33,6 @@ def main(): ("oropetium", "plot oropetium micro-synteny (requires data)"), # Pomegranate paper (Qin et al., 2017 Plant Journal) ("pomegranate", "plot pomegranate macro- and micro-synteny (requires data)"), - # Unpublished ("birch", "plot birch macro-synteny (requires data)"), ("litchi", "plot litchi micro-synteny (requires data)"), ("utricularia", "plot utricularia micro-synteny (requires data)"), diff --git a/jcvi/projects/pistachio.py b/jcvi/projects/pistachio.py deleted file mode 100644 index 83e1ca93..00000000 --- a/jcvi/projects/pistachio.py +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Functions related to processing of the pistachio genome. -""" -import sys - -from jcvi.apps.base import OptionParser, ActionDispatcher - - -def main(): - - actions = (("agp", "convert from the table file to agp format"),) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def agp(args): - """ - %prog agp Siirt_Female_pistachio_23May2017_table.txt - - The table file, as prepared by Dovetail Genomics, is not immediately useful - to convert gene model coordinates, as assumed by formats.chain.fromagp(). - This is a quick script to do such conversion. The file structure of this - table file is described in the .manifest file shipped in the same package:: - - pistachio_b_23May2017_MeyIy.table.txt - Tab-delimited table describing positions of input assembly scaffolds - in the Hirise scaffolds. The table has the following format: - - 1. HiRise scaffold name - 2. Input sequence name - 3. Starting base (zero-based) of the input sequence - 4. Ending base of the input sequence - 5. Strand (- or +) of the input sequence in the scaffold - 6. Starting base (zero-based) in the HiRise scaffold - 7. Ending base in the HiRise scaffold - - where '-' in the strand column indicates that the sequence is reverse - complemented relative to the input assembly. - - CAUTION: This is NOT a proper AGP format since it does not have gaps in - them. - """ - p = OptionParser(agp.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (tablefile,) = args - fp = open(tablefile) - for row in fp: - atoms = row.split() - hr = atoms[0] - scaf = atoms[1] - scaf_start = int(atoms[2]) + 1 - scaf_end = int(atoms[3]) - strand = atoms[4] - hr_start = int(atoms[5]) + 1 - hr_end = int(atoms[6]) - - print( - "\t".join( - str(x) - for x in ( - hr, - hr_start, - hr_end, - 1, - "W", - scaf, - scaf_start, - scaf_end, - strand, - ) - ) - ) - - -if __name__ == "__main__": - main()