From c573f16fe05e3be8e870b8bb0e0c76cf3e4b1541 Mon Sep 17 00:00:00 2001
From: Haibao Tang <tanghaibao@gmail.com>
Date: Sun, 28 Apr 2024 23:22:07 -0700
Subject: [PATCH] Deprecations (#652)

* Deprecate ml.py

* Deprecate logo, whisker, heterosis, tassel

* Deprecate script.py
---
 jcvi/algorithms/ml.py      |  54 -----------
 jcvi/apps/script.py        | 112 ----------------------
 jcvi/graphics/logo.py      |  54 -----------
 jcvi/graphics/whisker.py   |  68 --------------
 jcvi/projects/heterosis.py | 188 -------------------------------------
 jcvi/variation/tassel.py   | 124 ------------------------
 6 files changed, 600 deletions(-)
 delete mode 100644 jcvi/algorithms/ml.py
 delete mode 100644 jcvi/apps/script.py
 delete mode 100644 jcvi/graphics/logo.py
 delete mode 100644 jcvi/graphics/whisker.py
 delete mode 100644 jcvi/projects/heterosis.py
 delete mode 100644 jcvi/variation/tassel.py

diff --git a/jcvi/algorithms/ml.py b/jcvi/algorithms/ml.py
deleted file mode 100644
index 10079759..00000000
--- a/jcvi/algorithms/ml.py
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: UTF-8 -*-
-
-"""
-Machine learning algorithms.
-"""
-import sys
-
-from jcvi.apps.base import OptionParser, ActionDispatcher
-
-
-def main():
-
-    actions = (("libsvm", "convert csv file to LIBSVM format"),)
-    p = ActionDispatcher(actions)
-    p.dispatch(globals())
-
-
-def libsvm(args):
-    """
-    %prog libsvm csvfile prefix.ids
-
-    Convert csv file to LIBSVM format. `prefix.ids` contains the prefix mapping.
-    Ga -1
-    Gr 1
-
-    So the feature in the first column of csvfile get scanned with the prefix
-    and mapped to different classes. Formatting spec:
-
-    http://svmlight.joachims.org/
-    """
-    from jcvi.formats.base import DictFile
-
-    p = OptionParser(libsvm.__doc__)
-    opts, args = p.parse_args(args)
-
-    if len(args) != 2:
-        sys.exit(not p.print_help())
-
-    csvfile, prefixids = args
-    d = DictFile(prefixids)
-    fp = open(csvfile)
-    next(fp)
-    for row in fp:
-        atoms = row.split()
-        klass = atoms[0]
-        kp = klass.split("_")[0]
-        klass = d.get(kp, "0")
-        feats = ["{0}:{1}".format(i + 1, x) for i, x in enumerate(atoms[1:])]
-        print(" ".join([klass] + feats))
-
-
-if __name__ == "__main__":
-    main()
diff --git a/jcvi/apps/script.py b/jcvi/apps/script.py
deleted file mode 100644
index 28972abe..00000000
--- a/jcvi/apps/script.py
+++ /dev/null
@@ -1,112 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: UTF-8 -*-
-
-
-import sys
-import logging
-
-from jcvi.formats.base import write_file
-from jcvi.apps.base import OptionParser
-
-
-default_template = """
-\"\"\"
-
-\"\"\"
-
-import sys
-
-{}
-from jcvi.apps.base import OptionParser, ActionDispatcher
-
-
-def main():
-
-    actions = (
-        ('app', ''),
-            )
-    p = ActionDispatcher(actions)
-    p.dispatch(globals())
-
-{}
-
-if __name__ == '__main__':
-    main()
-"""
-
-default_imports = ""
-
-graphic_imports = """
-from jcvi.graphics.base import plt, savefig, normalize_axes"""
-
-default_app = """
-def app(args):
-    \"\"\"
-    %prog app datafile
-
-    \"\"\"
-    p = OptionParser(app.__doc__)
-    opts, args = p.parse_args(args)
-
-    if len(args) != 1:
-        sys.exit(not p.print_help())
-"""
-
-graphic_app = """
-def app(args):
-    \"\"\"
-    %prog app datafile
-
-    \"\"\"
-    p = OptionParser(app.__doc__)
-    opts, args, iopts = p.set_image_options(args)
-
-    if len(args) != 1:
-        sys.exit(not p.print_help())
-
-    datafile, = args
-    pf = datafile.rsplit(".", 1)[0]
-    fig = plt.figure(1, (iopts.w, iopts.h))
-    root = fig.add_axes([0, 0, 1, 1])
-
-    normalize_axes(root)
-
-    image_name = pf + "." + iopts.format
-    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
-"""
-
-
-def main():
-    """
-    %prog scriptname.py
-
-    create a minimal boilerplate for a new script
-    """
-    p = OptionParser(main.__doc__)
-    p.add_option(
-        "-g",
-        "--graphic",
-        default=False,
-        action="store_true",
-        help="Create boilerplate for a graphic script",
-    )
-
-    opts, args = p.parse_args()
-    if len(args) != 1:
-        sys.exit(not p.print_help())
-
-    (script,) = args
-    imports = graphic_imports if opts.graphic else default_imports
-    app = graphic_app if opts.graphic else default_app
-    template = default_template.format(imports, app)
-    write_file(script, template)
-
-    message = "template writes to `{0}`".format(script)
-    if opts.graphic:
-        message = "graphic " + message
-    message = message[0].upper() + message[1:]
-    logging.debug(message)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/jcvi/graphics/logo.py b/jcvi/graphics/logo.py
deleted file mode 100644
index 60a240c8..00000000
--- a/jcvi/graphics/logo.py
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: UTF-8 -*-
-
-"""
-%prog text
-
-Generate logo with certain color and font. Font must be present in:
-jcvi/graphics/fonts
-
-You can download a number of free TTF fonts from:
-<http://www.urbanfonts.com>
-"""
-
-
-import sys
-
-from jcvi.graphics.base import plt, savefig, fontprop, available_fonts
-from jcvi.apps.base import OptionParser
-
-
-def main():
-    p = OptionParser(__doc__)
-    p.add_option(
-        "--customfont",
-        default="Airswing.ttf",
-        choices=available_fonts,
-        help="Custom font name",
-    )
-    p.add_option("--color", default="limegreen", help="Font color")
-    p.add_option("--size", default=36, type="int", help="Font size")
-    opts, args, iopts = p.set_image_options(figsize="2x1", dpi=60, format="png")
-
-    if len(args) != 1:
-        sys.exit(not p.print_help())
-
-    (text,) = args
-
-    plt.rcdefaults()
-    fig = plt.figure(1, (iopts.w, iopts.h))
-    ax = fig.add_axes([0, 0, 1, 1])
-
-    ax.text(0.5, 0.5, text, color=opts.color, ha="center", va="center")
-    fontprop(ax, opts.customfont, size=opts.size)
-
-    ax.set_xlim(0, 1)
-    ax.set_ylim(0, 1)
-    ax.set_axis_off()
-
-    image_name = text + "." + iopts.format
-    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/jcvi/graphics/whisker.py b/jcvi/graphics/whisker.py
deleted file mode 100644
index be67d2c9..00000000
--- a/jcvi/graphics/whisker.py
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: UTF-8 -*-
-
-"""
-%prog datafile
-
-Generate whisker plot for two column file. The first column contains group, the
-second column is values.
-"""
-
-
-import sys
-
-from jcvi.apps.r import RTemplate
-from jcvi.apps.base import OptionParser
-
-
-whisker_template = """
-library(ggplot2)
-data <- read.table('$datafile', header=T, sep='\t')
-data$$$x <- factor(data$$$x, c($levels))
-m <- ggplot(data, aes($x, $y))
-m + geom_boxplot(colour="darkgreen", notch='$notch') + opts(title='$title') +
-scale_x_discrete('$lx') + scale_y_continuous('$ly') +
-theme(text=element_text(size=$fontsize))
-ggsave('$outfile')
-"""
-
-
-def main():
-    p = OptionParser(__doc__)
-    p.add_option(
-        "--levels", help="Reorder factors, comma-delimited [default: alphabetical]"
-    )
-    p.add_option(
-        "--notch",
-        default=False,
-        action="store_true",
-        help="notched box plot",
-    )
-    p.add_option("--title", default=" ", help="Title of the figure")
-    p.add_option("--xlabel", help="X-axis label")
-    p.add_option("--ylabel", help="Y-axis label")
-    p.add_option("--fontsize", default=16, help="Font size")
-    opts, args = p.parse_args()
-
-    if len(args) != 1:
-        sys.exit(not p.print_help())
-
-    (datafile,) = args
-    header = open(datafile)
-    levels = opts.levels
-    xlabel = opts.xlabel
-    ylabel = opts.ylabel
-    lx, ly = next(header).rstrip().split("\t")
-
-    lx = xlabel or lx
-    ly = ylabel or ly
-
-    if levels:
-        levels = levels.split(",")
-        levels = ", ".join("'{0}'".format(d) for d in levels)
-    rtemplate = RTemplate(whisker_template, locals())
-    rtemplate.run()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/jcvi/projects/heterosis.py b/jcvi/projects/heterosis.py
deleted file mode 100644
index d6cd3cf3..00000000
--- a/jcvi/projects/heterosis.py
+++ /dev/null
@@ -1,188 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: UTF-8 -*-
-
-"""
-List of routines to analyze rice heterosis datasets.
-Starting from a list of .count files. The .count files are generated by HT-seq.
-"""
-import os.path as op
-import os
-import sys
-import logging
-
-import numpy as np
-from collections import defaultdict
-
-from jcvi.formats.base import BaseFile
-from jcvi.apps.base import OptionParser, ActionDispatcher, glob, mkdir
-
-
-class RiceSample(BaseFile):
-    """
-    Examples:
-    1. LCS48-3_GTCCGC_L006_R_tophat_accepted_hits.count
-    2. RF18-1_GTTTCG_L003_R_tophat_accepted_hits.count
-
-    First letter is always tissue: L - leaf, R - root
-    Next componet before dash is sample name:
-    CS48 (9311)  is parent for F, CS66 (nipponbare) is parent for C
-    Each family has a sequential id, in the second example, family ID is 18. Two
-    parents for that family is F (CS48) and 18, hybrid progeny is F18.
-    """
-
-    def __init__(self, filename):
-        super(RiceSample, self).__init__(filename)
-        self.shortname = name = op.basename(filename).split("_")[0]
-        name, rep = name.split("-")
-        tissue, ind = name[0], name[1:]
-        self.tissue, self.ind = tissue, ind
-        self.rep = rep
-        self.P1 = self.P2 = "na"
-        if ind in ("CS48", "CS66"):
-            self.label = "P1"
-            self.family = "Recur"
-        elif ind[0] in ("F", "C"):
-            self.label = "F1"
-            self.P1 = "CS48" if ind[0] == "F" else "CS66"
-            self.P2 = ind[1:]
-            self.family = int(ind[1:])
-        else:
-            self.label = "P2"
-            self.family = int(ind)
-
-        fp = open(filename)
-        data = [row.split() for row in fp]
-        self.header, self.data = zip(*data)
-        self.data = np.array(self.data, dtype=np.int32)
-        self.working = True
-
-    def __str__(self):
-        return "\t".join(
-            str(x)
-            for x in (self.filename, self.tissue, self.label, self.family, self.rep)
-        )
-
-    def merge(self, rs):
-        logging.debug("Merge '{0}' and '{1}'".format(self.filename, rs.filename))
-        self.filename = ",".join((self.filename, rs.filename))
-        assert self.tissue == rs.tissue
-        assert self.ind == rs.ind
-        assert self.rep == rs.rep
-        assert self.label == rs.label
-        assert self.family == rs.family
-        assert self.P1 == rs.P1
-        assert self.P2 == rs.P2
-        assert self.header == rs.header
-        self.data += rs.data
-
-
-def main():
-
-    actions = (
-        ("prepare", "parse list of count files and group per family"),
-        ("mergeclean", "clean redundant merged bam files"),
-    )
-    p = ActionDispatcher(actions)
-    p.dispatch(globals())
-
-
-def mergeclean(args):
-    """
-    %prog mergeclean [*.bam|*.count]
-
-    Clean redundant merged bam/count files. This usually happens after running
-    formats.sam.merge() several times.
-    """
-    from itertools import groupby
-    from jcvi.apps.base import cleanup
-
-    p = OptionParser(mergeclean.__doc__)
-    p.set_sep(sep="_", help="Separator to group per prefix")
-    opts, args = p.parse_args(args)
-
-    if len(args) < 1:
-        sys.exit(not p.print_help())
-
-    files = sorted(args)
-    sep = opts.sep
-    key = lambda x: x.split(sep)[0]
-    mtime = lambda x: os.stat(x).st_mtime
-    for pf, fs in groupby(files, key=key):
-        fs = list(fs)
-        if len(fs) == 1:
-            continue
-        newest_f = max(fs, key=mtime)
-        print("|".join(fs), "=>", newest_f, file=sys.stderr)
-        fs.remove(newest_f)
-        cleanup(fs)
-
-
-def merge_counts(ss, outfile):
-    fw = open(outfile, "w")
-    header = ["Gene"] + [x.shortname for x in ss]
-    print("\t".join(header), file=fw)
-    data = [ss[0].header] + [x.data for x in ss]
-    data = zip(*data)
-    for a in data:
-        print("\t".join(str(x) for x in a), file=fw)
-    logging.debug("File `{0}` written (size={1}).".format(outfile, len(ss)))
-
-
-def prepare(args):
-    """
-    %prog prepare countfolder families
-
-    Parse list of count files and group per family into families folder.
-    """
-    p = OptionParser(prepare.__doc__)
-    opts, args = p.parse_args(args)
-
-    if len(args) != 2:
-        sys.exit(not p.print_help())
-
-    counts, families = args
-    countfiles = glob(op.join(counts, "*.count"))
-    countsdb = defaultdict(list)
-    for c in countfiles:
-        rs = RiceSample(c)
-        countsdb[(rs.tissue, rs.ind)].append(rs)
-
-    # Merge duplicates - data sequenced in different batches
-    key = lambda x: (x.label, x.rep)
-    for (tissue, ind), rs in sorted(countsdb.items()):
-        rs.sort(key=key)
-        nrs = len(rs)
-        for i in range(nrs):
-            ri = rs[i]
-            if not ri.working:
-                continue
-            for j in range(i + 1, nrs):
-                rj = rs[j]
-                if key(ri) != key(rj):
-                    continue
-                ri.merge(rj)
-                rj.working = False
-        countsdb[(tissue, ind)] = [x for x in rs if x.working]
-
-    # Group into families
-    mkdir("families")
-    for (tissue, ind), r in sorted(countsdb.items()):
-        r = list(r)
-        if r[0].label != "F1":
-            continue
-        P1, P2 = r[0].P1, r[0].P2
-        P1, P2 = countsdb[(tissue, P1)], countsdb[(tissue, P2)]
-        rs = P1 + P2 + r
-        groups = [1] * len(P1) + [2] * len(P2) + [3] * len(r)
-        assert len(rs) == len(groups)
-
-        outfile = "-".join((tissue, ind))
-        merge_counts(rs, op.join(families, outfile))
-        groupsfile = outfile + ".groups"
-        fw = open(op.join(families, groupsfile), "w")
-        print(",".join(str(x) for x in groups), file=fw)
-        fw.close()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/jcvi/variation/tassel.py b/jcvi/variation/tassel.py
deleted file mode 100644
index e5360e28..00000000
--- a/jcvi/variation/tassel.py
+++ /dev/null
@@ -1,124 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: UTF-8 -*-
-
-"""
-Run TASSEL GBS module, following manual here:
-http://www.maizegenetics.net/tassel/docs/TasselPipelineGBS.pdf
-"""
-
-import os.path as op
-import sys
-
-from jcvi.formats.base import write_file
-from jcvi.apps.base import OptionParser, ActionDispatcher, mkdir, get_abs_path
-
-
-def main():
-
-    actions = (("prepare", "prepare TASSEL pipeline"),)
-    p = ActionDispatcher(actions)
-    p.dispatch(globals())
-
-
-def run_pipeline(thome, plugin, options, log=False):
-    cmd = op.join(thome, "run_pipeline.pl")
-    cmd += " -Xmx16g -fork1 -{0} {1} -endPlugin -runfork1".format(plugin, options)
-    if log:
-        cmd += " | tee {0}.log".format(plugin)
-    return cmd
-
-
-def prepare(args):
-    """
-    %prog prepare barcode_key.csv reference.fasta
-
-    Prepare TASSEL pipeline.
-    """
-    valid_enzymes = (
-        "ApeKI|ApoI|BamHI|EcoT22I|HinP1I|HpaII|MseI|MspI|"
-        "NdeI|PasI|PstI|Sau3AI|SbfI|AsiSI-MspI|BssHII-MspI|"
-        "FseI-MspI|PaeR7I-HhaI|PstI-ApeKI|PstI-EcoT22I|PstI-MspI"
-        "PstI-TaqI|SalI-MspI|SbfI-MspI".split("|")
-    )
-    p = OptionParser(prepare.__doc__)
-    p.add_option(
-        "--enzyme",
-        default="ApeKI",
-        choices=valid_enzymes,
-        help="Restriction enzyme used",
-    )
-    p.set_home("tassel")
-    p.set_aligner(aligner="bwa")
-    p.set_cpus()
-    opts, args = p.parse_args(args)
-
-    if len(args) != 2:
-        sys.exit(not p.print_help())
-
-    barcode, reference = args
-    thome = opts.tassel_home
-    reference = get_abs_path(reference)
-    folders = (
-        "fastq",
-        "tagCounts",
-        "mergedTagCounts",
-        "topm",
-        "tbt",
-        "mergedTBT",
-        "hapmap",
-        "hapmap/raw",
-        "hapmap/mergedSNPs",
-        "hapmap/filt",
-        "hapmap/bpec",
-    )
-    for f in folders:
-        mkdir(f)
-
-    # Build the pipeline
-    runsh = []
-    o = "-i fastq -k {0} -e {1} -o tagCounts".format(barcode, opts.enzyme)
-    cmd = run_pipeline(thome, "FastqToTagCountPlugin", o)
-    runsh.append(cmd)
-
-    o = "-i tagCounts -o mergedTagCounts/myMasterTags.cnt"
-    o += " -c 5 -t mergedTagCounts/myMasterTags.cnt.fq"
-    cmd = run_pipeline(thome, "MergeMultipleTagCountPlugin", o)
-    runsh.append(cmd)
-    runsh.append("cd mergedTagCounts")
-
-    cmd = "python -m jcvi.apps.{0} align --cpus {1}".format(opts.aligner, opts.cpus)
-    cmd += " {0} myMasterTags.cnt.fq".format(reference)
-    runsh.append(cmd)
-    runsh.append("cd ..")
-
-    o = "-i mergedTagCounts/*.sam -o topm/myMasterTags.topm"
-    cmd = run_pipeline(thome, "SAMConverterPlugin", o)
-    runsh.append(cmd)
-
-    o = "-i mergedTBT/myStudy.tbt.byte -y -m topm/myMasterTags.topm"
-    o += " -mUpd topm/myMasterTagsWithVariants.topm"
-    o += " -o hapmap/raw/myGBSGenos_chr+.hmp.txt"
-    o += " -mnF 0.8 -p myPedigreeFile.ped -mnMAF 0.02 -mnMAC 100000"
-    o += " -ref {0} -sC 1 -eC 10".format(reference)
-    cmd = run_pipeline(thome, "TagsToSNPByAlignmentPlugin", o)
-    runsh.append(cmd)
-
-    o = "-hmp hapmap/raw/myGBSGenos_chr+.hmp.txt"
-    o += " -o hapmap/mergedSNPs/myGBSGenos_mergedSNPs_chr+.hmp.txt"
-    o += " -misMat 0.1 -p myPedigreeFile.ped -callHets -sC 1 -eC 10"
-    cmd = run_pipeline(thome, "MergeDuplicateSNPsPlugin", o)
-    runsh.append(cmd)
-
-    o = "-hmp hapmap/mergedSNPs/myGBSGenos_mergedSNPs_chr+.hmp.txt"
-    o += " -o hapmap/filt/myGBSGenos_mergedSNPsFilt_chr+.hmp.txt"
-    o += " -mnTCov 0.01 -mnSCov 0.2 -mnMAF 0.01 -sC 1 -eC 10"
-    # o += "-hLD -mnR2 0.2 -mnBonP 0.005"
-    cmd = run_pipeline(thome, "GBSHapMapFiltersPlugin", o)
-    runsh.append(cmd)
-
-    runfile = "run.sh"
-    write_file(runfile, "\n".join(runsh))
-
-
-if __name__ == "__main__":
-    main()