From 73aeecbc7e2e8fab232bad1940442da794e3fc9c Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Fri, 23 Aug 2024 13:12:44 +0000 Subject: [PATCH] Deployed 14090cb to v1.0.0 with MkDocs 1.6.0 and mike 2.1.3 --- v1.0.0/api/framework/index.html | 634 ++++++++++++------------- v1.0.0/api/parsers/blast/index.html | 390 ++++++++-------- v1.0.0/api/utils/index.html | 700 ++++++++++++++-------------- v1.0.0/sitemap.xml | 32 +- v1.0.0/sitemap.xml.gz | Bin 351 -> 350 bytes 5 files changed, 878 insertions(+), 878 deletions(-) diff --git a/v1.0.0/api/framework/index.html b/v1.0.0/api/framework/index.html index ad922bf..ec75bdc 100644 --- a/v1.0.0/api/framework/index.html +++ b/v1.0.0/api/framework/index.html @@ -1662,33 +1662,33 @@

Source code in camlhmp/framework.py -
30
-31
-32
-33
-34
-35
-36
-37
-38
-39
-40
-41
-42
-43
def print_version(framework: dict) -> None:
-    """
-    Print the version of the framework, then exit
-
-    Args:
-        framework (dict): the parsed YAML framework
-
-    Examples:
-        >>> from camlhmp.framework import print_version
-        >>> print_version(framework)
-    """
-    print(f"camlhmp, version {camlhmp.__version__}", file=sys.stderr)
-    print(f"schema {framework['metadata']['id']}, version {framework['metadata']['version']}", file=sys.stderr)
-    sys.exit(0)
+              
45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
def print_version(framework: dict) -> None:
+    """
+    Print the version of the framework, then exit
+
+    Args:
+        framework (dict): the parsed YAML framework
+
+    Examples:
+        >>> from camlhmp.framework import print_version
+        >>> print_version(framework)
+    """
+    print(f"camlhmp, version {camlhmp.__version__}", file=sys.stderr)
+    print(f"schema {framework['metadata']['id']}, version {framework['metadata']['version']}", file=sys.stderr)
+    sys.exit(0)
 
@@ -1777,39 +1777,7 @@

Source code in camlhmp/framework.py -
 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
+              
 78
  79
  80
  81
@@ -1846,76 +1814,108 @@ 

112 113 114 -115

def get_types(framework: dict) -> dict:
-    """
-    Get the types from the framework.
-
-    Example framework:
-    aliases:
-    - name: "ccr Type 2"
-      targets: ["ccrA1", "ccrB1"]
-    types:
-    - name: "I"
-      targets:
-        - "ccr Type 1"
-        - "mec Class B"
-
-    Args:
-        framework (dict): the parsed YAML framework
-
-    Returns:
-        dict: the types with associated targets
-
-    Examples:
-        >>> from camlhmp.framework import get_types
-        >>> types = get_types(framework)
-    """
-    types = {}
-    aliases = {}
-
-    # If aliases are present, save their targets
-    if "aliases" in framework:
-        for alias in framework["aliases"]:
-            aliases[alias["name"]] = alias["targets"]
-
-    # Save the types and their targets
-    for profile in framework["types"]:
-        types[profile["name"]] = {
-            "targets": [],
-            "excludes": [],
-        }
-        for target in profile["targets"]:
-            if target in aliases:
-                types[profile["name"]]["targets"] = [
-                    *types[profile["name"]]["targets"],
-                    *aliases[target],
-                ]
-            elif target in framework["targets"]:
-                types[profile["name"]]["targets"].append(target)
-            else:
-                raise ValueError(f"Target {target} not found in framework")
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
def get_types(framework: dict) -> dict:
+    """
+    Get the types from the framework.
+
+    Example framework:
+    aliases:
+    - name: "ccr Type 2"
+      targets: ["ccrA1", "ccrB1"]
+    types:
+    - name: "I"
+      targets:
+        - "ccr Type 1"
+        - "mec Class B"
+
+    Args:
+        framework (dict): the parsed YAML framework
 
-        # Capture any targets that should cause a profile to fail
-        if "excludes" in profile:
-            for exclude in profile["excludes"]:
-                if exclude in aliases:
-                    types[profile["name"]]["excludes"] = [
-                        *types[profile["name"]]["excludes"],
-                        *aliases[exclude],
-                    ]
-                elif exclude in framework["targets"]:
-                    types[profile["name"]]["excludes"].append(exclude)
-                else:
-                    raise ValueError(f"Target {exclude} not found in framework")
-
-    # Debugging information
-    logging.debug("camlhmp.framework.get_types")
-    if "aliases" in framework:
-        logging.debug(f"Aliases: {framework['aliases']}")
-    logging.debug(f"Targets: {framework['targets']}")
-    logging.debug(f"Types: {types}")
-
-    return types
+    Returns:
+        dict: the types with associated targets
+
+    Examples:
+        >>> from camlhmp.framework import get_types
+        >>> types = get_types(framework)
+    """
+    types = {}
+    aliases = {}
+
+    # If aliases are present, save their targets
+    if "aliases" in framework:
+        for alias in framework["aliases"]:
+            aliases[alias["name"]] = alias["targets"]
+
+    # Save the types and their targets
+    for profile in framework["types"]:
+        types[profile["name"]] = {
+            "targets": [],
+            "excludes": [],
+        }
+        for target in profile["targets"]:
+            if target in aliases:
+                types[profile["name"]]["targets"] = [
+                    *types[profile["name"]]["targets"],
+                    *aliases[target],
+                ]
+            elif target in framework["targets"]:
+                types[profile["name"]]["targets"].append(target)
+            else:
+                raise ValueError(f"Target {target} not found in framework")
+
+        # Capture any targets that should cause a profile to fail
+        if "excludes" in profile:
+            for exclude in profile["excludes"]:
+                if exclude in aliases:
+                    types[profile["name"]]["excludes"] = [
+                        *types[profile["name"]]["excludes"],
+                        *aliases[exclude],
+                    ]
+                elif exclude in framework["targets"]:
+                    types[profile["name"]]["excludes"].append(exclude)
+                else:
+                    raise ValueError(f"Target {exclude} not found in framework")
+
+    # Debugging information
+    logging.debug("camlhmp.framework.get_types")
+    if "aliases" in framework:
+        logging.debug(f"Aliases: {framework['aliases']}")
+    logging.debug(f"Targets: {framework['targets']}")
+    logging.debug(f"Types: {types}")
+
+    return types
 
@@ -2009,39 +2009,7 @@

Source code in camlhmp/framework.py -
118
-119
-120
-121
-122
-123
-124
-125
-126
-127
-128
-129
-130
-131
-132
-133
-134
-135
-136
-137
-138
-139
-140
-141
-142
-143
-144
-145
-146
-147
-148
-149
-150
+              
150
 151
 152
 153
@@ -2056,54 +2024,86 @@ 

162 163 164 -165

def check_types(types: dict, results: dict) -> dict:
-    """
-    Check the types against the results.
-
-    Args:
-        types (dict): the types with associated targets
-        results (dict): the BLAST results
-
-    Returns:
-        dict: the types and their outcome
-
-    Examples:
-        >>> from camlhmp.framework import check_types
-        >>> type_hits = check_types(types, target_results)
-    """
-    type_hits = {}
-    for type, vals in types.items():
-        targets = vals["targets"]
-        excludes = vals["excludes"]
-        type_hits[type] = {
-            "status": False,
-            "targets": [],
-            "missing": [],
-            "comment": "",
-        }
-        matched_all_targets = True
-        for target in targets:
-            if results[target]:
-                type_hits[type]["targets"].append(target)
-            else:
-                type_hits[type]["missing"].append(target)
-                matched_all_targets = False
-
-        # Check if any of the excludes are present
-        for exclude in excludes:
-            if results[exclude]:
-                type_hits[type][
-                    "comment"
-                ] = f"Excluded target {exclude} found, failing type {type}"
-                logging.debug(f"Excluded target {exclude} found, failing type {type}")
-                matched_all_targets = False
-        type_hits[type]["status"] = matched_all_targets
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
def check_types(types: dict, results: dict) -> dict:
+    """
+    Check the types against the results.
+
+    Args:
+        types (dict): the types with associated targets
+        results (dict): the BLAST results
+
+    Returns:
+        dict: the types and their outcome
 
-    # Debugging information
-    logging.debug("camlhmp.framework.check_types")
-    logging.debug(f"Type Hits: {type_hits}")
-
-    return type_hits
+    Examples:
+        >>> from camlhmp.framework import check_types
+        >>> type_hits = check_types(types, target_results)
+    """
+    type_hits = {}
+    for type, vals in types.items():
+        targets = vals["targets"]
+        excludes = vals["excludes"]
+        type_hits[type] = {
+            "status": False,
+            "targets": [],
+            "missing": [],
+            "comment": "",
+        }
+        matched_all_targets = True
+        for target in targets:
+            if results[target]:
+                type_hits[type]["targets"].append(target)
+            else:
+                type_hits[type]["missing"].append(target)
+                matched_all_targets = False
+
+        # Check if any of the excludes are present
+        for exclude in excludes:
+            if results[exclude]:
+                type_hits[type][
+                    "comment"
+                ] = f"Excluded target {exclude} found, failing type {type}"
+                logging.debug(f"Excluded target {exclude} found, failing type {type}")
+                matched_all_targets = False
+        type_hits[type]["status"] = matched_all_targets
+
+    # Debugging information
+    logging.debug("camlhmp.framework.check_types")
+    logging.debug(f"Type Hits: {type_hits}")
+
+    return type_hits
 
@@ -2211,39 +2211,7 @@

Source code in camlhmp/framework.py -
168
-169
-170
-171
-172
-173
-174
-175
-176
-177
-178
-179
-180
-181
-182
-183
-184
-185
-186
-187
-188
-189
-190
-191
-192
-193
-194
-195
-196
-197
-198
-199
-200
+              
200
 201
 202
 203
@@ -2285,81 +2253,113 @@ 

239 240 241 -242

def check_regions(types: dict, results: dict, min_coverage: int) -> dict:
-    """
-    Check the region types against the results.
-
-    Args:
-        types (dict): the types with associated targets
-        results (dict): the BLAST results
-        min_coverage (int): the minimum coverage required for a region
-
-    Returns:
-        dict: the types and their outcome
-
-    Examples:
-        >>> from camlhmp.framework import check_regions
-        >>> type_hits = check_regions(types, target_results, min_coverage)
-    """
-    type_hits = {}
-    for type, vals in types.items():
-        targets = vals["targets"]
-        excludes = vals["excludes"]
-        type_hits[type] = {
-            "status": False,
-            "targets": [],
-            "missing": [],
-            "coverage": [],
-            "hits": [],
-            "comment": [],
-        }
-        matched_all_targets = True
-        for target in targets:
-            if target in results:
-                if results[target]["coverage"] >= min_coverage:
-                    type_hits[type]["targets"].append(target)
-                else:
-                    type_hits[type]["missing"].append(target)
-                    matched_all_targets = False
-
-                type_hits[type]["coverage"].append(f"{results[target]['coverage']:.2f}")
-                type_hits[type]["hits"].append(str(len(results[target]["hits"])))
-                if len(targets) > 1:
-                    if results[target]["comment"]:
-                        formatted_comments = []
-                        for comment in results[target]["comment"]:
-                            formatted_comments.append(f"{target}:{comment}")
-                        if formatted_comments:
-                            type_hits[type]["comment"].append(
-                                ";".join(formatted_comments)
-                            )
-                else:
-                    if results[target]["comment"]:
-                        type_hits[type]["comment"].append(
-                            ";".join(results[target]["comment"])
-                        )
-            else:
-                matched_all_targets = False
-
-        # Check if any of the excludes are present
-        for exclude in excludes:
-            if results[exclude]:
-                if results[exclude]["coverage"] >= min_coverage:
-                    type_hits[type]["comment"].append(
-                        f"Excluded target {exclude} found, failing type {type}"
-                    )
-                    logging.debug(
-                        f"Excluded target {exclude} found, failing type {type}"
-                    )
-                    matched_all_targets = False
-
-        type_hits[type]["status"] = matched_all_targets
-
-    # Debugging information
-    logging.debug("camlhmp.framework.check_regions")
-    logging.debug(f"Type Hits: {type_hits}")
-
-    return type_hits
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
def check_regions(types: dict, results: dict, min_coverage: int) -> dict:
+    """
+    Check the region types against the results.
+
+    Args:
+        types (dict): the types with associated targets
+        results (dict): the BLAST results
+        min_coverage (int): the minimum coverage required for a region
+
+    Returns:
+        dict: the types and their outcome
+
+    Examples:
+        >>> from camlhmp.framework import check_regions
+        >>> type_hits = check_regions(types, target_results, min_coverage)
+    """
+    type_hits = {}
+    for type, vals in types.items():
+        targets = vals["targets"]
+        excludes = vals["excludes"]
+        type_hits[type] = {
+            "status": False,
+            "targets": [],
+            "missing": [],
+            "coverage": [],
+            "hits": [],
+            "comment": [],
+        }
+        matched_all_targets = True
+        for target in targets:
+            if target in results:
+                if results[target]["coverage"] >= min_coverage:
+                    type_hits[type]["targets"].append(target)
+                else:
+                    type_hits[type]["missing"].append(target)
+                    matched_all_targets = False
+
+                type_hits[type]["coverage"].append(f"{results[target]['coverage']:.2f}")
+                type_hits[type]["hits"].append(str(len(results[target]["hits"])))
+                if len(targets) > 1:
+                    if results[target]["comment"]:
+                        formatted_comments = []
+                        for comment in results[target]["comment"]:
+                            formatted_comments.append(f"{target}:{comment}")
+                        if formatted_comments:
+                            type_hits[type]["comment"].append(
+                                ";".join(formatted_comments)
+                            )
+                else:
+                    if results[target]["comment"]:
+                        type_hits[type]["comment"].append(
+                            ";".join(results[target]["comment"])
+                        )
+            else:
+                matched_all_targets = False
+
+        # Check if any of the excludes are present
+        for exclude in excludes:
+            if results[exclude]:
+                if results[exclude]["coverage"] >= min_coverage:
+                    type_hits[type]["comment"].append(
+                        f"Excluded target {exclude} found, failing type {type}"
+                    )
+                    logging.debug(
+                        f"Excluded target {exclude} found, failing type {type}"
+                    )
+                    matched_all_targets = False
+
+        type_hits[type]["status"] = matched_all_targets
+
+    # Debugging information
+    logging.debug("camlhmp.framework.check_regions")
+    logging.debug(f"Type Hits: {type_hits}")
+
+    return type_hits
 
diff --git a/v1.0.0/api/parsers/blast/index.html b/v1.0.0/api/parsers/blast/index.html index 6644925..e962e35 100644 --- a/v1.0.0/api/parsers/blast/index.html +++ b/v1.0.0/api/parsers/blast/index.html @@ -1577,9 +1577,7 @@

Source code in camlhmp/parsers/blast.py -
 5
- 6
- 7
+              
 7
  8
  9
 10
@@ -1668,98 +1666,100 @@ 

93 94 95 -96

def get_blast_allele_hits(
-    targets: dict, results: dict, min_pident: float, min_coverage: int
-) -> dict:
-    """
-    Find the allele hits in the BLAST results.
-
-    Args:
-        targets (dict): The list of target sequences {id: len(seq)}
-        results (list of dict): The BLAST results
-        min_pident (float): The minimum percent identity to count a hit
-        min_coverage (int): The minimum percent coverage to count a hit
-
-    Returns:
-        dict: The allele hits
-
-    Examples:
-        >>> from camlhmp.parsers.blast import get_blast_allele_hits
-        >>> target_results = get_blast_allele_hits(framework["targets"], blast_stdout, min_pident, min_coverage)
-    """
-    # Aggregate the hits for each target
-    target_results = {}
-
-    for result in results:
-        # Only process real hits
-        if result["qseqid"] != "NO_HITS":
-            target, allele = result["qseqid"].rsplit("_", 1)
-            if target not in target_results:
-                target_results[target] = {
-                    "known": [],
-                    "novel": [],
-                }
-
-            # only process hits that meet minimum criteria
-            if float(result["pident"]) >= min_pident and int(result["qcovs"]) >= min_coverage:
-                # hits that meet requirements
-
-                # Default to "NEW" allele, if perfect match use the allele ID
-                final_allele = "NEW"
-                final_type = "novel"
-                if float(result["pident"]) == 100 and int(result["qcovs"]) == 100:
-                    final_allele = allele
-                    final_type = "known"
-
-                target_results[target][final_type].append({
-                        "id": final_allele,
-                        "qcovs": result["qcovs"],
-                        "pident": float(result["pident"]),
-                        "bitscore": result["bitscore"],
-                })
-
-    final_allele_hits = {}
-    for target in targets:
-        final_allele_hits[target] = {
-            "id": "-",
-            "qcovs": 0,
-            "pident": 0,
-            "bitscore": 0,
-            "comment": "No hits met thresholds",
-        }
-
-    for target in target_results:
-        if len(target_results[target]["known"]):
-            # exact matches to known alleles were found
-            if len(target_results[target]["known"]) == 1:
-                final_allele_hits[target] = target_results[target]["known"][0]
-                final_allele_hits[target]["comment"] = ""
-            else:
-                # multiple hits
-                final_alleles = []
-                for hit in target_results[target]["known"]:
-                    final_alleles.append(hit["id"])
-
-                final_allele_hits[target] = target_results[target]["known"][0]
-                final_allele_hits[target]["id"] = ",".join(final_alleles)
-                final_allele_hits[target]["comment"] = "Exact matches to multiple alleles"
-        elif len(target_results[target]["novel"]):
-            # no exact matches to known alleles were found, but thresholds were met
-
-            # report the top scores
-            if len(target_results[target]["novel"]) == 1:
-                final_allele_hits[target] = target_results[target]["novel"][0]
-                final_allele_hits[target]["comment"] = ""
-            else:
-                # multiple hits, only report highest score
-                final_allele_hits[target] = sorted(target_results[target]["novel"], key=lambda x: x["bitscore"], reverse=True)[0]
-                final_allele_hits[target]["comment"] = "No exact matches to known alleles"
-
-    # Debugging information
-    logging.debug("camlhmp.engines.blast.get_blast_allele_hits")
-    logging.debug(f"Allele Hits: {final_allele_hits}")
-
-    return final_allele_hits
+96
+97
+98
def get_blast_allele_hits(
+    targets: dict, results: dict, min_pident: float, min_coverage: int
+) -> dict:
+    """
+    Find the allele hits in the BLAST results.
+
+    Args:
+        targets (dict): The list of target sequences {id: len(seq)}
+        results (list of dict): The BLAST results
+        min_pident (float): The minimum percent identity to count a hit
+        min_coverage (int): The minimum percent coverage to count a hit
+
+    Returns:
+        dict: The allele hits
+
+    Examples:
+        >>> from camlhmp.parsers.blast import get_blast_allele_hits
+        >>> target_results = get_blast_allele_hits(framework["targets"], blast_stdout, min_pident, min_coverage)
+    """
+    # Aggregate the hits for each target
+    target_results = {}
+
+    for result in results:
+        # Only process real hits
+        if result["qseqid"] != "NO_HITS":
+            target, allele = result["qseqid"].rsplit("_", 1)
+            if target not in target_results:
+                target_results[target] = {
+                    "known": [],
+                    "novel": [],
+                }
+
+            # only process hits that meet minimum criteria
+            if float(result["pident"]) >= min_pident and int(result["qcovs"]) >= min_coverage:
+                # hits that meet requirements
+
+                # Default to "NEW" allele, if perfect match use the allele ID
+                final_allele = "NEW"
+                final_type = "novel"
+                if float(result["pident"]) == 100 and int(result["qcovs"]) == 100:
+                    final_allele = allele
+                    final_type = "known"
+
+                target_results[target][final_type].append({
+                        "id": final_allele,
+                        "qcovs": result["qcovs"],
+                        "pident": float(result["pident"]),
+                        "bitscore": result["bitscore"],
+                })
+
+    final_allele_hits = {}
+    for target in targets:
+        final_allele_hits[target] = {
+            "id": "-",
+            "qcovs": 0,
+            "pident": 0,
+            "bitscore": 0,
+            "comment": "No hits met thresholds",
+        }
+
+    for target in target_results:
+        if len(target_results[target]["known"]):
+            # exact matches to known alleles were found
+            if len(target_results[target]["known"]) == 1:
+                final_allele_hits[target] = target_results[target]["known"][0]
+                final_allele_hits[target]["comment"] = ""
+            else:
+                # multiple hits
+                final_alleles = []
+                for hit in target_results[target]["known"]:
+                    final_alleles.append(hit["id"])
+
+                final_allele_hits[target] = target_results[target]["known"][0]
+                final_allele_hits[target]["id"] = ",".join(final_alleles)
+                final_allele_hits[target]["comment"] = "Exact matches to multiple alleles"
+        elif len(target_results[target]["novel"]):
+            # no exact matches to known alleles were found, but thresholds were met
+
+            # report the top scores
+            if len(target_results[target]["novel"]) == 1:
+                final_allele_hits[target] = target_results[target]["novel"][0]
+                final_allele_hits[target]["comment"] = ""
+            else:
+                # multiple hits, only report highest score
+                final_allele_hits[target] = sorted(target_results[target]["novel"], key=lambda x: x["bitscore"], reverse=True)[0]
+                final_allele_hits[target]["comment"] = "No exact matches to known alleles"
+
+    # Debugging information
+    logging.debug("camlhmp.engines.blast.get_blast_allele_hits")
+    logging.debug(f"Allele Hits: {final_allele_hits}")
+
+    return final_allele_hits
 
@@ -1881,9 +1881,7 @@

Source code in camlhmp/parsers/blast.py -
 99
-100
-101
+              
101
 102
 103
 104
@@ -1948,74 +1946,76 @@ 

163 164 165 -166

def get_blast_region_hits(
-    targets: dict, results: dict, min_pident: float, min_coverage: int
-) -> dict:
-    """
-    Aggregate multiple target hits for a region from the BLAST results.
-
-    Args:
-        targets (dict): The list of target sequences {id: len(seq)}
-        results (list of dict): The BLAST results
-        min_pident (float): The minimum percent identity to count a hit
-        min_coverage (int): The minimum percent coverage to count a hit
-
-    Returns:
-        dict: The target hits
-
-    Examples:
-        >>> from camlhmp.parsers.blast import get_blast_region_hits
-        >>> target_results = get_blast_region_hits(target_lengths, blast_stdout, min_pident, min_coverage)
-    """
-    # Aggregate the hits for each target
-    target_results = {}
-    for target, length in targets.items():
-        target_results[target] = {
-            "hits": [],
-            "coverage": [0] * length,  # Used to calculate coverage across multiple hits
-            "comment": [],
-        }
-
-    # Process each blast hit
-    for result in results:
-        # Only process real hits
-        if result["qseqid"] != "NO_HITS":
-            # Only keep hits that pass the minimum percent identity
-            if float(result["pident"]) >= min_pident:
-                # Add hit to list of hits
-                target_results[result["qseqid"]]["hits"].append(result)
-
-                # Set the coverage to 1 for each base in the hit
-                for i in range(int(result["qstart"]) - 1, int(result["qend"])):
-                    target_results[result["qseqid"]]["coverage"][i] += 1
-
-    # Determine coverage for each target
-    final_results = {}
-    for target, vals in target_results.items():
-        final_results[target] = {
-            "hits": vals["hits"],
-            "coverage": 100
-            * (
-                sum([1 for i in vals["coverage"] if i > 0])
-                / float(len(vals["coverage"]))
-            ),
-            "comment": [],
-        }
-        if len(vals["hits"]) > 1:
-            final_results[target]["comment"].append(
-                f"Coverage based on {len(vals['hits'])} hits"
-            )
-
-        if sum([1 for i in vals["coverage"] if i > 1]):
-            final_results[target]["comment"].append(
-                "There were one or more overlapping hits"
-            )
-
-    # Debugging information
-    logging.debug("camlhmp.engines.blast_region.get_blast_region_hits")
-    logging.debug(f"Profile Hits: {final_results}")
-
-    return final_results
+166
+167
+168
def get_blast_region_hits(
+    targets: dict, results: dict, min_pident: float, min_coverage: int
+) -> dict:
+    """
+    Aggregate multiple target hits for a region from the BLAST results.
+
+    Args:
+        targets (dict): The list of target sequences {id: len(seq)}
+        results (list of dict): The BLAST results
+        min_pident (float): The minimum percent identity to count a hit
+        min_coverage (int): The minimum percent coverage to count a hit
+
+    Returns:
+        dict: The target hits
+
+    Examples:
+        >>> from camlhmp.parsers.blast import get_blast_region_hits
+        >>> target_results = get_blast_region_hits(target_lengths, blast_stdout, min_pident, min_coverage)
+    """
+    # Aggregate the hits for each target
+    target_results = {}
+    for target, length in targets.items():
+        target_results[target] = {
+            "hits": [],
+            "coverage": [0] * length,  # Used to calculate coverage across multiple hits
+            "comment": [],
+        }
+
+    # Process each blast hit
+    for result in results:
+        # Only process real hits
+        if result["qseqid"] != "NO_HITS":
+            # Only keep hits that pass the minimum percent identity
+            if float(result["pident"]) >= min_pident:
+                # Add hit to list of hits
+                target_results[result["qseqid"]]["hits"].append(result)
+
+                # Set the coverage to 1 for each base in the hit
+                for i in range(int(result["qstart"]) - 1, int(result["qend"])):
+                    target_results[result["qseqid"]]["coverage"][i] += 1
+
+    # Determine coverage for each target
+    final_results = {}
+    for target, vals in target_results.items():
+        final_results[target] = {
+            "hits": vals["hits"],
+            "coverage": 100
+            * (
+                sum([1 for i in vals["coverage"] if i > 0])
+                / float(len(vals["coverage"]))
+            ),
+            "comment": [],
+        }
+        if len(vals["hits"]) > 1:
+            final_results[target]["comment"].append(
+                f"Coverage based on {len(vals['hits'])} hits"
+            )
+
+        if sum([1 for i in vals["coverage"] if i > 1]):
+            final_results[target]["comment"].append(
+                "There were one or more overlapping hits"
+            )
+
+    # Debugging information
+    logging.debug("camlhmp.engines.blast_region.get_blast_region_hits")
+    logging.debug(f"Profile Hits: {final_results}")
+
+    return final_results
 
@@ -2109,9 +2109,7 @@

Source code in camlhmp/parsers/blast.py -
169
-170
-171
+              
171
 172
 173
 174
@@ -2134,32 +2132,34 @@ 

191 192 193 -194

def get_blast_target_hits(targets: list, results: dict) -> dict:
-    """
-    Find the target hits in the BLAST results.
-
-    Args:
-        targets (list): The list of target sequences
-        results (dict): The BLAST results
-
-    Returns:
-        dict: The target hits
-
-    Examples:
-        >>> from camlhmp.parsers.blast import get_blast_target_hits
-        >>> target_results = get_blast_target_hits(framework["targets"], hits)
-    """
-    target_hits = {}
-    for target in targets:
-        target_hits[target] = False
-        if target in results:
-            target_hits[target] = True
-
-    # Debugging information
-    logging.debug("camlhmp.engines.blast.get_blast_target_hits")
-    logging.debug(f"Profile Hits: {target_hits}")
-
-    return target_hits
+194
+195
+196
def get_blast_target_hits(targets: list, results: dict) -> dict:
+    """
+    Find the target hits in the BLAST results.
+
+    Args:
+        targets (list): The list of target sequences
+        results (dict): The BLAST results
+
+    Returns:
+        dict: The target hits
+
+    Examples:
+        >>> from camlhmp.parsers.blast import get_blast_target_hits
+        >>> target_results = get_blast_target_hits(framework["targets"], hits)
+    """
+    target_hits = {}
+    for target in targets:
+        target_hits[target] = False
+        if target in results:
+            target_hits[target] = True
+
+    # Debugging information
+    logging.debug("camlhmp.engines.blast.get_blast_target_hits")
+    logging.debug(f"Profile Hits: {target_hits}")
+
+    return target_hits
 
diff --git a/v1.0.0/api/utils/index.html b/v1.0.0/api/utils/index.html index debee59..2dfe623 100644 --- a/v1.0.0/api/utils/index.html +++ b/v1.0.0/api/utils/index.html @@ -1782,8 +1782,7 @@

Source code in camlhmp/utils.py -
15
-16
+              
16
 17
 18
 19
@@ -1836,61 +1835,62 @@ 

66 67 68 -69

def execute(
-    cmd,
-    directory=Path.cwd(),
-    capture=False,
-    stdout_file=None,
-    stderr_file=None,
-    allow_fail=False,
-):
-    """
-    A simple wrapper around executor.
-
-    Args:
-        cmd (str): The command to be executed
-        directory (Path, optional): The directory to execute the command in. Defaults to Path.cwd().
-        capture (bool, optional): Capture the output of the command. Defaults to False.
-        stdout_file (Path, optional): The file to write stdout to. Defaults to None.
-        stderr_file (Path, optional): The file to write stderr to. Defaults to None.
-        allow_fail (bool, optional): Allow the command to fail. Defaults to False.
-
-    Returns:
-        Union[bool, list]: True if successful, otherwise a list of stdout and stderr
-
-    Raises:
-        ExternalCommandFailed: If the command fails and allow_fail is True
-
-    Examples:
-        >>> from camlhmp.utils import execute
-        >>> stdout, stderr = execute(
-                f"{cat_type} {subject} | {engine} -query {query} -subject - -outfmt '6 {outfmt}' {qcov_hsp_perc} {perc_identity}",
-                capture=True,
-            )
-    """
-    try:
-        command = ExternalCommand(
-            cmd,
-            directory=directory,
-            capture=True,
-            capture_stderr=True,
-            stdout_file=stdout_file,
-            stderr_file=stderr_file,
-        )
-
-        command.start()
-        logging.debug(command.decoded_stdout)
-        logging.debug(command.decoded_stderr)
-
-        if capture:
-            return [command.decoded_stdout, command.decoded_stderr]
-        return True
-    except ExternalCommandFailed as e:
-        if allow_fail:
-            logging.error(e)
-            sys.exit(e.returncode)
-        else:
-            return None
+69
+70
def execute(
+    cmd,
+    directory=Path.cwd(),
+    capture=False,
+    stdout_file=None,
+    stderr_file=None,
+    allow_fail=False,
+):
+    """
+    A simple wrapper around executor.
+
+    Args:
+        cmd (str): The command to be executed
+        directory (Path, optional): The directory to execute the command in. Defaults to Path.cwd().
+        capture (bool, optional): Capture the output of the command. Defaults to False.
+        stdout_file (Path, optional): The file to write stdout to. Defaults to None.
+        stderr_file (Path, optional): The file to write stderr to. Defaults to None.
+        allow_fail (bool, optional): Allow the command to fail. Defaults to False.
+
+    Returns:
+        Union[bool, list]: True if successful, otherwise a list of stdout and stderr
+
+    Raises:
+        ExternalCommandFailed: If the command fails and allow_fail is True
+
+    Examples:
+        >>> from camlhmp.utils import execute
+        >>> stdout, stderr = execute(
+                f"{cat_type} {subject} | {engine} -query {query} -subject - -outfmt '6 {outfmt}' {qcov_hsp_perc} {perc_identity}",
+                capture=True,
+            )
+    """
+    try:
+        command = ExternalCommand(
+            cmd,
+            directory=directory,
+            capture=True,
+            capture_stderr=True,
+            stdout_file=stdout_file,
+            stderr_file=stderr_file,
+        )
+
+        command.start()
+        logging.debug(command.decoded_stdout)
+        logging.debug(command.decoded_stderr)
+
+        if capture:
+            return [command.decoded_stdout, command.decoded_stderr]
+        return True
+    except ExternalCommandFailed as e:
+        if allow_fail:
+            logging.error(e)
+            sys.exit(e.returncode)
+        else:
+            return None
 
@@ -1918,8 +1918,7 @@

Source code in camlhmp/utils.py -
72
-73
+              
73
 74
 75
 76
@@ -1940,29 +1939,30 @@ 

91 92 93 -94

def check_dependencies():
-    """
-    Check if all dependencies are installed.
-
-    Examples:
-        >>> from camlhmp.utils import check_dependencies
-        >>> check_dependencies()
-    """
-    exit_code = 0
-    print("Checking dependencies...", file=sys.stderr)
-    for program in ["blastn"]:
-        which_path = which(program)
-        if which_path:
-            print(f"Found {program} at {which_path}", file=sys.stderr)
-        else:
-            print(f"{program} not found", file=sys.stderr)
-            exit_code = 1
-
-    if exit_code == 1:
-        print("Missing dependencies, please check.", file=sys.stderr)
-    else:
-        print("You are all set!", file=sys.stderr)
-    sys.exit(exit_code)
+94
+95
def check_dependencies():
+    """
+    Check if all dependencies are installed.
+
+    Examples:
+        >>> from camlhmp.utils import check_dependencies
+        >>> check_dependencies()
+    """
+    exit_code = 0
+    print("Checking dependencies...", file=sys.stderr)
+    for program in ["blastn"]:
+        which_path = which(program)
+        if which_path:
+            print(f"Found {program} at {which_path}", file=sys.stderr)
+        else:
+            print(f"{program} not found", file=sys.stderr)
+            exit_code = 1
+
+    if exit_code == 1:
+        print("Missing dependencies, please check.", file=sys.stderr)
+    else:
+        print("You are all set!", file=sys.stderr)
+    sys.exit(exit_code)
 
@@ -2013,8 +2013,7 @@

Source code in camlhmp/utils.py -
 97
- 98
+              
 98
  99
 100
 101
@@ -2030,24 +2029,25 @@ 

111 112 113 -114

def get_platform() -> str:
-    """
-    Get the platform of the executing machine
-
-    Returns:
-        str: The platform of the executing machine
-
-    Examples:
-        >>> from camlhmp.utils import get_platform
-        >>> platform = get_platform()
-    """
-    if platform == "darwin":
-        return "mac"
-    elif platform == "win32":
-        # Windows is not supported
-        logging.error("Windows is not supported.")
-        sys.exit(1)
-    return "linux"
+114
+115
def get_platform() -> str:
+    """
+    Get the platform of the executing machine
+
+    Returns:
+        str: The platform of the executing machine
+
+    Examples:
+        >>> from camlhmp.utils import get_platform
+        >>> platform = get_platform()
+    """
+    if platform == "darwin":
+        return "mac"
+    elif platform == "win32":
+        # Windows is not supported
+        logging.error("Windows is not supported.")
+        sys.exit(1)
+    return "linux"
 
@@ -2160,8 +2160,7 @@

Source code in camlhmp/utils.py -
117
-118
+              
118
 119
 120
 121
@@ -2183,30 +2182,31 @@ 

137 138 139 -140

def validate_file(filename: str) -> str:
-    """
-    Validate a file exists and not empty, if passing return the absolute path
-
-    Args:
-        filename (str): a file to validate exists
-
-    Returns:
-        str: absolute path to file
-
-    Raises:
-        FileNotFoundError: if the file does not exist
-        ValueError: if the file is empty
-
-    Examples:
-        >>> from camlhmp.utils import validate_file
-        >>> file = validate_file("data.fasta")
-    """
-    f = Path(filename)
-    if not f.exists():
-        raise FileNotFoundError(f"File ('{filename}') not found, cannot continue")
-    elif f.stat().st_size == 0:
-        raise ValueError(f"File ('{filename}') is empty, cannot continue")
-    return f.absolute()
+140
+141
def validate_file(filename: str) -> str:
+    """
+    Validate a file exists and not empty, if passing return the absolute path
+
+    Args:
+        filename (str): a file to validate exists
+
+    Returns:
+        str: absolute path to file
+
+    Raises:
+        FileNotFoundError: if the file does not exist
+        ValueError: if the file is empty
+
+    Examples:
+        >>> from camlhmp.utils import validate_file
+        >>> file = validate_file("data.fasta")
+    """
+    f = Path(filename)
+    if not f.exists():
+        raise FileNotFoundError(f"File ('{filename}') not found, cannot continue")
+    elif f.stat().st_size == 0:
+        raise ValueError(f"File ('{filename}') is empty, cannot continue")
+    return f.absolute()
 
@@ -2294,35 +2294,35 @@

Source code in camlhmp/utils.py -
def file_exists_error(filename: str, force: bool = False):
-    """
-    Determine if a file exists and raise an error if it does.
-
-    Args:
-        filename (str): the file to check
-        force (bool, optional): force overwrite. Defaults to False.
-
-    Raises:
-        FileExistsError: if the file exists and force is False
-    """
-    if Path(filename).exists() and not force:
-        raise FileExistsError(
-            f"Results already exists! Use --force to overwrite: {filename}"
-        )
+              
def file_exists_error(filename: str, force: bool = False):
+    """
+    Determine if a file exists and raise an error if it does.
+
+    Args:
+        filename (str): the file to check
+        force (bool, optional): force overwrite. Defaults to False.
+
+    Raises:
+        FileExistsError: if the file exists and force is False
+    """
+    if Path(filename).exists() and not force:
+        raise FileExistsError(
+            f"Results already exists! Use --force to overwrite: {filename}"
+        )
 
@@ -2416,39 +2416,39 @@

Source code in camlhmp/utils.py -
def parse_seq(seqfile: str, format: str) -> SeqIO:
-    """
-    Parse a sequence file containing a single record.
-
-    Args:
-        seqfile (str): input file to be read
-        format (str): format of the input file
-
-    Returns:
-        SeqIO: the parsed file as a SeqIO object
-
-    Examples:
-        >>> from camlhmp.utils import parse_seq
-        >>> seq = parse_seq("data.fasta", "fasta")
-    """
-    with open(seqfile, "rt") as fh:
-        return SeqIO.read(fh, format)
+              
def parse_seq(seqfile: str, format: str) -> SeqIO:
+    """
+    Parse a sequence file containing a single record.
+
+    Args:
+        seqfile (str): input file to be read
+        format (str): format of the input file
+
+    Returns:
+        SeqIO: the parsed file as a SeqIO object
+
+    Examples:
+        >>> from camlhmp.utils import parse_seq
+        >>> seq = parse_seq("data.fasta", "fasta")
+    """
+    with open(seqfile, "rt") as fh:
+        return SeqIO.read(fh, format)
 
@@ -2542,39 +2542,39 @@

Source code in camlhmp/utils.py -
def parse_seqs(seqfile: str, format: str) -> SeqIO:
-    """
-    Parse a sequence file containing a multiple records.
-
-    Args:
-        seqfile (str): input file to be read
-        format (str): format of the input file
-
-    Returns:
-        SeqIO: the parsed file as a SeqIO object
-
-    Examples:
-        >>> from camlhmp.utils import parse_seqs
-        >>> seqs = parse_seqs("data.fasta", "fasta")
-    """
-    with open(seqfile, "rt") as fh:
-        return list(SeqIO.parse(fh, format))
+              
def parse_seqs(seqfile: str, format: str) -> SeqIO:
+    """
+    Parse a sequence file containing a multiple records.
+
+    Args:
+        seqfile (str): input file to be read
+        format (str): format of the input file
+
+    Returns:
+        SeqIO: the parsed file as a SeqIO object
+
+    Examples:
+        >>> from camlhmp.utils import parse_seqs
+        >>> seqs = parse_seqs("data.fasta", "fasta")
+    """
+    with open(seqfile, "rt") as fh:
+        return list(SeqIO.parse(fh, format))
 
@@ -2682,59 +2682,59 @@

Source code in camlhmp/utils.py -
def parse_table(
-    csvfile: str, delimiter: str = "\t", has_header: bool = True
-) -> Union[list, dict]:
-    """
-    Parse a delimited file.
-
-    Args:
-        csvfile (str): input delimited file to be parsed
-        delimiter (str, optional): delimter used to separate column values. Defaults to '\t'.
-        has_header (bool, optional): the first line should be treated as a header. Defaults to True.
-
-    Returns:
-        Union[list, dict]: A dict is returned if a header is present, otherwise a list is returned
-
-    Examples:
-        >>> from camlhmp.utils import parse_table
-        >>> data = parse_table("data.tsv")
-    """
-    data = []
-    with open(csvfile, "rt") as fh:
-        for row in (
-            csv.DictReader(fh, delimiter=delimiter)
-            if has_header
-            else csv.reader(fh, delimiter=delimiter)
-        ):
-            data.append(row)
-    return data
+              
def parse_table(
+    csvfile: str, delimiter: str = "\t", has_header: bool = True
+) -> Union[list, dict]:
+    """
+    Parse a delimited file.
+
+    Args:
+        csvfile (str): input delimited file to be parsed
+        delimiter (str, optional): delimter used to separate column values. Defaults to '\t'.
+        has_header (bool, optional): the first line should be treated as a header. Defaults to True.
+
+    Returns:
+        Union[list, dict]: A dict is returned if a header is present, otherwise a list is returned
+
+    Examples:
+        >>> from camlhmp.utils import parse_table
+        >>> data = parse_table("data.tsv")
+    """
+    data = []
+    with open(csvfile, "rt") as fh:
+        for row in (
+            csv.DictReader(fh, delimiter=delimiter)
+            if has_header
+            else csv.reader(fh, delimiter=delimiter)
+        ):
+            data.append(row)
+    return data
 
@@ -2814,37 +2814,37 @@

Source code in camlhmp/utils.py -
def parse_yaml(yamlfile: str) -> Union[list, dict]:
-    """
-    Parse a YAML file.
-
-    Args:
-        yamlfile (str): input YAML file to be read
-
-    Returns:
-        Union[list, dict]: the values parsed from the YAML file
-
-    Examples:
-        >>> from camlhmp.utils import parse_yaml
-        >>> data = parse_yaml("data.yaml")
-    """
-    with open(yamlfile, "rt") as fh:
-        return yaml.safe_load(fh)
+              
def parse_yaml(yamlfile: str) -> Union[list, dict]:
+    """
+    Parse a YAML file.
+
+    Args:
+        yamlfile (str): input YAML file to be read
+
+    Returns:
+        Union[list, dict]: the values parsed from the YAML file
+
+    Examples:
+        >>> from camlhmp.utils import parse_yaml
+        >>> data = parse_yaml("data.yaml")
+    """
+    with open(yamlfile, "rt") as fh:
+        return yaml.safe_load(fh)
 
@@ -2915,49 +2915,49 @@

Source code in camlhmp/utils.py -
def write_tsv(data: list, output: str):
-    """
-    Write the dictionary to a TSV file.
-
-    Args:
-        data (list): a list of dicts to be written
-        output (str): The output file
-
-    Examples:
-        >>> from camlhmp.utils import write_tsv
-        >>> write_tsv(data, "results.tsv")
-    """
-    logging.debug(f"Writing TSV results to {output}")
-    with open(output, "w") as csvfile:
-        writer = csv.DictWriter(csvfile, delimiter="\t", fieldnames=data[0].keys())
-        writer.writeheader()
-        if next(iter(data[0].values())) != "NO_HITS":
-            # Data is not empty
-            writer.writerows(data)
-        else:
-            # Data is empty
-            logging.debug("NO_HITS found, only writing the column headers")
+              
def write_tsv(data: list, output: str):
+    """
+    Write the dictionary to a TSV file.
+
+    Args:
+        data (list): a list of dicts to be written
+        output (str): The output file
+
+    Examples:
+        >>> from camlhmp.utils import write_tsv
+        >>> write_tsv(data, "results.tsv")
+    """
+    logging.debug(f"Writing TSV results to {output}")
+    with open(output, "w") as csvfile:
+        writer = csv.DictWriter(csvfile, delimiter="\t", fieldnames=data[0].keys())
+        writer.writeheader()
+        if next(iter(data[0].values())) != "NO_HITS":
+            # Data is not empty
+            writer.writerows(data)
+        else:
+            # Data is empty
+            logging.debug("NO_HITS found, only writing the column headers")
 
diff --git a/v1.0.0/sitemap.xml b/v1.0.0/sitemap.xml index eab2cc3..d0b1d34 100644 --- a/v1.0.0/sitemap.xml +++ b/v1.0.0/sitemap.xml @@ -2,82 +2,82 @@ https://rpetit3.github.io/camlhmp/v1.0.0/ - 2024-08-15 + 2024-08-23 daily https://rpetit3.github.io/camlhmp/v1.0.0/CHANGELOG/ - 2024-08-15 + 2024-08-23 daily https://rpetit3.github.io/camlhmp/v1.0.0/about/ - 2024-08-15 + 2024-08-23 daily https://rpetit3.github.io/camlhmp/v1.0.0/available-tools/ - 2024-08-15 + 2024-08-23 daily https://rpetit3.github.io/camlhmp/v1.0.0/installation/ - 2024-08-15 + 2024-08-23 daily https://rpetit3.github.io/camlhmp/v1.0.0/schema/ - 2024-08-15 + 2024-08-23 daily https://rpetit3.github.io/camlhmp/v1.0.0/api/ - 2024-08-15 + 2024-08-23 daily https://rpetit3.github.io/camlhmp/v1.0.0/api/framework/ - 2024-08-15 + 2024-08-23 daily https://rpetit3.github.io/camlhmp/v1.0.0/api/utils/ - 2024-08-15 + 2024-08-23 daily https://rpetit3.github.io/camlhmp/v1.0.0/api/engines/blast/ - 2024-08-15 + 2024-08-23 daily https://rpetit3.github.io/camlhmp/v1.0.0/api/parsers/blast/ - 2024-08-15 + 2024-08-23 daily https://rpetit3.github.io/camlhmp/v1.0.0/cli/ - 2024-08-15 + 2024-08-23 daily https://rpetit3.github.io/camlhmp/v1.0.0/cli/camlhmp-extract/ - 2024-08-15 + 2024-08-23 daily https://rpetit3.github.io/camlhmp/v1.0.0/cli/blast/camlhmp-blast-alleles/ - 2024-08-15 + 2024-08-23 daily https://rpetit3.github.io/camlhmp/v1.0.0/cli/blast/camlhmp-blast-regions/ - 2024-08-15 + 2024-08-23 daily https://rpetit3.github.io/camlhmp/v1.0.0/cli/blast/camlhmp-blast-targets/ - 2024-08-15 + 2024-08-23 daily \ No newline at end of file diff --git a/v1.0.0/sitemap.xml.gz b/v1.0.0/sitemap.xml.gz index 3ddd58b6f4d616fc3b616bcdeb0a770198150bc4..ba35de8fb454cee98fcbf57ad46295cda033c78d 100644 GIT binary patch literal 350 zcmV-k0ipgMiwFn+(Z^;2|8r?{Wo=<_E_iKh0M(YkPQx$^hVOfds`oZ!<1kR$2_YB< z1Q&RKH1pDkG%2yWj<+XW2ihG8$+;x9oL`?KTTZz-b~ZUcK;vDNUgbrafKz^FoUYRM zx5sRqZt6uj1dBjP40c$hEm40f#5j&Q8iG#tnERjwsx}%Z&Q4L}H)*{{;+h2J(`l8~ zt9nwwC=+@hL#tdHYKJB_UMShww(G?q^5lgm#T0&dTOxISS1*g@bylpi zl}^Jxz-PUa#(tIJ!ZTxuU#>q9-ap;FY#*Lqw+vWi;|Jovd5HS5v5=AX78wv5ha|0) z6nlvQ?%b^$Dxo(_DEC2jF#7Ppgm)mrqO&GeaN0OP(M%*vn0py8&Q>|B%6Hi$Cf58%IxsP*d2-5J+@|F$iE9*?PRCVP zugg&dqfF?43|)!ZP}5h!c&Sxuo3@kt$Rl!D$T9rnvYp^P3oQ@|}h0 zM!6bx0lvzuGWNTWXPya5{&8JI`0#xHx_Nwg+c02NmG6lI=RWGI%0fooTVz0N9FnqD zQS2oKxKp=ssD#ciq1*-4!r;Rv6W*Q-i_RKZ!PUkA%4#HG!rZBVakjEy*48kgJh?JB xP*9r7+fH@x%YL-7I5EHiGR`1C9VZ^=3`m6<=pQmTZt*8w{s1Tk!)|#C005K;seu3h