diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1bc666ca..25e2bf00 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -72,6 +72,7 @@ jobs: ppanggolin write -p stepbystep/pangenome.h5 --output stepbystep -f --soft_core 0.9 --dup_margin 0.06 --gexf --light_gexf --csv --Rtab --projection --stats --partitions --compress --json --regions --spots --borders --families_tsv --cpu 1 ppanggolin fasta -p stepbystep/pangenome.h5 --output stepbystep -f --prot_families all --gene_families shell --regions all --fasta organisms.fasta.list ppanggolin draw -p stepbystep/pangenome.h5 --spots all -o stepbystep -f + ppanggolin metrics -p stepbystep/pangenome.h5 --genome_fluidity --info_modules --no_print_info -f cd - - name: gbff parsing and MSA computing shell: bash -l {0} diff --git a/VERSION b/VERSION index 1bb4cc13..03fcdbc4 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.2.72 \ No newline at end of file +1.2.73 diff --git a/ppanggolin/metrics/fluidity.py b/ppanggolin/metrics/fluidity.py index a7903d49..b302abc0 100644 --- a/ppanggolin/metrics/fluidity.py +++ b/ppanggolin/metrics/fluidity.py @@ -37,7 +37,7 @@ def genomes_fluidity(pangenome, disable_bar=False): g_sum = 0 logging.getLogger().debug("Get number of families in each organisms") org2_nb_fam = nb_fam_per_org(pangenome, disable_bar) - logging.getLogger().info("Compute rate of unique family for each genome combination") + logging.getLogger().info(f"Compute rate of unique family for each genome combination in {subset}") for c_organisms in tqdm(list(combinations(pangenome.organisms, 2)), unit="combination", disable=disable_bar): tot_fam = org2_nb_fam.get(c_organisms[0].name) + org2_nb_fam.get(c_organisms[1].name) common_fam = popcount(c_organisms[0].bitarray & c_organisms[1].bitarray) - 1 diff --git a/ppanggolin/metrics/metrics.py b/ppanggolin/metrics/metrics.py index 1e37305c..cb3a5545 100644 --- a/ppanggolin/metrics/metrics.py +++ b/ppanggolin/metrics/metrics.py @@ -29,8 +29,8 @@ def check_metric(pangenome, all=False, genome_fluidity=False, family_fluidity=Fa "Please use -f option if you REALLY want to compute again") - -def compute_metrics(pangenome, all=False, genome_fluidity=False, family_fluidity=False, disable_bar=False): +def compute_metrics(pangenome, all=False, genome_fluidity=False, family_fluidity=False, info_modules=False, + disable_bar=False): """Compute the metrics :param pangenome: pangenome which will be used to compute the genomes fluidity :type pangenome: Pangenome @@ -40,6 +40,8 @@ def compute_metrics(pangenome, all=False, genome_fluidity=False, family_fluidity :type genome_fluidity: bool :param family_fluidity: Ask to compute family fluidity :type family_fluidity: bool + :param info_modules: Ask to compute more information about module + :type info_modules: bool :param disable_bar: Disable the progress bar :type disable_bar: bool @@ -53,7 +55,7 @@ def compute_metrics(pangenome, all=False, genome_fluidity=False, family_fluidity metrics_dict['genome_fluidity'] = genomes_fluidity(pangenome, disable_bar) if family_fluidity or all: metrics_dict['family_fluidity'] = fam_fluidity(pangenome, disable_bar) - if info_modules: + if info_modules or all: checkPangenomeInfo(pangenome, needFamilies=True, needModules=True) metrics_dict['info_modules'] = True return metrics_dict @@ -76,21 +78,20 @@ def write_metrics(pangenome, metrics_dict, no_print_info=False): if 'family_fluidity' in metrics_dict.keys(): logging.getLogger().info("Writing family fluidity in pangenome") info_group._v_attrs.family_fluidity = metrics_dict['family_fluidity'] - + if 'info_modules' in metrics_dict.keys(): + logging.getLogger().info("Writing modules information in pangenome") writeInfoModules(pangenome, h5f) # After all metrics was written if not no_print_info: readInfo(h5f) - readInfo(h5f) - def launch(args): if not any(x for x in [args.genome_fluidity, args.family_fluidity, args.info_modules, args.all]): raise Exception("You did not indicate which metric you want to compute.") - + pangenome = Pangenome() pangenome.addFile(args.pangenome) @@ -98,8 +99,9 @@ def launch(args): check_metric(pangenome, all=args.all, genome_fluidity=args.genome_fluidity, family_fluidity=args.family_fluidity, force=args.force) logging.getLogger().info("Metrics computation begin") - metrics_dictionary = compute_metrics(pangenome, all=args.all, genome_fluidity=args.genome_fluidity, info_modules=args.info_modules, - family_fluidity=args.family_fluidity, disable_bar=args.disable_prog_bar) + metrics_dictionary = compute_metrics(pangenome, all=args.all, genome_fluidity=args.genome_fluidity, + family_fluidity=args.family_fluidity, info_modules=args.info_modules, + disable_bar=args.disable_prog_bar) logging.getLogger().info("Metrics computation done") write_metrics(pangenome, metrics_dictionary, no_print_info=args.no_print_info)