From ce4bfdb300cc5b7244092bf50d7a848bffc207e0 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Fri, 30 Apr 2021 23:47:39 +0200 Subject: [PATCH 1/3] black --- birdvoxdetect/core.py | 242 ++++++++++++++++++++++++++---------------- 1 file changed, 150 insertions(+), 92 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 7139e3a..98629cb 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -225,15 +225,25 @@ def process_file( event_4lettercodes = [] event_confidences = [] if set(taxonomy["output_encoding"]) == {"fine"}: - df_columns = ["Time (hh:mm:ss)", "Detection confidence (%)", - "Species (4-letter code)", "Species confidence (%)"] + df_columns = [ + "Time (hh:mm:ss)", + "Detection confidence (%)", + "Species (4-letter code)", + "Species confidence (%)", + ] elif set(taxonomy["output_encoding"]) == {"fine", "medium", "coarse"}: - df_columns = ["Time (hh:mm:ss)", "Detection confidence (%)", - "Order", "Order confidence (%)", - "Family", "Family confidence (%)", - "Species (4-letter code)", "Species confidence (%)"] + df_columns = [ + "Time (hh:mm:ss)", + "Detection confidence (%)", + "Order", + "Order confidence (%)", + "Family", + "Family confidence (%)", + "Species (4-letter code)", + "Species confidence (%)", + ] df = pd.DataFrame(columns=df_columns) - df.to_csv(checklist_path,index=False) + df.to_csv(checklist_path, index=False) # Initialize fault log as a Pandas DataFrame. faultlist_path = get_output_path( @@ -246,14 +256,11 @@ def process_file( ] faultlist_df = pd.DataFrame(columns=faultlist_df_columns) if export_faults: - faultlist_df.to_csv( - faultlist_path, columns=faultlist_df_columns, index=False) + faultlist_df.to_csv(faultlist_path, columns=faultlist_df_columns, index=False) # Initialize JSON output. if predict_proba: - json_path = get_output_path( - filepath, suffix + "proba.json", output_dir - ) + json_path = get_output_path(filepath, suffix + "proba.json", output_dir) # Get MD5 hash. hash_md5 = hashlib.md5() with open(filepath, "rb") as fhandle: @@ -278,7 +285,7 @@ def process_file( "platform_release": platform.release(), "platform_system": platform.system(), "platform_version": platform.version(), - "sys_version": sys.version + "sys_version": sys.version, } with open(json_path, "w") as f: json.dump({"metadata": json_metadata, "taxonomy": taxonomy}, f) @@ -435,7 +442,8 @@ def process_file( none_peak_ids = [] for peak_id, th_peak_loc in enumerate(th_peak_locs): row, json_dict = classify_species( - classifier, chunk_pcen, th_peak_loc, taxonomy) + classifier, chunk_pcen, th_peak_loc, taxonomy + ) if row is None: none_peak_ids.append(peak_id) continue @@ -444,21 +452,25 @@ def process_file( chunk_timestamp = chunk_timestamps[peak_id] json_dict["Time (s)"] = float(chunk_timestamp) json_dict["Time (hh:mm:ss)"] = seconds_to_hhmmss(chunk_timestamp) - json_dict["Detection confidence (%)"] = float(th_peak_confidences[peak_id]) + json_dict["Detection confidence (%)"] = float( + th_peak_confidences[peak_id] + ) json_dicts.append(json_dict) th_peak_confidences = [ - th_peak_confidences[peak_id] for peak_id in range(len(th_peak_locs)) + th_peak_confidences[peak_id] + for peak_id in range(len(th_peak_locs)) if peak_id not in none_peak_ids ] chunk_timestamps = [ - chunk_timestamps[peak_id] for peak_id in range(len(th_peak_locs)) + chunk_timestamps[peak_id] + for peak_id in range(len(th_peak_locs)) if peak_id not in none_peak_ids ] n_peaks = len(chunk_timestamps) chunk_df = pd.DataFrame(rows, columns=df_columns) # Count flight calls. 
- if n_peaks>0: + if n_peaks > 0: chunk_counter = collections.Counter(chunk_df["Species (4-letter code)"]) logger.info("Number of flight calls in current chunk: {}".format(n_peaks)) logger.info( @@ -476,27 +488,37 @@ def process_file( chunk_hhmmss = list(map(seconds_to_hhmmss, chunk_timestamps)) chunk_df["Time (hh:mm:ss)"] = event_hhmmss + chunk_hhmmss chunk_df["Detection confidence (%)"] = th_peak_confidences - df_columns = [column for column in - [ - "Time (hh:mm:ss)", "Detection confidence (%)", - "Order", "Order confidence (%)", - "Family", "Family confidence (%)", - "Species (4-letter code)", "Species confidence (%)"] - if column in chunk_df] + df_columns = [ + column + for column in [ + "Time (hh:mm:ss)", + "Detection confidence (%)", + "Order", + "Order confidence (%)", + "Family", + "Family confidence (%)", + "Species (4-letter code)", + "Species confidence (%)", + ] + if column in chunk_df + ] df = df.append(chunk_df) df.to_csv(checklist_path, columns=df_columns, index=False) # Export probabilities as JSON file. if predict_proba: with open(json_path, "w") as f: - json.dump({ - "events": json_dicts, - "metadata": json_metadata, - "taxonomy": taxonomy - }, f) + json.dump( + { + "events": json_dicts, + "metadata": json_metadata, + "taxonomy": taxonomy, + }, + f, + ) # Export clips. - if export_clips and len(df)>0: + if export_clips and len(df) > 0: chunk_zip = zip( chunk_timestamps, chunk_hhmmss, @@ -572,9 +594,7 @@ def process_file( faultlist_df = faultlist_df.append( { "Start (hh:mm:ss)": seconds_to_hhmmss(chunk_id * chunk_duration), - "Stop (hh:mm:ss)": seconds_to_hhmmss( - (chunk_id + 1) * chunk_duration - ), + "Stop (hh:mm:ss)": seconds_to_hhmmss((chunk_id + 1) * chunk_duration), "Fault confidence (%)": int(sensor_fault_probability * 100), }, ignore_index=True, @@ -585,7 +605,7 @@ def process_file( ) # If probability of sensor fault is above threshold, exclude chunk. - has_sensor_fault = (sensor_fault_probability > bva_threshold) + has_sensor_fault = sensor_fault_probability > bva_threshold if has_sensor_fault: logger.info( "Probability of sensor fault: {:5.2f}%".format( @@ -650,30 +670,35 @@ def process_file( none_peak_ids = [] for peak_id, th_peak_loc in enumerate(th_peak_locs): row, json_dict = classify_species( - classifier, chunk_pcen, th_peak_loc, taxonomy) + classifier, chunk_pcen, th_peak_loc, taxonomy + ) if row is None: none_peak_ids.append(peak_id) continue rows.append(row) if predict_proba: chunk_timestamp = chunk_timestamps[peak_id] - json_dict["Time (s)"] = float(chunk_timestamp), + json_dict["Time (s)"] = (float(chunk_timestamp),) json_dict["Time (hh:mm:ss)"] = seconds_to_hhmmss(chunk_timestamp) - json_dict["Detection confidence (%)"] = float(th_peak_confidences[peak_id]), + json_dict["Detection confidence (%)"] = ( + float(th_peak_confidences[peak_id]), + ) json_dicts.append(json_dict) th_peak_confidences = [ - th_peak_confidences[peak_id] for peak_id in range(len(th_peak_locs)) + th_peak_confidences[peak_id] + for peak_id in range(len(th_peak_locs)) if peak_id not in none_peak_ids ] chunk_timestamps = [ - chunk_timestamps[peak_id] for peak_id in range(len(th_peak_locs)) + chunk_timestamps[peak_id] + for peak_id in range(len(th_peak_locs)) if peak_id not in none_peak_ids ] n_peaks = len(chunk_timestamps) chunk_df = pd.DataFrame(rows, columns=df_columns) # Count flight calls. 
- if n_peaks>0: + if n_peaks > 0: chunk_counter = collections.Counter(chunk_df["Species (4-letter code)"]) logger.info("Number of flight calls in current chunk: {}".format(n_peaks)) logger.info( @@ -691,26 +716,37 @@ def process_file( chunk_hhmmss = list(map(seconds_to_hhmmss, chunk_timestamps)) chunk_df["Time (hh:mm:ss)"] = event_hhmmss + chunk_hhmmss chunk_df["Detection confidence (%)"] = th_peak_confidences - df_columns = [column for column in - ["Time (hh:mm:ss)", "Detection confidence (%)", - "Order", "Order confidence (%)", - "Family", "Family confidence (%)", - "Species (4-letter code)", "Species confidence (%)"] - if column in chunk_df] + df_columns = [ + column + for column in [ + "Time (hh:mm:ss)", + "Detection confidence (%)", + "Order", + "Order confidence (%)", + "Family", + "Family confidence (%)", + "Species (4-letter code)", + "Species confidence (%)", + ] + if column in chunk_df + ] df = df.append(chunk_df) df.to_csv(checklist_path, columns=df_columns, index=False) # Export probabilities as JSON file. if predict_proba: with open(json_path, "w") as f: - json.dump({ - "events": json_dicts, - "metadata": json_metadata, - "taxonomy": taxonomy - }, f) + json.dump( + { + "events": json_dicts, + "metadata": json_metadata, + "taxonomy": taxonomy, + }, + f, + ) # Export clips. - if export_clips and len(df)>0: + if export_clips and len(df) > 0: chunk_zip = zip( chunk_timestamps, chunk_hhmmss, @@ -751,7 +787,7 @@ def process_file( # unstable with files shorter than 30 minutes, which is why we issue a # warning. Also, we do not try to detect sensor faults in files shorter than # 30 minutes. - if (n_chunks > 1): + if n_chunks > 1: faultlist_df = faultlist_df.append( { "Start (hh:mm:ss)": seconds_to_hhmmss(chunk_id * chunk_duration), @@ -762,7 +798,8 @@ def process_file( ) if export_faults: faultlist_df.to_csv( - faultlist_path, columns=faultlist_df_columns, index=False) + faultlist_path, columns=faultlist_df_columns, index=False + ) if (n_chunks > 1) and has_sensor_fault: logger.info( @@ -869,7 +906,8 @@ def process_file( none_peak_ids = [] for peak_id, th_peak_loc in enumerate(th_peak_locs): row, json_dict = classify_species( - classifier, chunk_pcen, th_peak_loc, taxonomy) + classifier, chunk_pcen, th_peak_loc, taxonomy + ) if row is None: none_peak_ids.append(peak_id) continue @@ -878,23 +916,29 @@ def process_file( chunk_timestamp = chunk_timestamps[peak_id] json_dict["Time (s)"] = float(chunk_timestamp) json_dict["Time (hh:mm:ss)"] = seconds_to_hhmmss(chunk_timestamp) - json_dict["Detection confidence (%)"] = float(th_peak_confidences[peak_id]) + json_dict["Detection confidence (%)"] = float( + th_peak_confidences[peak_id] + ) json_dicts.append(json_dict) th_peak_confidences = [ - th_peak_confidences[peak_id] for peak_id in range(len(th_peak_locs)) + th_peak_confidences[peak_id] + for peak_id in range(len(th_peak_locs)) if peak_id not in none_peak_ids ] chunk_timestamps = [ - chunk_timestamps[peak_id] for peak_id in range(len(th_peak_locs)) + chunk_timestamps[peak_id] + for peak_id in range(len(th_peak_locs)) if peak_id not in none_peak_ids ] n_peaks = len(chunk_timestamps) chunk_df = pd.DataFrame(rows, columns=df_columns) # Count flight calls. 
- if n_peaks>0: + if n_peaks > 0: chunk_counter = collections.Counter(chunk_df["Species (4-letter code)"]) - logger.info("Number of flight calls in current chunk: {}".format(n_peaks)) + logger.info( + "Number of flight calls in current chunk: {}".format(n_peaks) + ) logger.info( "(" + ", ".join( @@ -910,12 +954,20 @@ def process_file( chunk_hhmmss = list(map(seconds_to_hhmmss, chunk_timestamps)) chunk_df["Time (hh:mm:ss)"] = event_hhmmss + chunk_hhmmss chunk_df["Detection confidence (%)"] = th_peak_confidences - df_columns = [column for column in - ["Time (hh:mm:ss)", "Detection confidence (%)", - "Order", "Order confidence (%)", - "Family", "Family confidence (%)", - "Species (4-letter code)", "Species confidence (%)"] - if column in chunk_df] + df_columns = [ + column + for column in [ + "Time (hh:mm:ss)", + "Detection confidence (%)", + "Order", + "Order confidence (%)", + "Family", + "Family confidence (%)", + "Species (4-letter code)", + "Species confidence (%)", + ] + if column in chunk_df + ] df = df.append(chunk_df) df.to_csv(checklist_path, columns=df_columns, index=False) @@ -924,15 +976,18 @@ def process_file( with open(json_path, "w") as f: json_faultlist = faultlist_df.to_json(orient="index") json_metadata["elapsed_time"] = time.time() - start_time - json.dump({ - "events": json_dicts, - "metadata": json_metadata, - "sensor_faults": json.loads(json_faultlist), - "taxonomy": taxonomy - }, f) + json.dump( + { + "events": json_dicts, + "metadata": json_metadata, + "sensor_faults": json.loads(json_faultlist), + "taxonomy": taxonomy, + }, + f, + ) # Export clips. - if export_clips and len(df)>0: + if export_clips and len(df) > 0: chunk_zip = zip( chunk_timestamps, chunk_hhmmss, @@ -1037,7 +1092,7 @@ def process_file( # Print final messages. if threshold is not None: df = pd.read_csv(checklist_path) - if (len(df)>0) and ("Species (4-letter code)" in df.columns): + if (len(df) > 0) and ("Species (4-letter code)" in df.columns): logger.info( "\n".join( [ @@ -1094,7 +1149,8 @@ def classify_species(classifier, chunk_pcen, th_peak_loc, taxonomy): # Format prediction formatted_prediction = birdvoxclassify.format_pred( - bvc_prediction, taxonomy=taxonomy) + bvc_prediction, taxonomy=taxonomy + ) # Get prediction levels. pred_levels = list(formatted_prediction.keys()) @@ -1109,14 +1165,14 @@ def classify_species(classifier, chunk_pcen, th_peak_loc, taxonomy): max_prob = prob_dict[argmax_taxon] argmax_prediction = { "Species (4-letter code)": "OTHE", - "Species confidence (%)": 100*(1-max_prob) + "Species confidence (%)": 100 * (1 - max_prob), } - if max_prob>0.5: + if max_prob > 0.5: argmax_dict = formatted_prediction["fine"][argmax_taxon] aliases = argmax_dict["taxonomy_level_aliases"] alias = aliases["species_4letter_code"] argmax_prediction["Species (4-letter code)"] = alias - argmax_prediction["Species confidence (%)"] = 100*max_prob + argmax_prediction["Species confidence (%)"] = 100 * max_prob return argmax_prediction, formatted_prediction # Case of a hierarchical classifier. 
(ex: TaxoNet) @@ -1130,18 +1186,19 @@ def classify_species(classifier, chunk_pcen, th_peak_loc, taxonomy): max_prob = prob_dict[argmax_taxon] argmax_prediction = { "Order": "other", - "Order confidence (%)": 100*(1-max_prob) + "Order confidence (%)": 100 * (1 - max_prob), } - if max_prob>0.5: + if max_prob > 0.5: argmax_dict = formatted_prediction["coarse"][argmax_taxon] argmax_prediction["Order"] = argmax_dict["scientific_name"] - argmax_prediction["Order confidence (%)"] = 100*max_prob + argmax_prediction["Order confidence (%)"] = 100 * max_prob # Medium level: family. if argmax_prediction["Order"] == "other": argmax_prediction["Family"] = "other" - argmax_prediction["Family confidence (%)"] =\ - argmax_prediction["Order confidence (%)"] + argmax_prediction["Family confidence (%)"] = argmax_prediction[ + "Order confidence (%)" + ] else: prob_dict = { k: formatted_prediction["medium"][k]["probability"] @@ -1150,17 +1207,18 @@ def classify_species(classifier, chunk_pcen, th_peak_loc, taxonomy): argmax_taxon = max(prob_dict.items(), key=operator.itemgetter(1))[0] max_prob = prob_dict[argmax_taxon] argmax_prediction["Family"] = "other" - argmax_prediction["Family confidence (%)"] = 100*(1-max_prob) - if max_prob>0.5: + argmax_prediction["Family confidence (%)"] = 100 * (1 - max_prob) + if max_prob > 0.5: argmax_dict = formatted_prediction["medium"][argmax_taxon] argmax_prediction["Family"] = argmax_dict["scientific_name"] - argmax_prediction["Family confidence (%)"] = 100*max_prob + argmax_prediction["Family confidence (%)"] = 100 * max_prob # Fine level: species. if argmax_prediction["Family"] == "other": argmax_prediction["Species (4-letter code)"] = "OTHE" - argmax_prediction["Species confidence (%)"] =\ - argmax_prediction["Family confidence (%)"] + argmax_prediction["Species confidence (%)"] = argmax_prediction[ + "Family confidence (%)" + ] else: prob_dict = { k: formatted_prediction["fine"][k]["probability"] @@ -1170,13 +1228,13 @@ def classify_species(classifier, chunk_pcen, th_peak_loc, taxonomy): argmax_taxon = max(prob_dict.items(), key=operator.itemgetter(1))[0] max_prob = prob_dict[argmax_taxon] argmax_prediction["Species (4-letter code)"] = "OTHE" - argmax_prediction["Species confidence (%)"] = 100*(1-max_prob) - if max_prob>0.5: + argmax_prediction["Species confidence (%)"] = 100 * (1 - max_prob) + if max_prob > 0.5: argmax_dict = formatted_prediction["fine"][argmax_taxon] aliases = argmax_dict["taxonomy_level_aliases"] alias = aliases["species_4letter_code"] argmax_prediction["Species (4-letter code)"] = alias - argmax_prediction["Species confidence (%)"] = 100*max_prob + argmax_prediction["Species confidence (%)"] = 100 * max_prob return argmax_prediction, formatted_prediction From 8a2a5b3ff58370dedb4d46e897536aef7f0c9cf1 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Fri, 30 Apr 2021 23:47:52 +0200 Subject: [PATCH 2/3] set default classifier to flat-multitask-convnet-v2 --- birdvoxdetect/core.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 98629cb..7ceeb17 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -61,7 +61,10 @@ def process_file( logger_level=logging.INFO, detector_name="birdvoxdetect-v03_trial-12_network_epoch-068", classifier_name="_".join( - ["birdvoxclassify-taxonet", "tv1hierarchical-2e7e1bbd434a35b3961e315cfe3832fc"] + [ + "birdvoxclassify-flat-multitask-convnet-v2", + "tv1hierarchical-2e7e1bbd434a35b3961e315cfe3832fc", + ] ), custom_objects=None, 
bva_threshold=0.5, From 6857820418d5f8317c68e0504c3f661e3d5664e7 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Sat, 1 May 2021 00:20:04 +0200 Subject: [PATCH 3/3] from collections.abc import Iterable --- birdvoxdetect/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/birdvoxdetect/cli.py b/birdvoxdetect/cli.py index fef6e45..301b812 100644 --- a/birdvoxdetect/cli.py +++ b/birdvoxdetect/cli.py @@ -1,6 +1,6 @@ from __future__ import print_function from argparse import ArgumentParser, RawDescriptionHelpFormatter, ArgumentTypeError -from collections import Iterable +from collections.abc import Iterable import logging import numpy as np import os
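
Notes on the series (illustrative Python sketches; not part of the commits):

PATCH 1/3 is a pure reformatting pass, so black rightly preserves the
pre-existing trailing commas in one hunk and renders them as explicit
one-element tuples, e.g. `json_dict["Time (s)"] = (float(chunk_timestamp),)`.
The two sibling passages in core.py assign plain floats, and the difference
is visible in the exported JSON (an array versus a number). A minimal sketch
of the discrepancy; harmonizing it would belong in a separate semantic
commit, not in this formatting one:

    import json

    chunk_timestamp = 12.5

    # Trailing comma: the value is a one-element tuple, as black makes explicit.
    as_tuple = (float(chunk_timestamp),)
    # Scalar form used by the other two occurrences in core.py.
    as_float = float(chunk_timestamp)

    print(json.dumps({"Time (s)": as_tuple}))  # {"Time (s)": [12.5]}
    print(json.dumps({"Time (s)": as_float}))  # {"Time (s)": 12.5}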
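PATCH 1/3 also reformats the coarse-to-fine cascade in classify_species.
Condensed below as a sketch of that decision rule, assuming the
"coarse"/"medium"/"fine" dictionary layout that birdvoxclassify.format_pred
returns; the helper names and the toy taxon key are illustrative only:

    import operator

    def argmax_taxon(formatted_prediction, level):
        # Most probable taxon at one taxonomic level: (key, probability).
        probs = {
            k: formatted_prediction[level][k]["probability"]
            for k in formatted_prediction[level].keys()
        }
        taxon = max(probs.items(), key=operator.itemgetter(1))[0]
        return taxon, probs[taxon]

    def coarse_level(formatted_prediction):
        # One accept-or-fall-back step: a winning probability above 0.5
        # accepts the taxon with confidence 100 * p; otherwise the level
        # resolves to "other" with confidence 100 * (1 - p). The "medium"
        # (family) and "fine" (species) levels repeat this step, inheriting
        # the fallback confidence when a parent level is already "other".
        taxon, p = argmax_taxon(formatted_prediction, "coarse")
        if p > 0.5:
            name = formatted_prediction["coarse"][taxon]["scientific_name"]
            return {"Order": name, "Order confidence (%)": 100 * p}
        return {"Order": "other", "Order confidence (%)": 100 * (1 - p)}

    demo = {"coarse": {"1": {"probability": 0.9, "scientific_name": "Passeriformes"}}}
    print(coarse_level(demo))  # {'Order': 'Passeriformes', 'Order confidence (%)': 90.0}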
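PATCH 2/3 swaps only the model half of the default classifier_name; the
taxonomy suffix (tv1hierarchical-…) is unchanged. A usage sketch, assuming
birdvoxdetect's top-level process_file wrapper and a hypothetical recording
path, showing how the previous default remains selectable:

    import birdvoxdetect as bvd

    # New default after this series: the flat multitask convnet, v2.
    bvd.process_file("recording.wav")  # hypothetical path

    # Previous default (TaxoNet), still selectable explicitly.
    bvd.process_file(
        "recording.wav",
        classifier_name="_".join(
            [
                "birdvoxclassify-taxonet",
                "tv1hierarchical-2e7e1bbd434a35b3961e315cfe3832fc",
            ]
        ),
    )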
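PATCH 3/3 is a compatibility fix: the container ABCs have lived in
collections.abc since Python 3.3, importing them from collections was
deprecated, and the old aliases were removed in Python 3.10, so
`from collections import Iterable` now raises ImportError on current
interpreters. A small sketch of the typical pattern such an import serves in
a CLI; the as_list helper is hypothetical, not taken from cli.py:

    from collections.abc import Iterable  # correct home since Python 3.3

    def as_list(value):
        # Normalize a scalar or an iterable of values into a list,
        # treating strings as scalars rather than iterables of characters.
        if isinstance(value, Iterable) and not isinstance(value, str):
            return list(value)
        return [value]

    assert as_list("audio.wav") == ["audio.wav"]
    assert as_list(["a.wav", "b.wav"]) == ["a.wav", "b.wav"]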