Mscoco transformations to BigML-COCO and documentation changes #261

Merged (14 commits, Jun 13, 2024)
Changes from all commits
5 changes: 5 additions & 0 deletions HISTORY.rst
@@ -3,6 +3,11 @@
History
-------

5.9.0 (2024-06-12)
~~~~~~~~~~~~~~~~~~

- Adding MS COCO to BigML-COCO translator.

5.8.1 (2024-05-31)
~~~~~~~~~~~~~~~~~~

64 changes: 33 additions & 31 deletions README.rst
@@ -14,16 +14,6 @@ the
BigMLer is open sourced under the `Apache License, Version
2.0 <http://www.apache.org/licenses/LICENSE-2.0.html>`_.

Support
=======

Please report problems and bugs to our `BigML.io issue
tracker <https://github.com/bigmlcom/io/issues>`_.

Discussions about the different bindings take place in the general
`BigML mailing list <http://groups.google.com/group/bigml>`_. Or join us
in our `Campfire chatroom <https://bigmlinc.campfirenow.com/f20a0>`_.

Requirements
============

@@ -54,7 +44,7 @@ using:
The external libraries used in this case exist for the majority of recent
Operating System versions. Still, some of them might need specific
compiler versions or DLLs, so their installation may require an additional
setup effort.
setup effort and will not be supported by default.

The full set of libraries can be installed using

@@ -146,32 +136,26 @@ For a detailed description of authentication instructions on Windows see the
BigMLer on Windows
==================

To install BigMLer on Windows environments, you'll need `Python for Windows
(v.2.7.x) <http://www.python.org/download/>`_ installed.

In addition to that, you'll need the ``pip`` tool to install BigMLer. To
install pip, first you need to open your command line window (write ``cmd`` in
the input field that appears when you click on ``Start`` and hit ``enter``),
download this `python file <http://python-distribute.org/distribute_setup.py>`_
and execute it

.. code-block:: bash

c:\Python27\python.exe distribute_setup.py

After that, you'll be able to install ``pip`` by typing the following command
To install BigMLer on Windows environments, you'll need Python installed.
The code has been tested with Python 3.10; you can either create a *conda*
environment with that Python version (see the sketch below) or download it
from `Python for Windows
<http://www.python.org/download/>`_ and install it. In the latter case, you'll
also need the ``pip`` tool to install BigMLer.
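
As a minimal sketch of the *conda* route (the environment name ``bigmler`` is
just an example), you could create and activate the environment with:

.. code-block:: bash

    conda create -n bigmler python=3.10
    conda activate bigmler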

.. code-block:: bash

c:\Python27\Scripts\easy_install.exe pip
To install ``pip``, first open a command terminal window (type ``cmd`` in
the input field that appears when you click on ``Start`` and hit ``enter``).
Then you can follow the steps described, for example, in this `guide
<https://monovm.com/blog/how-to-install-pip-on-windows-linux/#How-to-install-PIP-on-Windows?-[A-Step-by-Step-Guide]>`_
to install its latest version.
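
Alternatively, recent Python for Windows installers bundle the ``ensurepip``
module, so a minimal sketch (assuming ``python`` is on your ``PATH``) to
bootstrap and update ``pip`` would be:

.. code-block:: bash

    python -m ensurepip --upgrade
    python -m pip install --upgrade pip
    python -m pip --version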

And finally, to install BigMLer, just type
And finally, to install BigMLer with its basic capabilities, just type

.. code-block:: bash

c:\Python27\Scripts\pip.exe install bigmler
python -m pip install bigmler

and BigMLer should be installed in your computer. Then
and BigMLer should be installed on your computer or in your conda environment. Then
issuing

.. code-block:: bash
@@ -180,6 +164,11 @@ issuing

should show BigMLer version information.

Extensions of BigMLer to use images are usually not available in Windows.
The libraries needed for those models are usually not available for that
operating system. If your Machine Learning project involves images, we
recommend that you choose a Linux-based operating system.

Finally, to start using BigMLer to handle your BigML resources, you need to
set your credentials in BigML for authentication. If you want them to be
permanently stored in your system, use
@@ -189,6 +178,9 @@ permanently stored in your system, use
setx BIGML_USERNAME myusername
setx BIGML_API_KEY ae579e7e53fb9abd646a6ff8aa99d4afe83ac291

Note that ``setx`` will not change the environment variables of your current
console, so you will need to open a new one to start using them.
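
As a quick check (assuming the variables were set as above), opening a new
``cmd`` window and typing

.. code-block:: bash

    echo %BIGML_USERNAME%
    echo %BIGML_API_KEY%

should print the values you just stored.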


BigML Development Mode
======================
@@ -347,3 +339,13 @@ Additional Information

For additional information, see
the `full documentation for BigMLer on Read the Docs <http://bigmler.readthedocs.org>`_.


Support
=======

Please report problems and bugs to our `BigML.io issue
tracker <https://github.com/bigmlcom/io/issues>`_.

Discussions about the different bindings take place in the general
`BigML mailing list <http://groups.google.com/group/bigml>`_.
2 changes: 1 addition & 1 deletion bigmler/__init__.py
@@ -1,2 +1,2 @@
# -*- coding: utf-8 -*-
__version__ = '5.8.1'
__version__ = '5.9.0'
2 changes: 1 addition & 1 deletion bigmler/analyze/dispatcher.py
@@ -96,7 +96,7 @@ def analyze_dispatcher(args=sys.argv[1:]):
command_args.optimize = command_args.maximize
incompatible_flags = [command_args.cv, command_args.features,
command_args.nodes, command_args.random_fields]
if sum([int(bool(flag)) for flag in incompatible_flags]) > 1:
if sum(int(bool(flag)) for flag in incompatible_flags) > 1:
sys.exit("The following flags cannot be used together:\n --features"
"\n --cross-validation\n --nodes\n --random-fields")
if (command_args.dataset is None and command_args.datasets is None and
10 changes: 5 additions & 5 deletions bigmler/analyze/k_fold_cv.py
@@ -193,7 +193,7 @@ def create_kfold_cv(args, api, command_obj, resume=False):
args.output_dir = os.path.normpath(
os.path.join(u.check_dir(datasets_file),
KFOLD_SUBDIR))
message = ('Creating the kfold evaluations.........\n')
message = 'Creating the kfold evaluations.........\n'
u.log_message(message, log_file=session_file,
console=args.verbosity)
args.objective_field = objective_name
@@ -213,7 +213,7 @@ def create_features_analysis(args, api, command_obj, resume=False):
datasets_file, objective_name, resume = create_kfold_datasets_file(
args, api, command_obj, resume=resume)
args.objective_field = objective_name
message = ('Creating the best features set..........\n')
message = 'Creating the best features set..........\n'
u.log_message(message, log_file=session_file,
console=args.verbosity)
model_fields = best_first_search(
@@ -256,7 +256,7 @@ def create_nodes_analysis(args, api, command_obj, resume=False):
datasets_file, objective_name, resume = create_kfold_datasets_file(
args, api, command_obj, resume=resume)
args.objective_field = objective_name
message = ('Creating the node threshold set..........\n')
message = 'Creating the node threshold set..........\n'
u.log_message(message, log_file=session_file,
console=args.verbosity)
node_threshold = best_node_threshold(
@@ -285,7 +285,7 @@ def create_kfold_datasets_file(args, api, command_obj, resume=False):
one per line

"""
message = ('Creating the kfold datasets............\n')
message = 'Creating the kfold datasets............\n'
u.log_message(message, log_file=session_file, console=args.verbosity)
if args.output_dir is None:
args.output_dir = a.OUTPUT_DIR
@@ -899,7 +899,7 @@ def create_candidates_analysis(args, api, command_obj, resume=False):
args.objective_field = objective_name
if args.number_of_models == 1:
args.number_of_models = DEFAULT_NUMBER_OF_MODELS
message = ('Creating the random candidates set..........\n')
message = 'Creating the random candidates set..........\n'
u.log_message(message, log_file=session_file,
console=args.verbosity)
random_candidates = best_candidates_number(
1 change: 1 addition & 0 deletions bigmler/anomaly_score.py
@@ -182,6 +182,7 @@ def remote_anomaly_score(anomaly, test_dataset, batch_anomaly_score_args, args,
batch_anomaly_score = create_batch_anomaly_score(
anomaly_id, test_dataset, batch_anomaly_score_args,
args, api, session_file=session_file, path=path, log=log)
#pylint: disable=locally-disabled,possibly-used-before-assignment
if not args.no_csv:
file_name = api.download_batch_anomaly_score(batch_anomaly_score,
prediction_file)
1 change: 1 addition & 0 deletions bigmler/centroid.py
@@ -177,6 +177,7 @@ def remote_centroid(cluster, test_dataset, batch_centroid_args, args,
batch_centroid = create_batch_centroid(
cluster_id, test_dataset, batch_centroid_args,
args, api, session_file=session_file, path=path, log=log)
#pylint: disable=locally-disabled,possibly-used-before-assignment
if not args.no_csv:
file_name = api.download_batch_centroid(batch_centroid,
prediction_file)
3 changes: 2 additions & 1 deletion bigmler/command.py
@@ -76,7 +76,8 @@ def tail(file_handler, window=1):
def get_log_reversed(file_name, stack_level):
"""Reads the line of a log file that has the chosen stack_level """
with open(file_name, "r") as handler:
lines_list = tail(handler, window=(stack_level + 1))
window = stack_level + 1
lines_list = tail(handler, window=window)
return lines_list[0].decode(u.BIGML_SYS_ENCODING)
return ""

1 change: 1 addition & 0 deletions bigmler/dispatcher.py
@@ -462,6 +462,7 @@ def compute_output(api, args):

# We get the fields of the model if we haven't got
# them yet and need them
#pylint: disable=locally-disabled,possibly-used-before-assignment
if model and not args.evaluate and (a.has_test(args) or
args.export_fields):
# if we are using boosted ensembles to predict, activate boosting
1 change: 1 addition & 0 deletions bigmler/dnprediction.py
@@ -106,6 +106,7 @@ def remote_dn_prediction(deepnet, test_dataset,
batch_prediction = create_batch_prediction(
deepnet_id, test_dataset, batch_prediction_args,
args, api, session_file=session_file, path=path, log=log)
#pylint: disable=locally-disabled,possibly-used-before-assignment
if not args.no_csv:
file_name = api.download_batch_prediction(batch_prediction,
prediction_file)
2 changes: 1 addition & 1 deletion bigmler/evaluation.py
@@ -146,7 +146,7 @@ def standard_deviation(points, mean):
"""
total = float(len(points))
if total > 0:
return math.sqrt(sum([(point - mean) ** 2 for point in points]) /
return math.sqrt(sum((point - mean) ** 2 for point in points) /
total)
return float('nan')

3 changes: 1 addition & 2 deletions bigmler/folderreader.py
@@ -39,5 +39,4 @@ def open_reader(self):
"""
self.reader.extend([[filename] for filename in
os.listdir(self.folder) if self.filter_fn(filename)])
for filename in self.reader:
yield filename
yield from self.reader
1 change: 1 addition & 0 deletions bigmler/forecast.py
@@ -96,6 +96,7 @@ def remote_forecast(time_series,
local_time_series = TimeSeries(time_series,
api=args.retrieve_api_)
output = args.predictions
input_data = {}
if args.test_set is not None:
input_data = u.read_json(args.test_set)
elif args.horizon is not None:
1 change: 1 addition & 0 deletions bigmler/fusion/dispatcher.py
@@ -94,6 +94,7 @@ def compute_output(api, args):
args.max_parallel_fusions = 1
# fusion cannot be published yet.
args.public_fusion = False
fields = {}

# It is compulsory to have a description to publish either datasets or
# fusions
1 change: 1 addition & 0 deletions bigmler/logrprediction.py
@@ -136,6 +136,7 @@ def remote_prediction(model, test_dataset,
batch_prediction = create_batch_prediction(
model_id, test_dataset, batch_prediction_args,
args, api, session_file=session_file, path=path, log=log)
#pylint: disable=locally-disabled,possibly-used-before-assignment
if not args.no_csv:
file_name = api.download_batch_prediction(batch_prediction,
prediction_file)
1 change: 1 addition & 0 deletions bigmler/lrprediction.py
@@ -133,6 +133,7 @@ def remote_lr_prediction(linear_regression, test_dataset,
batch_prediction = create_batch_prediction(
linear_regression_id, test_dataset, batch_prediction_args,
args, api, session_file=session_file, path=path, log=log)
#pylint: disable=locally-disabled,possibly-used-before-assignment
if not args.no_csv:
file_name = api.download_batch_prediction(batch_prediction,
prediction_file)
20 changes: 16 additions & 4 deletions bigmler/options/source.py
@@ -334,12 +334,16 @@ def get_source_options(defaults=None):
'action': 'store',
'dest': 'annotations_language',
'default': defaults.get('annotations_language', None),
'choices': ["VOC", "YOLO"],
'choices': ["VOC", "YOLO", "COCO"],
'help': ("Language used to provide the annotations for images."
"Annotations are expected to be provided using "
"on file per image. The --train option should point"
"one file per image. The --train option should point"
" to the directory that contains both images and"
" the corresponding annotations.")},
" the corresponding annotations, unless some "
" folder attribute is provided in each"
" annotation. In that case it should point to"
" the folder's' parent directory and --anotations-dir"
" should be used to point to the annotations files.")},

# Annotations file
# File that contains annotations for images
@@ -356,7 +360,15 @@
'action': 'store',
'dest': 'annotations_dir',
'default': defaults.get('annotations_dir', None),
'help': "Directory for individual annotation files."},
'help': ("Directory for individual annotation files."
" Used when annotations are provided using "
"one file per image. The --train option should point"
" to the directory that contains both images and"
" the corresponding annotations, unless some "
" folder attribute is provided in each"
" annotation. In that case it should point to"
" the folder's' parent directory and --anotations-dir"
" should be used to point to the annotations files.")},

# Images file
# Compressed file with images used as reference for annotations
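
As a hedged sketch of how the new ``COCO`` annotations language might be used
from the command line (the paths are hypothetical and the flag spellings are
inferred from the option destinations and help text in this diff):

.. code-block:: bash

    # Train from a directory holding both the images and their annotations
    bigmler --train ./annotated_images --annotations-language COCO

    # Or keep the annotation files in a separate directory
    bigmler --train ./images --annotations-dir ./annotations --annotations-language COCO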
4 changes: 3 additions & 1 deletion bigmler/prediction.py
@@ -175,7 +175,7 @@ def prediction_to_row(prediction, prediction_info=NORMAL_FORMAT):
elif 'categories' in summary:
distribution = summary['categories']
if distribution:
row.extend([repr(distribution), sum([x[1] for x in distribution])])
row.extend([repr(distribution), sum(x[1] for x in distribution)])
return row


@@ -700,6 +700,7 @@ def remote_predict(model, test_dataset, batch_prediction_args, args,

Predictions are computed remotely using the batch predictions call.
"""
#pylint: disable=locally-disabled,used-before-assignment
if args.ensemble is not None and not args.dataset_off:
model_or_ensemble = args.ensemble
elif args.dataset_off:
@@ -716,6 +717,7 @@ def remote_predict(model, test_dataset, batch_prediction_args, args,
resume, batch_prediction = c.checkpoint(
c.is_batch_prediction_created, path, debug=args.debug,
message=message, log_file=session_file, console=args.verbosity)
#pylint: disable=locally-disabled,possibly-used-before-assignment
if not resume:
if not args.dataset_off:
batch_prediction = create_batch_prediction(