From 3b85b2b07e2f329b2514864302748546c5592b2e Mon Sep 17 00:00:00 2001 From: Sefik Ilkin Serengil Date: Sat, 23 Dec 2023 11:01:23 +0000 Subject: [PATCH 1/3] improvements --- .github/workflows/pythonpublish.yml | 26 - .github/workflows/tests.yml | 67 ++ .gitignore | 2 +- .pylintrc | 640 ++++++++++++++ .vscode/settings.json | 20 + Makefile | 5 + README.md | 5 +- chefboost/Chefboost.py | 1015 ++++++++++----------- chefboost/commons/evaluate.py | 250 +++--- chefboost/commons/functions.py | 251 +++--- chefboost/commons/logger.py | 40 + chefboost/training/Preprocess.py | 296 ++++--- chefboost/training/Training.py | 1261 ++++++++++++++------------- chefboost/tuning/adaboost.py | 277 +++--- chefboost/tuning/gbm.py | 627 ++++++------- chefboost/tuning/randomforest.py | 199 +++-- requirements.txt | 8 +- scripts/push-release.sh | 11 + setup.py | 5 +- tests/global-unit-test.py | 438 +++++----- 20 files changed, 3259 insertions(+), 2184 deletions(-) delete mode 100644 .github/workflows/pythonpublish.yml create mode 100644 .github/workflows/tests.yml create mode 100644 .pylintrc create mode 100644 .vscode/settings.json create mode 100644 Makefile create mode 100644 chefboost/commons/logger.py create mode 100644 scripts/push-release.sh diff --git a/.github/workflows/pythonpublish.yml b/.github/workflows/pythonpublish.yml deleted file mode 100644 index 21f2f01..0000000 --- a/.github/workflows/pythonpublish.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: Upload Python Package - -on: - release: - types: [created] - -jobs: - deploy: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v1 - - name: Set up Python - uses: actions/setup-python@v1 - with: - python-version: '3.x' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install setuptools wheel twine - - name: Build and publish - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: | - python setup.py sdist bdist_wheel - twine upload dist/* diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..65d670b --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,67 @@ +name: Tests and Linting + +on: + push: + paths: + - '.github/workflows/tests.yml' + - 'chefboost/**' + - 'tests/**' + - 'requirements.txt' + - '.gitignore' + - 'setup.py' + pull_request: + paths: + - '.github/workflows/tests.yml' + - 'chefboost/**' + - 'tests/**' + - 'requirements.txt' + - '.gitignore' + - 'setup.py' + +jobs: + unit-tests: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.8] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pytest + pip install . + + - name: Test with pytest + run: | + cd tests + python global-unit-test.py + linting: + needs: unit-tests + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.8] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pylint + pip install black + pip install . 
+ + - name: Lint with pylint + run: | + python -m pylint chefboost/ --fail-under=10 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 34eeb1a..feaa71a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,6 @@ dist/ Pipfile Pipfile.lock .mypy_cache/ -.vscode/ .idea/ chefboost.egg-info/ tests/outputs/ @@ -19,3 +18,4 @@ chefboost/tuning/__pycache__/* .DS_Store chefboost/.DS_Store tests/.DS_Store +.pytest_cache \ No newline at end of file diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..694ae4c --- /dev/null +++ b/.pylintrc @@ -0,0 +1,640 @@ +[MAIN] + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Load and enable all available extensions. Use --list-extensions to see a list +# all available extensions. +#enable-all-extensions= + +# In error mode, messages with a category besides ERROR or FATAL are +# suppressed, and no reports are done by default. Error mode is compatible with +# disabling specific errors. +#errors-only= + +# Always return a 0 (non-error) status code, even if lint errors are found. +# This is primarily useful in continuous integration scripts. +#exit-zero= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +extension-pkg-whitelist= + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + +# Specify a score threshold under which the program will exit with error. +fail-under=10 + +# Interpret the stdin as a python script, whose filename needs to be passed as +# the module_or_package argument. +#from-stdin= + +# Files or directories to be skipped. They should be base names, not paths. +ignore=CVS + +# Add files or directories matching the regular expressions patterns to the +# ignore-list. The regex matches against paths and can be in Posix or Windows +# format. Because '\' represents the directory delimiter on Windows systems, it +# can't be used as an escape character. +ignore-paths= + +# Files or directories matching the regular expression patterns are skipped. +# The regex matches against base names, not paths. The default value ignores +# Emacs file locks +ignore-patterns=^\.# + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. 
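For contributors who want the same gate locally before pushing, the lint step above (and the Makefile `lint` target added later in this patch) can also be driven from Python. A minimal sketch, assuming `pylint` is installed in the active environment; the `scripts/check.py` name is hypothetical:

```python
# scripts/check.py (hypothetical helper): runs the same lint gate as the
# workflow step and Makefile target, then propagates pylint's exit code,
# which is non-zero whenever the score falls below --fail-under.
import subprocess
import sys

result = subprocess.run(
    [sys.executable, "-m", "pylint", "chefboost/", "--fail-under=10"],
    check=False,
)
sys.exit(result.returncode)
```

Note that `--fail-under=10` demands a perfect score, which is presumably why the `.pylintrc` below disables the checks the codebase deliberately does not satisfy.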
+jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.9 + +# Discover python modules and packages in the file system subtree. +recursive=no + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# In verbose mode, extra non-checker-related info will be displayed. +#verbose= + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. If left empty, argument names will be checked with the set +# naming style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. If left empty, attribute names will be checked with the set naming +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. If left empty, class attribute names will be checked +# with the set naming style. +#class-attribute-rgx= + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. If left empty, class constant names will be checked with +# the set naming style. +#class-const-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. If left empty, class names will be checked with the set naming style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. If left empty, constant names will be checked with the set naming +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. If left empty, function names will be checked with the set +# naming style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. 
+good-names=i, + j, + k, + ex, + Run, + _ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. If left empty, inline iteration names will be checked +# with the set naming style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. If left empty, method names will be checked with the set naming style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. If left empty, module names will be checked with the set naming style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Regular expression matching correct type variable names. If left empty, type +# variable names will be checked with the set naming style. +#typevar-rgx= + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. If left empty, variable names will be checked with the set +# naming style. +#variable-rgx= + + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=cls + + +[DESIGN] + +# List of regular expressions of class ancestor names to ignore when counting +# public methods (see R0903) +exclude-too-few-public-methods= + +# List of qualified class names to ignore when counting class parents (see +# R0901) +ignored-parents= + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). 
+max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when caught. +overgeneral-exceptions=BaseException, + Exception + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=100 + +# Maximum number of lines in a module. +max-module-lines=1000 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules= + +# Output a graph (.gv or any supported image format) of external dependencies +# to the given file (report RP0402 must not be disabled). +ext-import-graph= + +# Output a graph (.gv or any supported image format) of all (i.e. internal and +# external) dependencies to the given file (report RP0402 must not be +# disabled). +import-graph= + +# Output a graph (.gv or any supported image format) of internal dependencies +# to the given file (report RP0402 must not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. +preferred-modules= + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, +# UNDEFINED. +confidence=HIGH, + CONTROL_FLOW, + INFERENCE, + INFERENCE_FAILURE, + UNDEFINED + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then re-enable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". 
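Besides the global `disable` list that follows, pylint also honors narrow inline pragmas, and this patch uses them in several modules (e.g. `# pylint: disable=broad-except` in `chefboost/commons/evaluate.py`). A hedged illustration with a hypothetical helper, showing how a pragma scopes the suppression to one function instead of widening the global list:

```python
# Hypothetical helper, for illustration only.
def safe_ratio(numerator: float, denominator: float) -> float:
    # pylint: disable=broad-except
    try:
        return numerator / denominator
    except Exception:  # tolerated here: the caller treats 0.0 as "undefined"
        return 0.0
```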
+disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + import-error, + invalid-name, + missing-module-docstring, + missing-function-docstring, + missing-class-docstring, + too-many-arguments, + too-many-locals, + too-many-branches, + too-many-statements, + global-variable-undefined, + import-outside-toplevel, + singleton-comparison, + too-many-lines, + duplicate-code, + bare-except, + cyclic-import, + global-statement, + no-member, + no-name-in-module, + unrecognized-option, + consider-using-dict-items, + consider-iterating-dictionary, + unexpected-keyword-arg + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[METHOD_ARGS] + +# List of qualified names (i.e., library.method) which require a timeout +# parameter e.g. 'requests.api.get,requests.api.post' +timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX + +# Regular expression of note tags to take in consideration. +notes-rgx= + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit,argparse.parse_error + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each +# category, as well as 'statement' which is the total number of statements +# analyzed. This score is used by the global evaluation report (RP0004). +evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +#output-format= + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[SIMILARITIES] + +# Comments are removed from the similarity computation +ignore-comments=yes + +# Docstrings are removed from the similarity computation +ignore-docstrings=yes + +# Imports are removed from the similarity computation +ignore-imports=yes + +# Signatures are removed from the similarity computation +ignore-signatures=yes + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. 
To make it work, +# install the 'python-enchant' package. +spelling-dict= + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. +check-str-concat-over-line-jumps=no + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of symbolic message names to ignore for Mixin members. +ignored-checks-for-mixins=no-member, + not-async-context-manager, + not-context-manager, + attribute-defined-outside-init + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx=.*[Mm]ixin + +# List of decorators that change the signature of a decorated function. +signature-mutators= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. 
+allow-global-unused-variables=yes + +# List of names allowed to shadow builtins +allowed-redefined-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io \ No newline at end of file diff --git a/.vscode/settings.json new file mode 100644 index 0000000..38fd700 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,20 @@ +{ + "python.linting.pylintEnabled": true, + "python.linting.enabled": true, + "python.linting.pylintUseMinimalCheckers": false, + "editor.formatOnSave": true, + "editor.renderWhitespace": "all", + "files.autoSave": "afterDelay", + "python.analysis.typeCheckingMode": "basic", + "python.formatting.provider": "black", + "python.formatting.blackArgs": [ + "--line-length=100" + ], + "editor.fontWeight": "normal", + "python.analysis.extraPaths": [ + "./chefboost" + ], + "black-formatter.args": [ + "--line-length=100" + ] +} \ No newline at end of file diff --git a/Makefile new file mode 100644 index 0000000..ab7f41a --- /dev/null +++ b/Makefile @@ -0,0 +1,5 @@ +test: + cd tests && python global-unit-test.py + +lint: + python -m pylint chefboost/ --fail-under=10 \ No newline at end of file diff --git a/README.md index 0296244..8a8a569 100644 --- a/README.md +++ b/README.md @@ -187,9 +187,10 @@ config = {'algorithm': 'C4.5', 'enableParallelism': False} model = chef.fit(df, config) ``` -### Contributing +## Contribution [![Tests](https://github.com/serengil/chefboost/actions/workflows/tests.yml/badge.svg)](https://github.com/serengil/chefboost/actions/workflows/tests.yml) + +Pull requests are more than welcome! You should run the unit tests and linting locally by running the `make test` and `make lint` commands before creating a PR. Once a PR is created, the GitHub test workflow will run automatically, and unit test results will be available in [GitHub Actions](https://github.com/serengil/chefboost/actions) before approval. -Pull requests are welcome. You should run the unit tests locally by running [`test/global-unit-test.py`](https://github.com/serengil/chefboost/blob/master/tests/global-unit-test.py). Please share the unit test result logs in the PR. ### Support diff --git a/chefboost/Chefboost.py index f7798f7..f45c6ac 100644 --- a/chefboost/Chefboost.py +++ b/chefboost/Chefboost.py @@ -1,556 +1,601 @@ -import pandas as pd -import math -import numpy as np import time -import imp import pickle import os -from os import path import json +from typing import Optional, Dict, Any, Union -from chefboost.commons import functions, evaluate as eval -from chefboost.training import Preprocess, Training -from chefboost.tuning import gbm, adaboost, randomforest - -#------------------------ - -def fit(df, config = {}, target_label = 'Decision', validation_df = None): - - """ - Parameters: - df (pandas data frame): Training data frame.
The target column must be named as 'Decision' and it has to be in the last column - - config (dictionary): - - config = { - 'algorithm' (string): ID3, 'C4.5, CART, CHAID or Regression - 'enableParallelism' (boolean): False - - 'enableGBM' (boolean): True, - 'epochs' (int): 7, - 'learning_rate' (int): 1, - - 'enableRandomForest' (boolean): True, - 'num_of_trees' (int): 5, - - 'enableAdaboost' (boolean): True, - 'num_of_weak_classifier' (int): 4 - } - - validation_df (pandas data frame): if nothing is passed to validation data frame, then the function validates built trees for training data frame - - Returns: - chefboost model - - """ - - #------------------------ - - process_id = os.getpid() - - #------------------------ - #rename target column name - if target_label != 'Decision': - df = df.rename(columns = {target_label: 'Decision'}) - - #if target is not the last column - if df.columns[-1] != 'Decision': - if 'Decision' in df.columns: - new_column_order = df.columns.drop('Decision').tolist() + ['Decision'] - #print(new_column_order) - df = df[new_column_order] - else: - raise ValueError('Please set the target_label') - - #------------------------ - - base_df = df.copy() - - #------------------------ - - target_label = df.columns[len(df.columns)-1] - if target_label != 'Decision': - print("Expected: Decision, Existing: ",target_label) - raise ValueError('Please confirm that name of the target column is "Decision" and it is put to the right in pandas data frame') - - #------------------------ - #handle NaN values - - nan_values = [] - - for column in df.columns: - if df[column].dtypes != 'object': - min_value = df[column].min() - idx = df[df[column].isna()].index - - nan_value = [] - nan_value.append(column) - - if idx.shape[0] > 0: - df.loc[idx, column] = min_value - 1 - nan_value.append(min_value - 1) - min_value - 1 - #print("NaN values are replaced to ", min_value - 1, " in column ", column) - else: - nan_value.append(None) - - nan_values.append(nan_value) - - #------------------------ - - #initialize params and folders - config = functions.initializeParams(config) - functions.initializeFolders() - - #------------------------ - - algorithm = config['algorithm'] - - valid_algorithms = ['ID3', 'C4.5', 'CART', 'CHAID', 'Regression'] - - if algorithm not in valid_algorithms: - raise ValueError('Invalid algorithm passed. You passed ', algorithm," but valid algorithms are ",valid_algorithms) - - #------------------------ - - enableRandomForest = config['enableRandomForest'] - num_of_trees = config['num_of_trees'] - enableMultitasking = config['enableMultitasking'] #no longer used. check to remove this variable. - - enableGBM = config['enableGBM'] - epochs = config['epochs'] - learning_rate = config['learning_rate'] - - enableAdaboost = config['enableAdaboost'] - enableParallelism = config['enableParallelism'] - - #------------------------ - - if enableParallelism == True: - print("[INFO]: ",config["num_cores"],"CPU cores will be allocated in parallel running") - - from multiprocessing import set_start_method, freeze_support - set_start_method("spawn", force=True) - freeze_support() - #------------------------ - raw_df = df.copy() - num_of_rows = df.shape[0]; num_of_columns = df.shape[1] - - if algorithm == 'Regression': - if df['Decision'].dtypes == 'object': - raise ValueError('Regression trees cannot be applied for nominal target values! 
You can either change the algorithm or data set.') - - if df['Decision'].dtypes != 'object': #this must be regression tree even if it is not mentioned in algorithm - - if algorithm != 'Regression': - print("WARNING: You set the algorithm to ", algorithm," but the Decision column of your data set has non-object type.") - print("That's why, the algorithm is set to Regression to handle the data set.") - - algorithm = 'Regression' - config['algorithm'] = 'Regression' - global_stdev = df['Decision'].std(ddof=0) - - if enableGBM == True: - print("Gradient Boosting Machines...") - algorithm = 'Regression' - config['algorithm'] = 'Regression' - - if enableAdaboost == True: - #enableParallelism = False - for j in range(0, num_of_columns): - column_name = df.columns[j] - if df[column_name].dtypes == 'object': - raise ValueError('Adaboost must be run on numeric data set for both features and target') - - #------------------------- - - print(algorithm," tree is going to be built...") - - dataset_features = dict() #initialize a dictionary. this is going to be used to check features numeric or nominal. numeric features should be transformed to nominal values based on scales. - - header = "def findDecision(obj): #" - - num_of_columns = df.shape[1]-1 - for i in range(0, num_of_columns): - column_name = df.columns[i] - dataset_features[column_name] = df[column_name].dtypes - header = header + "obj[" + str(i) +"]: "+column_name - if i != num_of_columns - 1: - header = header + ", " - - header = header + "\n" - - #------------------------ - - begin = time.time() - - trees = []; alphas = [] - - if enableAdaboost == True: - trees, alphas = adaboost.apply(df, config, header, dataset_features, validation_df = validation_df, process_id = process_id) - - elif enableGBM == True: - - if df['Decision'].dtypes == 'object': #transform classification problem to regression - trees, alphas = gbm.classifier(df, config, header, dataset_features, validation_df = validation_df, process_id = process_id) - classification = True - - else: #regression - trees = gbm.regressor(df, config, header, dataset_features, validation_df = validation_df, process_id = process_id) - classification = False - - elif enableRandomForest == True: - trees = randomforest.apply(df, config, header, dataset_features, validation_df = validation_df, process_id = process_id) - else: #regular decision tree building - - root = 1; file = "outputs/rules/rules.py" - functions.createFile(file, header) - - if enableParallelism == True: - json_file = "outputs/rules/rules.json" - functions.createFile(json_file, "[\n") - - trees = Training.buildDecisionTree(df, root = root, file = file, config = config - , dataset_features = dataset_features - , parent_level = 0, leaf_id = 0, parents = 'root', validation_df = validation_df, main_process_id = process_id) - - print("-------------------------") - print("finished in ",time.time() - begin," seconds") - - obj = { - "trees": trees, - "alphas": alphas, - "config": config, - "nan_values": nan_values - } - - #----------------------------------------- - - #train set accuracy - df = base_df.copy() - evaluate(obj, df, task = 'train') - - #validation set accuracy - if isinstance(validation_df, pd.DataFrame): - evaluate(obj, validation_df, task = 'validation') - - #----------------------------------------- - - return obj - - #----------------------------------------- - -def predict(model, param): - - """ - Parameters: - model (built chefboost model): you should pass model argument to the return of fit function - param (list): pass 
input features as python list - - e.g. chef.predict(model, param = ['Sunny', 'Hot', 'High', 'Weak']) - Returns: - prediction - """ - - trees = model["trees"] - config = model["config"] - - alphas = [] - if "alphas" in model: - alphas = model["alphas"] - - nan_values = [] - if "nan_values" in model: - nan_values = model["nan_values"] - - #----------------------- - #handle missing values - - column_index = 0 - for column in nan_values: - column_name = column[0] - missing_value = column[1] - - if pd.isna(missing_value) != True: - #print("missing values will be replaced with ",missing_value," in ",column_name," column") - - if pd.isna(param[column_index]): - param[column_index] = missing_value - - column_index = column_index + 1 - - #print("instance: ", param) - #----------------------- - - enableGBM = config['enableGBM'] - adaboost = config['enableAdaboost'] - enableRandomForest = config['enableRandomForest'] - - #----------------------- - - classification = False - prediction = 0 - prediction_classes = [] - - #----------------------- - - if enableGBM == True: - - if len(trees) == config['epochs']: - classification = False - else: - classification = True - prediction_classes = [0 for i in alphas] - - #----------------------- - - if len(trees) > 1: #bagging or boosting - index = 0 - for tree in trees: - if adaboost != True: - - custom_prediction = tree.findDecision(param) +import numpy as np +import pandas as pd - if custom_prediction != None: - if type(custom_prediction) != str: #regression +from chefboost.commons import functions, evaluate as cb_eval +from chefboost.training import Training +from chefboost.tuning import gbm, adaboost as adaboost_clf, randomforest +from chefboost.commons.logger import Logger - if enableGBM == True and classification == True: - prediction_classes[index % len(alphas)] += custom_prediction - else: - prediction += custom_prediction - else: - classification = True - prediction_classes.append(custom_prediction) - else: #adaboost - prediction += alphas[index] * tree.findDecision(param) - index = index + 1 +# pylint: disable=too-many-nested-blocks, no-else-return, inconsistent-return-statements - if enableRandomForest == True: - #notice that gbm requires cumilative sum but random forest requires mean of each tree - prediction = prediction / len(trees) +logger = Logger(module="chefboost/Chefboost.py") - if adaboost == True: - prediction = functions.sign(prediction) - else: #regular decision tree - tree = trees[0] - prediction = tree.findDecision(param) +# ------------------------ - if classification == False: - return prediction - else: - if enableGBM == True and classification == True: - return alphas[np.argmax(prediction_classes)] - else: #classification - #e.g. random forest - #get predictions made by different trees - predictions = np.array(prediction_classes) - #find the most frequent prediction - (values, counts) = np.unique(predictions, return_counts=True) - idx = np.argmax(counts) - prediction = values[idx] +def fit( + df: pd.DataFrame, + config: Optional[dict] = None, + target_label: str = "Decision", + validation_df: Optional[pd.DataFrame] = None, +) -> Dict[str, Any]: + """ + Build (a) decision tree model(s) - return prediction + Args: + df (pandas data frame): Training data frame. -def save_model(base_model, file_name="model.pkl"): + config (dictionary): training configuration. e.g. 
- """ - Parameters: - base_model (built chefboost model): you should pass this to the return of fit function - file_name (string): you should pass target file name as exact path. - """ + config = { + 'algorithm' (string): ID3, 'C4.5, CART, CHAID or Regression + 'enableParallelism' (boolean): False - model = base_model.copy() + 'enableGBM' (boolean): True, + 'epochs' (int): 7, + 'learning_rate' (int): 1, - #modules cannot be saved. Save its reference instead. - module_names = [] - for tree in model["trees"]: - module_names.append(tree.__name__) + 'enableRandomForest' (boolean): True, + 'num_of_trees' (int): 5, - model["trees"] = module_names + 'enableAdaboost' (boolean): True, + 'num_of_weak_classifier' (int): 4 + } - f = open("outputs/rules/"+file_name, "wb") - pickle.dump(model,f) - f.close() + target_label (str): target label for supervised learning. + Default is Decision at the end of dataframe. -def load_model(file_name="model.pkl"): + validation_df (pandas data frame): validation data frame + if nothing is passed to validation data frame, then the function validates + built trees for training data frame - """ - Parameters: - file_name (string): exact path of the target saved model - Returns: - built chefboost model - """ + Returns: + chefboost model + """ - f = open('outputs/rules/'+file_name, 'rb') - model = pickle.load(f) + # ------------------------ - #restore modules from its references - modules = [] - for model_name in model["trees"]: - module = functions.restoreTree(model_name) - modules.append(module) + process_id = os.getpid() - model["trees"] = modules + # ------------------------ + # rename target column name + if target_label != "Decision": + # TODO: what if another column name is Decision? + df = df.rename(columns={target_label: "Decision"}) - return model + # if target is not the last column + if df.columns[-1] != "Decision": + if "Decision" in df.columns: + new_column_order = df.columns.drop("Decision").tolist() + ["Decision"] + logger.debug(new_column_order) + df = df[new_column_order] + else: + raise ValueError("Please set the target_label") -def restoreTree(moduleName): + # ------------------------ - """ - If you have decision rules, then this function enables you to load a built chefboost model. You can then call prediction. - Parameters: - moduleName (string): you should pass outputs/rules/rules if you want to restore outputs/rules/rules.py + base_df = df.copy() - Returns: - built chefboost model - """ + # ------------------------ - return functions.restoreTree(moduleName) + target_label = df.columns[len(df.columns) - 1] -def feature_importance(rules): + # ------------------------ + # handle NaN values - """ - Parameters: - rules (string or list): + nan_values = [] - e.g. decision_rules = "outputs/rules/rules.py" - or this could be retrieved from built model as shown below. 
+ for column in df.columns: + if df[column].dtypes != "object": + min_value = df[column].min() + idx = df[df[column].isna()].index - decision_rules = [] - for tree in model["trees"]: - rule = .__dict__["__spec__"].origin - decision_rules.append(rule) + nan_value = [] + nan_value.append(column) - Returns: - pandas data frame - """ + if idx.shape[0] > 0: + df.loc[idx, column] = min_value - 1 + nan_value.append(min_value - 1) + logger.debug("NaN values are replaced to {min_value - 1} in column {column}") + else: + nan_value.append(None) - if type(rules) != list: - rules = [rules] - else: - print("rules: ",rules) + nan_values.append(nan_value) - #----------------------------- + # ------------------------ - dfs = [] + # initialize params and folders + config = functions.initializeParams(config) + functions.initializeFolders() - for rule in rules: - print("Decision rule: ",rule) + # ------------------------ - file = open(rule, 'r') - lines = file.readlines() + algorithm = config["algorithm"] - pivot = {} - rules = [] + valid_algorithms = ["ID3", "C4.5", "CART", "CHAID", "Regression"] - #initialize feature importances - line_idx = 0 - for line in lines: - if line_idx == 0: - feature_explainer_list = line.split("#")[1].split(", ") - for feature_explainer in feature_explainer_list: - feature = feature_explainer.split(": ")[1].replace("\n", "") - pivot[feature] = 0 - else: - if "# " in line: - rule = line.strip().split("# ")[1] - rules.append(json.loads(rule)) + if algorithm not in valid_algorithms: + raise ValueError( + "Invalid algorithm passed. You passed ", + algorithm, + " but valid algorithms are ", + valid_algorithms, + ) - line_idx = line_idx + 1 + # ------------------------ - feature_names = list(pivot.keys()) + enableRandomForest = config["enableRandomForest"] + enableGBM = config["enableGBM"] + enableAdaboost = config["enableAdaboost"] + enableParallelism = config["enableParallelism"] - for feature in feature_names: - for rule in rules: - if rule["feature"] == feature: + # ------------------------ + if enableParallelism == True: + num_cores = config["num_cores"] + logger.info(f"[INFO]: {num_cores} CPU cores will be allocated in parallel running") - score = rule["metric_value"] * rule["instances"] - current_depth = rule["depth"] + from multiprocessing import set_start_method, freeze_support + + set_start_method("spawn", force=True) + freeze_support() + # ------------------------ + num_of_columns = df.shape[1] + + if algorithm == "Regression": + if df["Decision"].dtypes == "object": + raise ValueError( + "Regression trees cannot be applied for nominal target values!" + "You can either change the algorithm or data set." + ) + + if ( + df["Decision"].dtypes != "object" + ): # this must be regression tree even if it is not mentioned in algorithm + if algorithm != "Regression": + logger.warn( + f"You set the algorithm to {algorithm} but the Decision column of your" + " data set has non-object type." + "That's why, the algorithm is set to Regression to handle the data set." 
+ ) + + algorithm = "Regression" + config["algorithm"] = "Regression" + + if enableGBM == True: + logger.info("Gradient Boosting Machines...") + algorithm = "Regression" + config["algorithm"] = "Regression" + + if enableAdaboost == True: + # enableParallelism = False + for j in range(0, num_of_columns): + column_name = df.columns[j] + if df[column_name].dtypes == "object": + raise ValueError( + "Adaboost must be run on numeric data set for both features and target" + ) + + # ------------------------- + + logger.info(f"{algorithm} tree is going to be built...") + + # initialize a dictionary. this is going to be used to check features numeric or nominal. + # numeric features should be transformed to nominal values based on scales. + dataset_features = {} + + header = "def findDecision(obj): #" + + num_of_columns = df.shape[1] - 1 + for i in range(0, num_of_columns): + column_name = df.columns[i] + dataset_features[column_name] = df[column_name].dtypes + header += f"obj[{str(i)}]: {column_name}" + + if i != num_of_columns - 1: + header = header + ", " + + header = header + "\n" + + # ------------------------ + + begin = time.time() + + trees = [] + alphas = [] + + if enableAdaboost == True: + trees, alphas = adaboost_clf.apply( + df, config, header, dataset_features, validation_df=validation_df, process_id=process_id + ) + + elif enableGBM == True: + if df["Decision"].dtypes == "object": # transform classification problem to regression + trees, alphas = gbm.classifier( + df, + config, + header, + dataset_features, + validation_df=validation_df, + process_id=process_id, + ) + # classification = True + + else: # regression + trees = gbm.regressor( + df, + config, + header, + dataset_features, + validation_df=validation_df, + process_id=process_id, + ) + # classification = False + + elif enableRandomForest == True: + trees = randomforest.apply( + df, config, header, dataset_features, validation_df=validation_df, process_id=process_id + ) + else: # regular decision tree building + root = 1 + file = "outputs/rules/rules.py" + functions.createFile(file, header) + + if enableParallelism == True: + json_file = "outputs/rules/rules.json" + functions.createFile(json_file, "[\n") + + trees = Training.buildDecisionTree( + df, + root=root, + file=file, + config=config, + dataset_features=dataset_features, + parent_level=0, + leaf_id=0, + parents="root", + validation_df=validation_df, + main_process_id=process_id, + ) + + logger.info("-------------------------") + logger.info(f"finished in {time.time() - begin} seconds") + + obj = {"trees": trees, "alphas": alphas, "config": config, "nan_values": nan_values} + + # ----------------------------------------- + + # train set accuracy + df = base_df.copy() + evaluate(obj, df, task="train") + + # validation set accuracy + if isinstance(validation_df, pd.DataFrame): + evaluate(obj, validation_df, task="validation") + + # ----------------------------------------- + + return obj + + # ----------------------------------------- + + +def predict(model: dict, param: list) -> Union[str, int, float]: + """ + Predict the target label of given features from a pre-trained model + Args: + model (built chefboost model): pre-trained model which is the output + of fit function + param (list): pass input features as python list + e.g. 
chef.predict(model, param = ['Sunny', 'Hot', 'High', 'Weak']) + Returns: + prediction + """ + + trees = model["trees"] + config = model["config"] + + alphas = [] + if "alphas" in model: + alphas = model["alphas"] + + nan_values = [] + if "nan_values" in model: + nan_values = model["nan_values"] + + # ----------------------- + # handle missing values + + column_index = 0 + for column in nan_values: + column_name = column[0] + missing_value = column[1] + + if pd.isna(missing_value) != True: + logger.debug( + f"missing values will be replaced with {missing_value} in {column_name} column" + ) + + if pd.isna(param[column_index]): + param[column_index] = missing_value + + column_index = column_index + 1 + + logger.debug(f"instance: {param}") + # ----------------------- + + enableGBM = config["enableGBM"] + adaboost = config["enableAdaboost"] + enableRandomForest = config["enableRandomForest"] + + # ----------------------- + + classification = False + prediction = 0 + prediction_classes = [] + + # ----------------------- + + if enableGBM == True: + if len(trees) == config["epochs"]: + classification = False + else: + classification = True + prediction_classes = [0 for i in alphas] + + # ----------------------- + + if len(trees) > 1: # bagging or boosting + index = 0 + for tree in trees: + if adaboost != True: + custom_prediction = tree.findDecision(param) + + if custom_prediction != None: + if not isinstance(custom_prediction, str): # regression + if enableGBM == True and classification == True: + prediction_classes[index % len(alphas)] += custom_prediction + else: + prediction += custom_prediction + else: + classification = True + prediction_classes.append(custom_prediction) + else: # adaboost + prediction += alphas[index] * tree.findDecision(param) + index = index + 1 + + if enableRandomForest == True: + # notice that gbm requires cumulative sum but random forest requires mean of each tree + prediction = prediction / len(trees) + + if adaboost == True: + prediction = functions.sign(prediction) + else: # regular decision tree + tree = trees[0] + prediction = tree.findDecision(param) + + if classification == False: + return prediction + else: + if enableGBM == True and classification == True: + return alphas[np.argmax(prediction_classes)] + else: # classification + # e.g. random forest + # get predictions made by different trees + predictions = np.array(prediction_classes) + + # find the most frequent prediction + (values, counts) = np.unique(predictions, return_counts=True) + idx = np.argmax(counts) + prediction = values[idx] + + return prediction + + +def save_model(base_model: dict, file_name: str = "model.pkl") -> None: + """ + Save the pre-trained model to the file system + Args: + base_model (dict): pre-trained model which is the output + of the fit function + file_name (string): target file name as exact path. + """ + + model = base_model.copy() + + # modules cannot be saved. Save its reference instead.
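Since only module names survive pickling here, a saved model is only loadable where the generated rule modules still exist under `outputs/rules/`. A minimal round-trip sketch; the CSV path and feature values are placeholders, while the config mirrors the README example above:

```python
import pandas as pd
from chefboost import Chefboost as chef

# placeholder path; any frame whose last column is the 'Decision' target works
df = pd.read_csv("dataset/golf.txt")

model = chef.fit(df, config={"algorithm": "C4.5", "enableParallelism": False})
chef.save_model(model, "model.pkl")      # pickles the model with tree module names

restored = chef.load_model("model.pkl")  # re-imports the rule modules by name
prediction = chef.predict(restored, param=["Sunny", "Hot", "High", "Weak"])
```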
+ module_names = [] + for tree in model["trees"]: + module_names.append(tree.__name__) + + model["trees"] = module_names + + with open(f"outputs/rules/{file_name}", "wb") as f: + pickle.dump(model, f) + + +def load_model(file_name: str = "model.pkl") -> dict: + """ + Load a saved pre-trained model from the file system + Args: + file_name (str): exact path of the target saved model + Returns: + built model (dict) + """ + + with open("outputs/rules/" + file_name, "rb") as f: + model = pickle.load(f) + + # restore modules from its references + modules = [] + for model_name in model["trees"]: + module = functions.restoreTree(model_name) + modules.append(module) + + model["trees"] = modules + + return model + + +def restoreTree(moduleName) -> dict: + """ + Load a built model from a set of decision rules + Args: + moduleName (str): e.g. outputs/rules/rules to restore outputs/rules/rules.py + Returns: + built model (dict) + """ + + return functions.restoreTree(moduleName) + + +def feature_importance(rules: Union[str, list]) -> pd.DataFrame: + """ + Show the feature importance values of a built model + Args: + rules (str or list): e.g. decision_rules = "outputs/rules/rules.py" + or this could be retrieved from a built model as shown below. + + decision_rules = [] + for tree in model["trees"]: + rule = tree.__dict__["__spec__"].origin + decision_rules.append(rule) + Returns: + feature importance (pd.DataFrame) + """ + + if not isinstance(rules, list): + rules = [rules] + logger.info(f"rules: {rules}") + + # ----------------------------- + + dfs = [] + + for rule in rules: + logger.info(f"Decision rule: {rule}") + + with open(rule, "r", encoding="UTF-8") as file: + lines = file.readlines() + + pivot = {} + rules = [] + + # initialize feature importances + line_idx = 0 + for line in lines: + if line_idx == 0: + feature_explainer_list = line.split("#")[1].split(", ") + for feature_explainer in feature_explainer_list: + feature = feature_explainer.split(": ")[1].replace("\n", "") + pivot[feature] = 0 + else: + if "# " in line: + rule = line.strip().split("# ")[1] + rules.append(json.loads(rule)) - child_scores = 0 - #find child node importances - for child_rule in rules: - if child_rule["depth"] == current_depth + 1: - child_score = child_rule["metric_value"] * child_rule["instances"] - child_scores = child_scores + child_score + line_idx = line_idx + 1 - score = score - child_scores + feature_names = list(pivot.keys()) - pivot[feature] = pivot[feature] + score + for feature in feature_names: + for rule in rules: + if rule["feature"] == feature: + score = rule["metric_value"] * rule["instances"] + current_depth = rule["depth"] - #normalize feature importance + child_scores = 0 + # find child node importances + for child_rule in rules: + if child_rule["depth"] == current_depth + 1: + child_score = child_rule["metric_value"] * child_rule["instances"] - total_score = 0 - for feature, score in pivot.items(): - total_score = total_score + score + child_scores = child_scores + child_score - for feature, score in pivot.items(): - pivot[feature] = round(pivot[feature] / total_score, 4) + score = score - child_scores - instances = [] - for feature, score in pivot.items(): - instance = [] - instance.append(feature) - instance.append(score) - instances.append(instance) + pivot[feature] = pivot[feature] + score - df = pd.DataFrame(instances, columns = ["feature", "final_importance"]) - df = 
df.sort_values(by = ["final_importance"], ascending = False) + for feature, score in pivot.items(): + pivot[feature] = round(pivot[feature] / total_score, 4) - if len(rules) == 1: - return df - else: - dfs.append(df) + instances = [] + for feature, score in pivot.items(): + instance = [] + instance.append(feature) + instance.append(score) + instances.append(instance) - if len(rules) > 1: + df = pd.DataFrame(instances, columns=["feature", "final_importance"]) + df = df.sort_values(by=["final_importance"], ascending=False) - hf = pd.DataFrame(feature_names, columns = ["feature"]) - hf["importance"] = 0 + if len(rules) == 1: + return df + else: + dfs.append(df) - for df in dfs: - hf = hf.merge(df, on = ["feature"], how = "left") - hf["importance"] = hf["importance"] + hf["final_importance"] - hf = hf.drop(columns = ["final_importance"]) + if len(rules) > 1: + hf = pd.DataFrame(feature_names, columns=["feature"]) + hf["importance"] = 0 - #------------------------ - #normalize - hf["importance"] = hf["importance"] / hf["importance"].sum() - hf = hf.sort_values(by = ["importance"], ascending = False) + for df in dfs: + hf = hf.merge(df, on=["feature"], how="left") + hf["importance"] = hf["importance"] + hf["final_importance"] + hf = hf.drop(columns=["final_importance"]) - return hf + # ------------------------ + # normalize + hf["importance"] = hf["importance"] / hf["importance"].sum() + hf = hf.sort_values(by=["importance"], ascending=False) -def evaluate(model, df, target_label = 'Decision', task = 'test'): + return hf - """ - Parameters: - model (built chefboost model): you should pass the return of fit function - df (pandas data frame): data frame you would like to evaluate - task (string): optionally you can pass this train, validation or test - """ - #-------------------------- +def evaluate( + model: dict, df: pd.DataFrame, target_label: str = "Decision", task: str = "test" +) -> None: + """ + Evaluate the performance of a built model on a data set + Args: + model (dict): built model which is the output of fit function + df (pandas data frame): data frame you would like to evaluate + target_label (str): target label + task (string): set this to train, validation or test + Returns: + None + """ - if target_label != 'Decision': - df = df.rename(columns = {target_label: 'Decision'}) + # -------------------------- - #if target is not the last column - if df.columns[-1] != 'Decision': - new_column_order = df.columns.drop('Decision').tolist() + ['Decision'] - print(new_column_order) - df = df[new_column_order] + if target_label != "Decision": + df = df.rename(columns={target_label: "Decision"}) - #-------------------------- + # if target is not the last column + if df.columns[-1] != "Decision": + new_column_order = df.columns.drop("Decision").tolist() + ["Decision"] + logger.debug(new_column_order) + df = df[new_column_order] + + # -------------------------- - functions.bulk_prediction(df, model) + functions.bulk_prediction(df, model) - enableAdaboost = model["config"]["enableAdaboost"] + enableAdaboost = model["config"]["enableAdaboost"] - if enableAdaboost == True: - df['Decision'] = df['Decision'].astype(str) - df['Prediction'] = df['Prediction'].astype(str) + if enableAdaboost == True: + df["Decision"] = df["Decision"].astype(str) + df["Prediction"] = df["Prediction"].astype(str) - eval.evaluate(df, task = task) + cb_eval.evaluate(df, task=task) diff --git a/chefboost/commons/evaluate.py b/chefboost/commons/evaluate.py index 62a4061..44eba39 100644 --- a/chefboost/commons/evaluate.py +++ 
diff --git a/chefboost/commons/evaluate.py b/chefboost/commons/evaluate.py
index 62a4061..44eba39 100644
--- a/chefboost/commons/evaluate.py
+++ b/chefboost/commons/evaluate.py
@@ -1,121 +1,133 @@
 import math
+from chefboost.commons.logger import Logger
-def evaluate(df, task = 'train'):
-
-    if df['Decision'].dtypes == 'object':
-        problem_type = 'classification'
-    else:
-        problem_type = 'regression'
-
-    #-------------------------------------
-
-    instances = df.shape[0]
-
-    print("-------------------------")
-    print("Evaluate ",task,"set")
-    print("-------------------------")
-
-    if problem_type == 'classification':
-
-        idx = df[df['Prediction'] == df['Decision']].index
-        accuracy = 100*len(idx)/df.shape[0]
-        print("Accuracy: ", accuracy,"% on ",instances," instances")
-
-        #-----------------------------
-
-        predictions = df.Prediction.values
-        actuals = df.Decision.values
-
-        #-----------------------------
-        #confusion matrix
-
-        #labels = df.Prediction.unique()
-        labels = df.Decision.unique()
-
-        confusion_matrix = []
-        for prediction_label in labels:
-            confusion_row = []
-            for actual_label in labels:
-                item = len(df[(df['Prediction'] == prediction_label)
-                    & (df['Decision'] == actual_label)]['Decision'].values)
-                confusion_row.append(item)
-            confusion_matrix.append(confusion_row)
-
-        print("Labels: ", labels)
-        print("Confusion matrix: ",confusion_matrix)
-
-        #-----------------------------
-        #precision and recall
-
-        for decision_class in labels:
-
-            fp = 0; fn = 0; tp = 0; tn = 0
-            for i in range(0, len(predictions)):
-                prediction = predictions[i]
-                actual = actuals[i]
-
-                if actual == decision_class and prediction == decision_class:
-                    tp = tp + 1
-                elif actual != decision_class and prediction != decision_class:
-                    tn = tn + 1
-                elif actual != decision_class and prediction == decision_class:
-                    fp = fp + 1
-                elif actual == decision_class and prediction != decision_class:
-                    fn = fn + 1
-
-            epsilon = 0.0000001 #to avoid divison by zero exception
-            precision = round(100*tp / (tp + fp + epsilon), 4)
-            recall = round(100*tp / (tp + fn + epsilon), 4) #tpr
-            f1_score = round((2 * precision * recall) / (precision + recall + epsilon), 4)
-            accuracy = round(100 * (tp + tn) / (tp + tn + fp + fn + epsilon), 4)
-
-            if len(labels) >= 3:
-                print("Decision ", decision_class, " => ",end = '')
-                print("Accuray: ", accuracy,"%, ", end = '')
-
-            print("Precision: ", precision,"%, Recall: ", recall,"%, F1: ", f1_score,"%")
-            #print("TP: ",tp,", TN: ",tn,", FP: ", fp,", FN: ",fn)
-
-            if len(labels) < 3:
-                break
-
-    #-------------------------------------
-    else:
-
-        df['Absolute_Error'] = abs(df['Prediction'] - df['Decision'])
-        df['Absolute_Error_Squared'] = df['Absolute_Error'] * df['Absolute_Error']
-        df['Decision_Squared'] = df['Decision'] * df['Decision']
-        df['Decision_Mean'] = df['Decision'].mean()
-
-        #print(df)
-
-        if instances > 0:
-
-            mae = df['Absolute_Error'].sum()/instances
-            print("MAE: ",mae)
-
-            mse = df['Absolute_Error_Squared'].sum()/instances
-            print("MSE: ", mse)
-
-            rmse = math.sqrt(mse)
-            print("RMSE: ",rmse)
-
-            rae = 0; rrse = 0
-            try: #divisor might be equal to 0.
-
-                rae = math.sqrt(df['Absolute_Error_Squared'].sum())/math.sqrt(df['Decision_Squared'].sum())
-
-                rrse = math.sqrt((df['Absolute_Error_Squared'].sum()) / ((df['Decision_Mean'] - df['Decision']) ** 2).sum())
-
-            except Exception as err:
-                print(str(err))
-
-            print("RAE: ", rae)
-            print("RRSE: ",rrse)
-
-            mean = df['Decision'].mean()
-            print("Mean: ", mean)
-
-            if mean > 0:
-                print("MAE / Mean: ",100*mae/mean,"%")
-                print("RMSE / Mean: ",100*rmse/mean,"%")
\ No newline at end of file
+# pylint: disable=broad-except
+
+logger = Logger(module="chefboost/commons/evaluate.py")
+
+
+def evaluate(df, task="train"):
+    if df["Decision"].dtypes == "object":
+        problem_type = "classification"
+    else:
+        problem_type = "regression"
+
+    # -------------------------------------
+
+    instances = df.shape[0]
+
+    logger.info("-------------------------")
+    logger.info(f"Evaluate {task} set")
+    logger.info("-------------------------")
+
+    if problem_type == "classification":
+        idx = df[df["Prediction"] == df["Decision"]].index
+        accuracy = 100 * len(idx) / df.shape[0]
+        logger.info(f"Accuracy: {accuracy}% on {instances} instances")
+
+        # -----------------------------
+
+        predictions = df.Prediction.values
+        actuals = df.Decision.values
+
+        # -----------------------------
+        # confusion matrix
+
+        # labels = df.Prediction.unique()
+        labels = df.Decision.unique()
+
+        confusion_matrix = []
+        for prediction_label in labels:
+            confusion_row = []
+            for actual_label in labels:
+                item = len(
+                    df[(df["Prediction"] == prediction_label) & (df["Decision"] == actual_label)][
+                        "Decision"
+                    ].values
+                )
+                confusion_row.append(item)
+            confusion_matrix.append(confusion_row)
+
+        logger.info(f"Labels: {labels}")
+        logger.info(f"Confusion matrix: {confusion_matrix}")
+
+        # -----------------------------
+        # precision and recall
+
+        for decision_class in labels:
+            fp = 0
+            fn = 0
+            tp = 0
+            tn = 0
+            for i, prediction in enumerate(predictions):
+                actual = actuals[i]
+
+                if actual == decision_class and prediction == decision_class:
+                    tp = tp + 1
+                # pylint: disable=consider-using-in
+                elif actual != decision_class and prediction != decision_class:
+                    tn = tn + 1
+                elif actual != decision_class and prediction == decision_class:
+                    fp = fp + 1
+                elif actual == decision_class and prediction != decision_class:
+                    fn = fn + 1
+
+            epsilon = 0.0000001  # to avoid division by zero exception
+            precision = round(100 * tp / (tp + fp + epsilon), 4)
+            recall = round(100 * tp / (tp + fn + epsilon), 4)  # tpr
+            f1_score = round((2 * precision * recall) / (precision + recall + epsilon), 4)
+            accuracy = round(100 * (tp + tn) / (tp + tn + fp + fn + epsilon), 4)
+
+            if len(labels) >= 3:
+                logger.info(f"Decision {decision_class}")
+                logger.info(f"Accuracy: {accuracy}%")
+
+            logger.info(f"Precision: {precision}%, Recall: {recall}%, F1: {f1_score}%")
+            logger.debug(f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")
+
+            if len(labels) < 3:
+                break
+
+    # -------------------------------------
+    else:
+        df["Absolute_Error"] = abs(df["Prediction"] - df["Decision"])
+        df["Absolute_Error_Squared"] = df["Absolute_Error"] * df["Absolute_Error"]
+        df["Decision_Squared"] = df["Decision"] * df["Decision"]
+        df["Decision_Mean"] = df["Decision"].mean()
+
+        logger.debug(df)
+
+        if instances > 0:
+            mae = df["Absolute_Error"].sum() / instances
+            logger.info(f"MAE: {mae}")
+
+            mse = df["Absolute_Error_Squared"].sum() / instances
+            logger.info(f"MSE: {mse}")
+
+            rmse = math.sqrt(mse)
+            logger.info(f"RMSE: {rmse}")
+
+            rae = 0
+            rrse = 0
+            try:  # divisor might be equal to 0.
+                rae = math.sqrt(df["Absolute_Error_Squared"].sum()) / math.sqrt(
+                    df["Decision_Squared"].sum()
+                )
+
+                rrse = math.sqrt(
+                    (df["Absolute_Error_Squared"].sum())
+                    / ((df["Decision_Mean"] - df["Decision"]) ** 2).sum()
+                )
+
+            except Exception as err:
+                logger.error(str(err))
+
+            logger.info(f"RAE: {rae}")
+            logger.info(f"RRSE: {rrse}")
+
+            mean = df["Decision"].mean()
+            logger.info(f"Mean: {mean}")
+
+            if mean > 0:
+                logger.info(f"MAE / Mean: {100 * mae / mean}%")
+                logger.info(f"RMSE / Mean: {100 * rmse / mean}%")
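# To sanity-check the regression metrics above on toy numbers, the same formulas can be
# traced by hand; the data here is illustrative only:

import math
import pandas as pd

df = pd.DataFrame({"Decision": [10.0, 12.0, 14.0], "Prediction": [11.0, 12.0, 13.0]})
df["Absolute_Error"] = abs(df["Prediction"] - df["Decision"])
df["Absolute_Error_Squared"] = df["Absolute_Error"] * df["Absolute_Error"]
df["Decision_Squared"] = df["Decision"] * df["Decision"]
df["Decision_Mean"] = df["Decision"].mean()

instances = df.shape[0]
mae = df["Absolute_Error"].sum() / instances  # (1 + 0 + 1) / 3 = 0.667
mse = df["Absolute_Error_Squared"].sum() / instances  # (1 + 0 + 1) / 3 = 0.667
rmse = math.sqrt(mse)  # 0.816
# error norm over target norm: sqrt(2) / sqrt(100 + 144 + 196) = 0.067
rae = math.sqrt(df["Absolute_Error_Squared"].sum()) / math.sqrt(df["Decision_Squared"].sum())
# error norm over deviation-from-mean norm: sqrt(2 / 8) = 0.5
rrse = math.sqrt(df["Absolute_Error_Squared"].sum() / ((df["Decision_Mean"] - df["Decision"]) ** 2).sum())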
diff --git a/chefboost/commons/functions.py b/chefboost/commons/functions.py
index f4d5b0a..3df4138 100644
--- a/chefboost/commons/functions.py
+++ b/chefboost/commons/functions.py
@@ -1,133 +1,164 @@
-import numpy as np
 import pathlib
-import imp
+import imp  # pylint: disable=deprecated-module
 import os
 from os import path
 import multiprocessing
+from typing import Optional
+import numpy as np
 from chefboost import Chefboost as cb
+from chefboost.commons.logger import Logger
+
+# pylint: disable=no-else-return, broad-except
+
+logger = Logger(module="chefboost/commons/functions.py")
+
 def bulk_prediction(df, model):
+    predictions = []
+    for _, instance in df.iterrows():
+        features = instance.values[0:-1]
+        prediction = cb.predict(model, features)
+        predictions.append(prediction)
-    predictions = []
-    for index, instance in df.iterrows():
-        features = instance.values[0:-1]
-        prediction = cb.predict(model, features)
-        predictions.append(prediction)
+    df["Prediction"] = predictions
-    df['Prediction'] = predictions
 def restoreTree(moduleName):
-    fp, pathname, description = imp.find_module(moduleName)
-    return imp.load_module(moduleName, fp, pathname, description)
+    fp, pathname, description = imp.find_module(moduleName)
+    return imp.load_module(moduleName, fp, pathname, description)
+
 def softmax(w):
-    e = np.exp(np.array(w, dtype=np.float32))
-    dist = e / np.sum(e)
-    return dist
+    e = np.exp(np.array(w, dtype=np.float32))
+    dist = e / np.sum(e)
+    return dist
+
 def sign(x):
-    if x > 0:
-        return 1
-    elif x < 0:
-        return -1
-    else:
-        return 0
+    if x > 0:
+        return 1
+    elif x < 0:
+        return -1
+    else:
+        return 0
+
 def formatRule(root):
-    resp = ''
+    resp = ""
+
+    for _ in range(0, root):
+        resp = resp + "   "
+
+    return resp
+
-    for i in range(0, root):
-        resp = resp + '   '
+def storeRule(file, content):
+    with open(file, "a+", encoding="UTF-8") as f:
+        f.writelines(content)
+        f.writelines("\n")
-    return resp
-def storeRule(file,content):
-    f = open(file, "a+")
-    f.writelines(content)
-    f.writelines("\n")
+def createFile(file, content):
+    with open(file, "w", encoding="UTF-8") as f:
+        f.write(content)
-def createFile(file,content):
-    f = open(file, "w")
-    f.write(content)
 def initializeFolders():
-    import sys
-    sys.path.append("..")
-    pathlib.Path("outputs").mkdir(parents=True, exist_ok=True)
-    pathlib.Path("outputs/data").mkdir(parents=True, exist_ok=True)
-    pathlib.Path("outputs/rules").mkdir(parents=True, exist_ok=True)
-
-    #-----------------------------------
-
-    #clear existing rules in outputs/
-
-    outputs_path = os.getcwd()+os.path.sep+"outputs"+os.path.sep
-
-    try:
-        if path.exists(outputs_path+"data"):
-            for file in os.listdir(outputs_path+"data"):
-                os.remove(outputs_path+"data"+os.path.sep+file)
-
-        if path.exists(outputs_path+"rules"):
-            for file in os.listdir(outputs_path+"rules"):
-                if ".py" in file or ".json" in file or ".txt" in file or ".pkl" in file or ".csv" in file:
-                    os.remove(outputs_path+"rules"+os.path.sep+file)
-    except Exception as err:
-        print("WARNING: ", str(err))
-
-    #------------------------------------
-
-def initializeParams(config):
-    algorithm = 'ID3'
-    enableRandomForest = False; num_of_trees = 5; enableMultitasking = False
-    enableGBM = False; epochs = 10; learning_rate = 1; max_depth = 5
-    enableAdaboost = False; num_of_weak_classifier = 4
-    enableParallelism = True
-    num_cores = int(multiprocessing.cpu_count()/2) #allocate half of your total cores
-    #num_cores = int((3*multiprocessing.cpu_count())/4) #allocate 3/4 of your total cores
-    #num_cores = multiprocessing.cpu_count()
-
-    for key, value in config.items():
-        if key == 'algorithm':
-            algorithm = value
-        #---------------------------------
-        elif key == 'enableRandomForest':
-            enableRandomForest = value
-        elif key == 'num_of_trees':
-            num_of_trees = value
-        elif key == 'enableMultitasking':
-            enableMultitasking = value
-        #---------------------------------
-        elif key == 'enableGBM':
-            enableGBM = value
-        elif key == 'epochs':
-            epochs = value
-        elif key == 'learning_rate':
-            learning_rate = value
-        elif key == 'max_depth':
-            max_depth = value
-        #---------------------------------
-        elif key == 'enableAdaboost':
-            enableAdaboost = value
-        elif key == 'num_of_weak_classifier':
-            num_of_weak_classifier = value
-        #---------------------------------
-        elif key == 'enableParallelism':
-            enableParallelism = value
-        elif key == 'num_cores':
-            num_cores = value
-
-    config['algorithm'] = algorithm
-    config['enableRandomForest'] = enableRandomForest
-    config['num_of_trees'] = num_of_trees
-    config['enableMultitasking'] = enableMultitasking
-    config['enableGBM'] = enableGBM
-    config['epochs'] = epochs
-    config['learning_rate'] = learning_rate
-    config['max_depth'] = max_depth
-    config['enableAdaboost'] = enableAdaboost
-    config['num_of_weak_classifier'] = num_of_weak_classifier
-    config['enableParallelism'] = enableParallelism
-    config['num_cores'] = num_cores
-
-    return config
+    import sys
+
+    sys.path.append("..")
+    pathlib.Path("outputs").mkdir(parents=True, exist_ok=True)
+    pathlib.Path("outputs/data").mkdir(parents=True, exist_ok=True)
+    pathlib.Path("outputs/rules").mkdir(parents=True, exist_ok=True)
+
+    # -----------------------------------
+
+    # clear existing rules in outputs/
+
+    outputs_path = os.getcwd() + os.path.sep + "outputs" + os.path.sep
+
+    try:
+        if path.exists(outputs_path + "data"):
+            for file in os.listdir(outputs_path + "data"):
+                os.remove(outputs_path + "data" + os.path.sep + file)
+
+        if path.exists(outputs_path + "rules"):
+            for file in os.listdir(outputs_path + "rules"):
+                if (
+                    ".py" in file
+                    or ".json" in file
+                    or ".txt" in file
+                    or ".pkl" in file
+                    or ".csv" in file
+                ):
+                    os.remove(outputs_path + "rules" + os.path.sep + file)
+    except Exception as err:
+        logger.warn(str(err))
+
+    # ------------------------------------
+
+
+def initializeParams(config: Optional[dict] = None):
+
+    if config is None:
+        config = {}
+
+    algorithm = "ID3"
+    enableRandomForest = False
+    num_of_trees = 5
+    enableMultitasking = False
+    enableGBM = False
+    epochs = 10
+    learning_rate = 1
+    max_depth = 5
+    enableAdaboost = False
+    num_of_weak_classifier = 4
+    enableParallelism = True
+    num_cores = int(multiprocessing.cpu_count() / 2)  # allocate half of your total cores
+    # num_cores = int((3*multiprocessing.cpu_count())/4) #allocate 3/4 of your total cores
+    # num_cores = multiprocessing.cpu_count()
+
+    for key, value in config.items():
+        if key == "algorithm":
+            algorithm = value
+        # ---------------------------------
+        elif key == "enableRandomForest":
+            enableRandomForest = value
+        elif key == "num_of_trees":
+            num_of_trees = value
+        elif key == "enableMultitasking":
+            enableMultitasking = value
+        # ---------------------------------
+        elif key == "enableGBM":
+            enableGBM = value
+        elif key == "epochs":
+            epochs = value
+        elif key == "learning_rate":
+            learning_rate = value
+        elif key == "max_depth":
+            max_depth = value
+        # ---------------------------------
+        elif key == "enableAdaboost":
+            enableAdaboost = value
+        elif key == "num_of_weak_classifier":
+            num_of_weak_classifier = value
+        # ---------------------------------
+        elif key == "enableParallelism":
+            enableParallelism = value
+        elif key == "num_cores":
+            num_cores = value
+
+    config["algorithm"] = algorithm
+    config["enableRandomForest"] = enableRandomForest
+    config["num_of_trees"] = num_of_trees
+    config["enableMultitasking"] = enableMultitasking
+    config["enableGBM"] = enableGBM
+    config["epochs"] = epochs
+    config["learning_rate"] = learning_rate
+    config["max_depth"] = max_depth
+    config["enableAdaboost"] = enableAdaboost
+    config["num_of_weak_classifier"] = num_of_weak_classifier
+    config["enableParallelism"] = enableParallelism
+    config["num_cores"] = num_cores
+
+    return config
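# For reference, a sketch of a config dict covering the keys initializeParams recognizes;
# any key left out falls back to the defaults above (the values here are illustrative,
# not recommendations):

from chefboost.commons import functions

config = {
    "algorithm": "C4.5",  # ID3, C4.5, CART, CHAID or Regression
    "enableParallelism": True,
    "num_cores": 2,
    "enableRandomForest": False,
    "num_of_trees": 5,
    "enableMultitasking": False,
    "enableGBM": False,
    "epochs": 10,
    "learning_rate": 1,
    "max_depth": 5,
    "enableAdaboost": False,
    "num_of_weak_classifier": 4,
}
config = functions.initializeParams(config)  # missing keys are filled with defaults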
diff --git a/chefboost/commons/logger.py b/chefboost/commons/logger.py
new file mode 100644
index 0000000..4a8dc6d
--- /dev/null
+++ b/chefboost/commons/logger.py
@@ -0,0 +1,40 @@
+import os
+import logging
+from datetime import datetime
+
+# pylint: disable=broad-except
+class Logger:
+    def __init__(self, module=None):
+        self.module = module
+        log_level = os.environ.get("CHEFBOOST_LOG_LEVEL", str(logging.INFO))
+        try:
+            self.log_level = int(log_level)
+        except Exception as err:
+            self.dump_log(
+                f"Exception while parsing $CHEFBOOST_LOG_LEVEL. "
+                f"Expected int but it is {log_level} ({str(err)})"
+            )
+            self.log_level = logging.INFO
+
+    def info(self, message):
+        if self.log_level <= logging.INFO:
+            self.dump_log(f"{message}")
+
+    def debug(self, message):
+        if self.log_level <= logging.DEBUG:
+            self.dump_log(f"🕷️ {message}")
+
+    def warn(self, message):
+        if self.log_level <= logging.WARNING:
+            self.dump_log(f"⚠️ {message}")
+
+    def error(self, message):
+        if self.log_level <= logging.ERROR:
+            self.dump_log(f"🔴 {message}")
+
+    def critical(self, message):
+        if self.log_level <= logging.CRITICAL:
+            self.dump_log(f"💥 {message}")
+
+    def dump_log(self, message):
+        print(f"{str(datetime.now())[2:-7]} - {message}")
diff --git a/chefboost/training/Preprocess.py b/chefboost/training/Preprocess.py
index 7900ae3..127efa9 100644
--- a/chefboost/training/Preprocess.py
+++ b/chefboost/training/Preprocess.py
@@ -1,132 +1,174 @@
+import math
 import numpy as np
+from chefboost.training import Training
+from chefboost.commons.logger import Logger
-import math
+logger = Logger(module="chefboost/training/Preprocess.py")
-from chefboost.training import Training
-#from training import Training
 def processContinuousFeatures(algorithm, df, column_name, entropy, config):
-
-    #if True:
-    if df[column_name].nunique() <= 20:
-        unique_values = sorted(df[column_name].unique())
-    else:
-        unique_values = []
-
-        df_mean = df[column_name].mean()
-        df_std = df[column_name].std(ddof=0)
-        df_min = df[column_name].min()
-        df_max = df[column_name].max()
-
-        unique_values.append(df[column_name].min())
-        unique_values.append(df[column_name].max())
-        unique_values.append(df[column_name].mean())
-
-        scales = list(range(-3,+4, 1))
-        for scale in scales:
-            if df_mean + scale * df_std > df_min and df_mean + scale * df_std < df_max:
-                unique_values.append(df_mean + 
scale * df_std) - - unique_values.sort() - - #print(column_name,"->",unique_values) - - subset_gainratios = []; subset_gains = []; subset_ginis = []; subset_red_stdevs = []; subset_chi_squares = [] - - if len(unique_values) == 1: - winner_threshold = unique_values[0] - df[column_name] = np.where(df[column_name] <= winner_threshold, "<="+str(winner_threshold), ">"+str(winner_threshold)) - return df - - for i in range(0, len(unique_values)-1): - threshold = unique_values[i] - - subset1 = df[df[column_name] <= threshold] - subset2 = df[df[column_name] > threshold] - - subset1_rows = subset1.shape[0]; subset2_rows = subset2.shape[0] - total_instances = df.shape[0] #subset1_rows+subset2_rows - - subset1_probability = subset1_rows / total_instances - subset2_probability = subset2_rows / total_instances - - if algorithm == 'ID3' or algorithm == 'C4.5': - threshold_gain = entropy - subset1_probability*Training.calculateEntropy(subset1, config) - subset2_probability*Training.calculateEntropy(subset2, config) - subset_gains.append(threshold_gain) - - if algorithm == 'C4.5': #C4.5 also need gain in the block above. That's why, instead of else if we used direct if condition here - - threshold_splitinfo = -subset1_probability * math.log(subset1_probability, 2)-subset2_probability*math.log(subset2_probability, 2) - gainratio = threshold_gain / threshold_splitinfo - subset_gainratios.append(gainratio) - - elif algorithm == 'CART': - decision_for_subset1 = subset1['Decision'].value_counts().tolist() - decision_for_subset2 = subset2['Decision'].value_counts().tolist() - - gini_subset1 = 1; gini_subset2 = 1 - - for j in range(0, len(decision_for_subset1)): - gini_subset1 = gini_subset1 - math.pow((decision_for_subset1[j]/subset1_rows),2) - - for j in range(0, len(decision_for_subset2)): - gini_subset2 = gini_subset2 - math.pow((decision_for_subset2[j]/subset2_rows),2) - - gini = (subset1_rows/total_instances)*gini_subset1 + (subset2_rows/total_instances) * gini_subset2 - - subset_ginis.append(gini) - - elif algorithm == "CHAID": - #subset1 = high, subset2 = normal - - unique_decisions = df['Decision'].unique() #Yes, No - num_of_decisions = len(unique_decisions) #2 - - subset1_expected = subset1.shape[0] / num_of_decisions - subset2_expected = subset2.shape[0] / num_of_decisions - - chi_square = 0 - for d in unique_decisions: #Yes, No - - #decision = Yes - subset1_d = subset1[subset1["Decision"] == d] #high, yes - subset2_d = subset2[subset2["Decision"] == d] #normal, yes - - subset1_d_chi_square = math.sqrt(((subset1_d.shape[0] - subset1_expected) * (subset1_d.shape[0] - subset1_expected))/subset1_expected) - - subset2_d_chi_square = math.sqrt(((subset2_d.shape[0] - subset2_expected) * (subset2_d.shape[0] - subset2_expected))/subset2_expected) - - chi_square = chi_square + subset1_d_chi_square + subset2_d_chi_square - - subset_chi_squares.append(chi_square) - - #---------------------------------- - elif algorithm == 'Regression': - superset_stdev = df['Decision'].std(ddof=0) - subset1_stdev = subset1['Decision'].std(ddof=0) - subset2_stdev = subset2['Decision'].std(ddof=0) - - threshold_weighted_stdev = (subset1_rows/total_instances)*subset1_stdev + (subset2_rows/total_instances)*subset2_stdev - threshold_reducted_stdev = superset_stdev - threshold_weighted_stdev - subset_red_stdevs.append(threshold_reducted_stdev) - - #---------------------------------- - - if algorithm == "C4.5": - winner_one = subset_gainratios.index(max(subset_gainratios)) - elif algorithm == "ID3": #actually, ID3 does not support for 
continuous features but we can still do it
-        winner_one = subset_gains.index(max(subset_gains))
-    elif algorithm == "CART":
-        winner_one = subset_ginis.index(min(subset_ginis))
-    elif algorithm == "CHAID":
-        winner_one = subset_chi_squares.index(max(subset_chi_squares))
-    elif algorithm == "Regression":
-        winner_one = subset_red_stdevs.index(max(subset_red_stdevs))
-
-    winner_threshold = unique_values[winner_one]
-    #print(column_name,": ", winner_threshold," in ", unique_values)
-
-    #print("theshold is ",winner_threshold," for ",column_name)
-    df[column_name] = np.where(df[column_name] <= winner_threshold, "<="+str(winner_threshold), ">"+str(winner_threshold))
-
-    return df
+    # if True:
+    if df[column_name].nunique() <= 20:
+        unique_values = sorted(df[column_name].unique())
+    else:
+        unique_values = []
+
+        df_mean = df[column_name].mean()
+        df_std = df[column_name].std(ddof=0)
+        df_min = df[column_name].min()
+        df_max = df[column_name].max()
+
+        unique_values.append(df[column_name].min())
+        unique_values.append(df[column_name].max())
+        unique_values.append(df[column_name].mean())
+
+        scales = list(range(-3, +4, 1))
+        for scale in scales:
+            if df_mean + scale * df_std > df_min and df_mean + scale * df_std < df_max:
+                unique_values.append(df_mean + scale * df_std)
+
+    unique_values.sort()
+
+    logger.debug(f"{column_name} -> {unique_values}")
+
+    subset_gainratios = []
+    subset_gains = []
+    subset_ginis = []
+    subset_red_stdevs = []
+    subset_chi_squares = []
+
+    if len(unique_values) == 1:
+        winner_threshold = unique_values[0]
+        df[column_name] = np.where(
+            df[column_name] <= winner_threshold,
+            "<=" + str(winner_threshold),
+            ">" + str(winner_threshold),
+        )
+        return df
+
+    for i in range(0, len(unique_values) - 1):
+        threshold = unique_values[i]
+
+        subset1 = df[df[column_name] <= threshold]
+        subset2 = df[df[column_name] > threshold]
+
+        subset1_rows = subset1.shape[0]
+        subset2_rows = subset2.shape[0]
+        total_instances = df.shape[0]  # subset1_rows+subset2_rows
+
+        subset1_probability = subset1_rows / total_instances
+        subset2_probability = subset2_rows / total_instances
+
+        if algorithm in ["ID3", "C4.5"]:
+            threshold_gain = (
+                entropy
+                - subset1_probability * Training.calculateEntropy(subset1, config)
+                - subset2_probability * Training.calculateEntropy(subset2, config)
+            )
+            subset_gains.append(threshold_gain)
+
+        # C4.5 also needs gain in the block above.
+        # That's why, instead of else if we used direct if condition here
+        if algorithm == "C4.5":
+            threshold_splitinfo = -subset1_probability * math.log(
+                subset1_probability, 2
+            ) - subset2_probability * math.log(subset2_probability, 2)
+            gainratio = threshold_gain / threshold_splitinfo
+            subset_gainratios.append(gainratio)
+
+        elif algorithm == "CART":
+            decision_for_subset1 = subset1["Decision"].value_counts().tolist()
+            decision_for_subset2 = subset2["Decision"].value_counts().tolist()
+
+            gini_subset1 = 1
+            gini_subset2 = 1
+
+            for current_decision_for_subset1 in decision_for_subset1:
+                gini_subset1 = gini_subset1 - math.pow(
+                    (current_decision_for_subset1 / subset1_rows), 2
+                )
+
+            for current_decision_for_subset2 in decision_for_subset2:
+                gini_subset2 = gini_subset2 - math.pow(
+                    (current_decision_for_subset2 / subset2_rows), 2
+                )
+
+            gini = (subset1_rows / total_instances) * gini_subset1 + (
+                subset2_rows / total_instances
+            ) * gini_subset2
+
+            subset_ginis.append(gini)
+
+        elif algorithm == "CHAID":
+            # subset1 = high, subset2 = normal
+
+            unique_decisions = df["Decision"].unique()  # Yes, No
+            num_of_decisions = len(unique_decisions)  # 2
+
+            subset1_expected = subset1.shape[0] / num_of_decisions
+            subset2_expected = subset2.shape[0] / num_of_decisions
+
+            chi_square = 0
+            for d in unique_decisions:  # Yes, No
+                # decision = Yes
+                subset1_d = subset1[subset1["Decision"] == d]  # high, yes
+                subset2_d = subset2[subset2["Decision"] == d]  # normal, yes
+
+                subset1_d_chi_square = math.sqrt(
+                    (
+                        (subset1_d.shape[0] - subset1_expected)
+                        * (subset1_d.shape[0] - subset1_expected)
+                    )
+                    / subset1_expected
+                )
+
+                subset2_d_chi_square = math.sqrt(
+                    (
+                        (subset2_d.shape[0] - subset2_expected)
+                        * (subset2_d.shape[0] - subset2_expected)
+                    )
+                    / subset2_expected
+                )
+
+                chi_square = chi_square + subset1_d_chi_square + subset2_d_chi_square
+
+            subset_chi_squares.append(chi_square)
+
+        # ----------------------------------
+        elif algorithm == "Regression":
+            superset_stdev = df["Decision"].std(ddof=0)
+            subset1_stdev = subset1["Decision"].std(ddof=0)
+            subset2_stdev = subset2["Decision"].std(ddof=0)
+
+            threshold_weighted_stdev = (subset1_rows / total_instances) * subset1_stdev + (
+                subset2_rows / total_instances
+            ) * subset2_stdev
+            threshold_reducted_stdev = superset_stdev - threshold_weighted_stdev
+            subset_red_stdevs.append(threshold_reducted_stdev)
+
+    # ----------------------------------
+
+    if algorithm == "C4.5":
+        winner_one = subset_gainratios.index(max(subset_gainratios))
+    elif (
+        algorithm == "ID3"
+    ):  # actually, ID3 does not support continuous features but we can still do it
+        winner_one = subset_gains.index(max(subset_gains))
+    elif algorithm == "CART":
+        winner_one = subset_ginis.index(min(subset_ginis))
+    elif algorithm == "CHAID":
+        winner_one = subset_chi_squares.index(max(subset_chi_squares))
+    elif algorithm == "Regression":
+        winner_one = subset_red_stdevs.index(max(subset_red_stdevs))
+
+    winner_threshold = unique_values[winner_one]
+    logger.debug(f"{column_name}: {winner_threshold} in {unique_values}")
+
+    logger.debug(f"threshold is {winner_threshold} for {column_name}")
+    df[column_name] = np.where(
+        df[column_name] <= winner_threshold,
+        "<=" + str(winner_threshold),
+        ">" + str(winner_threshold),
+    )
+
+    return df
diff --git a/chefboost/training/Training.py b/chefboost/training/Training.py
index 7b151d5..c1b34c9 100644
--- a/chefboost/training/Training.py
+++ b/chefboost/training/Training.py
@@ -1,729 +1,808 @@
 import math
-import imp
+import imp  # 
pylint:disable=deprecated-module import uuid import json -import numpy as np import copy -import os import multiprocessing import multiprocessing.pool from contextlib import closing -import pandas as pd -import psutil import gc -import sys -import tqdm - -from chefboost.training import Preprocess -from chefboost.commons import functions, evaluate - -#---------------------------------------- - -global decision_rules - -class NoDaemonProcess(multiprocessing.Process): - # make 'daemon' attribute always return False - def _get_daemon(self): - return False - def _set_daemon(self, value): - pass - daemon = property(_get_daemon, _set_daemon) - -class NoDaemonContext(type(multiprocessing.get_context())): - Process = NoDaemonProcess - -class MyPool(multiprocessing.pool.Pool): - - def __init__(self, *args, **kwargs): - kwargs['context'] = NoDaemonContext() - super(MyPool, self).__init__(*args, **kwargs) - -#---------------------------------------- -def calculateEntropy(df, config): - - algorithm = config['algorithm'] - - #-------------------------- - - if algorithm == 'Regression': - return 0 - - #print(df) - - instances = df.shape[0]; columns = df.shape[1] - #print(instances," rows, ",columns," columns") - - decisions = df['Decision'].value_counts().keys().tolist() - - entropy = 0 - - for i in range(0, len(decisions)): - decision = decisions[i] - num_of_decisions = df['Decision'].value_counts().tolist()[i] - #print(decision,"->",num_of_decisions) - - class_probability = num_of_decisions/instances - - entropy = entropy - class_probability*math.log(class_probability, 2) - - return entropy - -def findDecision(df, config): - #information gain for id3, gain ratio for c4.5, gini for cart, chi square for chaid and std for regression - algorithm = config['algorithm'] - - resp_obj = findGains(df, config) - gains = list(resp_obj["gains"].values()) - entropy = resp_obj["entropy"] - - if algorithm == "ID3": - winner_index = gains.index(max(gains)) - metric_value = entropy - metric_name = "Entropy" - elif algorithm == "C4.5": - winner_index = gains.index(max(gains)) - metric_value = entropy - metric_name = "Entropy" - elif algorithm == "CART": - winner_index = gains.index(min(gains)) - metric_value = min(gains) - metric_name = "Gini" - elif algorithm == "CHAID": - winner_index = gains.index(max(gains)) - metric_value = max(gains) - metric_name = "ChiSquared" - elif algorithm == "Regression": - winner_index = gains.index(max(gains)) - metric_value = max(gains) - metric_name = "Std" - - winner_name = df.columns[winner_index] - - return winner_name, df.shape[0], metric_value, metric_name - -def findGains(df, config): - - algorithm = config['algorithm'] - decision_classes = df["Decision"].unique() - - #----------------------------- - - entropy = 0 - - if algorithm == "ID3" or algorithm == "C4.5": - entropy = calculateEntropy(df, config) - - columns = df.shape[1]; instances = df.shape[0] - - gains = [] - - for i in range(0, columns-1): - column_name = df.columns[i] - column_type = df[column_name].dtypes - - #print(column_name,"->",column_type) - - if column_type != 'object': - df = Preprocess.processContinuousFeatures(algorithm, df, column_name, entropy, config) - - classes = df[column_name].value_counts() - - splitinfo = 0 - if algorithm == 'ID3' or algorithm == 'C4.5': - gain = entropy * 1 - else: - gain = 0 - - for j in range(0, len(classes)): - current_class = classes.keys().tolist()[j] - #print(column_name,"->",current_class) - - subdataset = df[df[column_name] == current_class] - #print(subdataset) - - 
subset_instances = subdataset.shape[0] - class_probability = subset_instances/instances - - if algorithm == 'ID3' or algorithm == 'C4.5': - subset_entropy = calculateEntropy(subdataset, config) - gain = gain - class_probability * subset_entropy - - if algorithm == 'C4.5': - splitinfo = splitinfo - class_probability*math.log(class_probability, 2) - - elif algorithm == 'CART': #GINI index - decision_list = subdataset['Decision'].value_counts().tolist() - - subgini = 1 - - for k in range(0, len(decision_list)): - subgini = subgini - math.pow((decision_list[k]/subset_instances), 2) - - gain = gain + (subset_instances / instances) * subgini - - elif algorithm == 'CHAID': - num_of_decisions = len(decision_classes) - - expected = subset_instances / num_of_decisions - - for d in decision_classes: - num_of_d = subdataset[subdataset["Decision"] == d].shape[0] - - chi_square_of_d = math.sqrt(((num_of_d - expected) * (num_of_d - expected)) / expected) - - gain += chi_square_of_d - - elif algorithm == 'Regression': - subset_stdev = subdataset['Decision'].std(ddof=0) - gain = gain + (subset_instances/instances)*subset_stdev - - #iterating over classes for loop end - #------------------------------- - - if algorithm == 'Regression': - stdev = df['Decision'].std(ddof=0) - gain = stdev - gain - if algorithm == 'C4.5': - if splitinfo == 0: - splitinfo = 100 #this can be if data set consists of 2 rows and current column consists of 1 class. still decision can be made (decisions for these 2 rows same). set splitinfo to very large value to make gain ratio very small. in this way, we won't find this column as the most dominant one. - gain = gain / splitinfo - - #---------------------------------- - - gains.append(gain) - - #------------------------------------------------- +import psutil - resp_obj = {} - resp_obj["gains"] = {} +import numpy as np +import pandas as pd - for idx, feature in enumerate(df.columns[0:-1]): #Decision is always the last column - #print(idx, feature) - resp_obj["gains"][feature] = gains[idx] +from chefboost.training import Preprocess +from chefboost.commons import functions +from chefboost.commons.logger import Logger - resp_obj["entropy"] = entropy +# pylint: disable=too-many-function-args, unused-argument - return resp_obj +logger = Logger(module="chefboost/training/Training.py") -def createBranchWrapper(func, args): - return func(*args) - -def createBranch(config, current_class, subdataset, numericColumn, branch_index, winner_name, winner_index, root, parents, file, dataset_features, num_of_instances, metric, tree_id = 0, main_process_id = None): - - custom_rules = [] - - algorithm = config['algorithm'] - enableAdaboost = config['enableAdaboost'] - enableGBM = config['enableGBM'] - max_depth = config['max_depth'] - enableParallelism = config['enableParallelism'] - - charForResp = "'" - if algorithm == 'Regression': - charForResp = "" - - #--------------------------- - - tmp_root = root * 1 - parents_raw = copy.copy(parents) - - #--------------------------- - - if numericColumn == True: - compareTo = current_class #current class might be <=x or >x in this case - else: - compareTo = " == '"+str(current_class)+"'" - - terminateBuilding = False - - #----------------------------------------------- - #can decision be made? 
- - if enableGBM == True and root >= max_depth: #max depth - final_decision = subdataset['Decision'].mean() - terminateBuilding = True - elif enableAdaboost == True: - #final_decision = subdataset['Decision'].value_counts().idxmax() - final_decision = functions.sign(subdataset['Decision'].mean()) #get average - terminateBuilding = True - enableParallelism = False - elif len(subdataset['Decision'].value_counts().tolist()) == 1: - final_decision = subdataset['Decision'].value_counts().keys().tolist()[0] #all items are equal in this case - terminateBuilding = True - elif subdataset.shape[1] == 1: #if decision cannot be made even though all columns dropped - final_decision = subdataset['Decision'].value_counts().idxmax() #get the most frequent one - terminateBuilding = True - elif algorithm == 'Regression' and (subdataset.shape[0] < 5 or root >= max_depth): #pruning condition - #elif algorithm == 'Regression' and subdataset['Decision'].std(ddof=0)/global_stdev < 0.4: #pruning condition - final_decision = subdataset['Decision'].mean() #get average - terminateBuilding = True - elif algorithm in ['ID3', 'C4.5', 'CART', 'CHAID'] and root >= max_depth: - final_decision = subdataset['Decision'].value_counts().idxmax() #get the most frequent one - terminateBuilding = True +# ---------------------------------------- - #----------------------------------------------- +global decision_rules # pylint: disable=global-at-module-level - if enableParallelism == True: - check_condition = "if" #TODO: elif checks might be above than if statements in parallel - else: - if branch_index == 0: - check_condition = "if" - else: - check_condition = "elif" - check_rule = check_condition+" obj["+str(winner_index)+"]"+compareTo+":" +class NoDaemonProcess(multiprocessing.Process): + # make 'daemon' attribute always return False + def _get_daemon(self): + return False - leaf_id = str(uuid.uuid1()) + def _set_daemon(self, value): + pass - if enableParallelism != True: - functions.storeRule(file,(functions.formatRule(root),"",check_rule)) - else: - sample_rule = {} - sample_rule["current_level"] = root - sample_rule["leaf_id"] = leaf_id - sample_rule["parents"] = parents - sample_rule["rule"] = check_rule - sample_rule["feature_idx"] = winner_index - sample_rule["feature_name"] = winner_name - sample_rule["instances"] = num_of_instances - sample_rule["metric"] = metric - sample_rule["return_statement"] = 0 - sample_rule["tree_id"] = tree_id + daemon = property(_get_daemon, _set_daemon) - #json to string - sample_rule = json.dumps(sample_rule) - custom_rules.append(sample_rule) +class NoDaemonContext(type(multiprocessing.get_context())): + # pylint: disable=too-few-public-methods + Process = NoDaemonProcess - #----------------------------------------------- - if terminateBuilding == True: #check decision is made +class MyPool(multiprocessing.pool.Pool): + # pylint: disable=too-few-public-methods, abstract-method, super-with-arguments + def __init__(self, *args, **kwargs): + kwargs["context"] = NoDaemonContext() + super(MyPool, self).__init__(*args, **kwargs) - parents = copy.copy(leaf_id) - leaf_id = str(uuid.uuid1()) - decision_rule = "return "+charForResp+str(final_decision)+charForResp +# ---------------------------------------- +def calculateEntropy(df, config): + algorithm = config["algorithm"] - if enableParallelism != True: - #serial - functions.storeRule(file,(functions.formatRule(root+1),decision_rule)) - else: - #parallel - sample_rule = {} - sample_rule["current_level"] = root+1 - sample_rule["leaf_id"] = leaf_id - 
sample_rule["parents"] = parents - sample_rule["rule"] = decision_rule - sample_rule["feature_idx"] = winner_index - sample_rule["feature_name"] = winner_name - sample_rule["instances"] = num_of_instances - sample_rule["metric"] = 0 - sample_rule["return_statement"] = 1 - sample_rule["tree_id"] = tree_id + # -------------------------- - #json to string - sample_rule = json.dumps(sample_rule) + if algorithm == "Regression": + return 0 - custom_rules.append(sample_rule) + logger.debug(df) - else: #decision is not made, continue to create branch and leafs - root = root + 1 #the following rule will be included by this rule. increase root - parents = copy.copy(leaf_id) - - results = buildDecisionTree(subdataset, root, file, config, dataset_features - , root-1, leaf_id, parents, tree_id = tree_id, main_process_id = main_process_id) + instances = df.shape[0] - custom_rules = custom_rules + results - - root = tmp_root * 1 - parents = copy.copy(parents_raw) + decisions = df["Decision"].value_counts().keys().tolist() - gc.collect() - - return custom_rules - -def buildDecisionTree(df, root, file, config, dataset_features, parent_level = 0, leaf_id = 0, parents = 'root', tree_id = 0, validation_df = None, main_process_id = None): + entropy = 0 - models = [] + for i, decision in enumerate(decisions): + num_of_decisions = df["Decision"].value_counts().tolist()[i] + logger.debug(f"{decision} -> {num_of_decisions}") - decision_rules = [] + class_probability = num_of_decisions / instances - feature_names = df.columns[0:-1] + entropy = entropy - class_probability * math.log(class_probability, 2) - enableParallelism = config['enableParallelism'] - algorithm = config['algorithm'] + return entropy - json_file = file.split(".")[0]+".json" - random_forest_enabled = config['enableRandomForest'] - enableGBM = config['enableGBM'] - enableAdaboost = config['enableAdaboost'] +def findDecision(df, config): + # information gain for id3, gain ratio for c4.5, gini for cart, + # chi square for chaid and std for regression + algorithm = config["algorithm"] + + resp_obj = findGains(df, config) + gains = list(resp_obj["gains"].values()) + entropy = resp_obj["entropy"] + + if algorithm == "ID3": + winner_index = gains.index(max(gains)) + metric_value = entropy + metric_name = "Entropy" + elif algorithm == "C4.5": + winner_index = gains.index(max(gains)) + metric_value = entropy + metric_name = "Entropy" + elif algorithm == "CART": + winner_index = gains.index(min(gains)) + metric_value = min(gains) + metric_name = "Gini" + elif algorithm == "CHAID": + winner_index = gains.index(max(gains)) + metric_value = max(gains) + metric_name = "ChiSquared" + elif algorithm == "Regression": + winner_index = gains.index(max(gains)) + metric_value = max(gains) + metric_name = "Std" + + winner_name = df.columns[winner_index] + + return winner_name, df.shape[0], metric_value, metric_name - if root == 1: - if random_forest_enabled != True and enableGBM != True and enableAdaboost != True: - raw_df = df.copy() - #-------------------------------------- +def findGains(df, config): + algorithm = config["algorithm"] + decision_classes = df["Decision"].unique() - df_copy = df.copy() + # ----------------------------- - winner_name, num_of_instances, metric, metric_name = findDecision(df, config) + entropy = 0 - #find winner index, this cannot be returned by find decision because columns dropped in previous steps - j = 0 - for i in dataset_features: - if i == winner_name: - winner_index = j - j = j + 1 + if algorithm in ["ID3", "C4.5"]: + entropy = 
calculateEntropy(df, config) - numericColumn = False - if dataset_features[winner_name] != 'object': - numericColumn = True + columns = df.shape[1] + instances = df.shape[0] - #restoration - columns = df.shape[1] - for i in range(0, columns-1): - #column_name = df.columns[i]; column_type = df[column_name].dtypes #numeric field already transformed to object. you cannot check it with df itself, you should check df_copy - column_name = df_copy.columns[i]; column_type = df_copy[column_name].dtypes - if column_type != 'object' and column_name != winner_name: - df[column_name] = df_copy[column_name] + gains = [] - classes = df[winner_name].value_counts().keys().tolist() - #print("classes: ",classes," in ", winner_name) - #----------------------------------------------------- + for i in range(0, columns - 1): + column_name = df.columns[i] + column_type = df[column_name].dtypes - num_cores = config["num_cores"] + logger.debug(f"{column_name} -> {column_type}") - input_params = [] + if column_type != "object": + df = Preprocess.processContinuousFeatures(algorithm, df, column_name, entropy, config) - #serial approach - for i in range(0,len(classes)): - current_class = classes[i] - subdataset = df[df[winner_name] == current_class] - subdataset = subdataset.drop(columns=[winner_name]) - branch_index = i * 1 + classes = df[column_name].value_counts() - #create branches serially - if enableParallelism != True: + splitinfo = 0 + if algorithm in ["ID3", "C4.5"]: + gain = entropy * 1 + else: + gain = 0 - if i == 0: + for j in range(0, len(classes)): + current_class = classes.keys().tolist()[j] + logger.debug(f"{column_name} -> {current_class}") - descriptor = { - "feature": winner_name, - "instances": num_of_instances, - #"metric_name": metric_name, - "metric_value": round(metric, 4), - "depth": parent_level + 1 - } - descriptor = "# "+json.dumps(descriptor) + subdataset = df[df[column_name] == current_class] + logger.debug(subdataset) - functions.storeRule(file, (functions.formatRule(root), "", descriptor)) + subset_instances = subdataset.shape[0] + class_probability = subset_instances / instances - results = createBranch(config, current_class, subdataset, numericColumn, branch_index - , winner_name, winner_index, root, parents, file, dataset_features, num_of_instances, metric, tree_id = tree_id, main_process_id = main_process_id) + if algorithm in ["ID3", "C4.5"]: + subset_entropy = calculateEntropy(subdataset, config) + gain = gain - class_probability * subset_entropy - decision_rules = decision_rules + results + if algorithm == "C4.5": + splitinfo = splitinfo - class_probability * math.log(class_probability, 2) - else: - input_params.append((config, current_class, subdataset, numericColumn, branch_index - , winner_name, winner_index, root, parents, file, dataset_features, num_of_instances, metric, tree_id, main_process_id)) + elif algorithm == "CART": # GINI index + decision_list = subdataset["Decision"].value_counts().tolist() - #--------------------------- - #add else condition in the decision tree + subgini = 1 - if df.Decision.dtypes == 'object': #classification - pivot = pd.DataFrame(subdataset.Decision.value_counts()).reset_index() - pivot = pivot.rename(columns = {"Decision": "Instances","index": "Decision"}) - pivot = pivot.sort_values(by = ["Instances"], ascending = False).reset_index() + for current_decision in decision_list: + subgini = subgini - math.pow((current_decision / subset_instances), 2) - else_decision = "return '%s'" % (pivot.iloc[0].Decision) + gain = gain + (subset_instances / 
instances) * subgini - if enableParallelism != True: - functions.storeRule(file,(functions.formatRule(root), "else:")) - functions.storeRule(file,(functions.formatRule(root+1), else_decision)) - else: #parallelism - leaf_id = str(uuid.uuid1()) + elif algorithm == "CHAID": + num_of_decisions = len(decision_classes) - check_rule = "else: "+else_decision + expected = subset_instances / num_of_decisions - sample_rule = {} - sample_rule["current_level"] = root - sample_rule["leaf_id"] = leaf_id - sample_rule["parents"] = parents - sample_rule["rule"] = check_rule - sample_rule["feature_idx"] = -1 - sample_rule["feature_name"] = "" - sample_rule["instances"] = df.shape[0] - sample_rule["metric"] = 0 - sample_rule["return_statement"] = 0 - sample_rule["tree_id"] = tree_id + for d in decision_classes: + num_of_d = subdataset[subdataset["Decision"] == d].shape[0] - #json to string - sample_rule = json.dumps(sample_rule) - decision_rules.append(sample_rule) + chi_square_of_d = math.sqrt( + ((num_of_d - expected) * (num_of_d - expected)) / expected + ) - else: #regression - else_decision = "return %s" % (subdataset.Decision.mean()) + gain += chi_square_of_d - if enableParallelism != True: - functions.storeRule(file,(functions.formatRule(root), "else:")) - functions.storeRule(file,(functions.formatRule(root+1), else_decision)) - else: - leaf_id = str(uuid.uuid1()) + elif algorithm == "Regression": + subset_stdev = subdataset["Decision"].std(ddof=0) + gain = gain + (subset_instances / instances) * subset_stdev - check_rule = "else: "+else_decision + # iterating over classes for loop end + # ------------------------------- - sample_rule = {} - sample_rule["current_level"] = root - sample_rule["leaf_id"] = leaf_id - sample_rule["parents"] = parents - sample_rule["rule"] = check_rule - sample_rule["tree_id"] = tree_id - sample_rule["feature_name"] = "" - sample_rule["instances"] = 0 - sample_rule["metric"] = 0 - sample_rule["return_statement"] = 1 + if algorithm == "Regression": + stdev = df["Decision"].std(ddof=0) + gain = stdev - gain + if algorithm == "C4.5": + if splitinfo == 0: + splitinfo = 100 + # this can be if data set consists of 2 rows and current column consists + # of 1 class. still decision can be made (decisions for these 2 rows same). + # set splitinfo to very large value to make gain ratio very small. + # in this way, we won't find this column as the most dominant one. 
+ gain = gain / splitinfo - #json to string - sample_rule = json.dumps(sample_rule) - decision_rules.append(sample_rule) + # ---------------------------------- - #--------------------------- + gains.append(gain) - try: - main_process = psutil.Process(main_process_id) - children = main_process.children(recursive=True) - active_processes = len(children) + 1 #plus parent - #active_processes = len(children) - except: - active_processes = 100 #set a large initial value + # ------------------------------------------------- - results = [] - #create branches in parallel - if enableParallelism == True: + resp_obj = {} + resp_obj["gains"] = {} - required_threads = active_processes + len(classes) + for idx, feature in enumerate(df.columns[0:-1]): # Decision is always the last column + logger.debug(f"{idx}, {feature}") + resp_obj["gains"][feature] = gains[idx] - #if parent_level == 0 and random_forest_enabled != True: - if main_process_id != None and num_cores >= required_threads: #len(classes) branches will be run in parallel + resp_obj["entropy"] = entropy - #POOL_SIZE = num_cores - POOL_SIZE = len(classes) + return resp_obj - #with closing(multiprocessing.Pool(POOL_SIZE)) as pool: - with closing(MyPool(POOL_SIZE)) as pool: - funclist = [] - for input_param in input_params: - f = pool.apply_async(createBranchWrapper, [createBranch, input_param]) - funclist.append(f) +def createBranchWrapper(func, args): + return func(*args) + + +def createBranch( + config, + current_class, + subdataset, + numericColumn, + branch_index, + winner_name, + winner_index, + root, + parents, + file, + dataset_features, + num_of_instances, + metric, + tree_id=0, + main_process_id=None, +): + custom_rules = [] + + algorithm = config["algorithm"] + enableAdaboost = config["enableAdaboost"] + enableGBM = config["enableGBM"] + max_depth = config["max_depth"] + enableParallelism = config["enableParallelism"] + + charForResp = "'" + if algorithm == "Regression": + charForResp = "" + + # --------------------------- + + tmp_root = root * 1 + parents_raw = copy.copy(parents) + + # --------------------------- + + if numericColumn == True: + compareTo = current_class # current class might be <=x or >x in this case + else: + compareTo = " == '" + str(current_class) + "'" + + terminateBuilding = False + + # ----------------------------------------------- + # can decision be made? 
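# As a rough standalone restatement of the termination checks that follow (the plain
# decision tree case, ignoring the GBM and Adaboost branches), with a hypothetical
# helper name:

def is_leaf(subdataset, root, max_depth, algorithm):
    if len(subdataset["Decision"].value_counts()) == 1:
        return True  # pure node: every remaining row carries the same decision
    if subdataset.shape[1] == 1:
        return True  # all feature columns consumed; fall back to the most frequent class
    if algorithm == "Regression" and subdataset.shape[0] < 5:
        return True  # too few instances left to split meaningfully
    return root >= max_depth  # depth-based pruning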
+ + if enableGBM == True and root >= max_depth: # max depth + final_decision = subdataset["Decision"].mean() + terminateBuilding = True + elif enableAdaboost == True: + # final_decision = subdataset['Decision'].value_counts().idxmax() + final_decision = functions.sign(subdataset["Decision"].mean()) # get average + terminateBuilding = True + enableParallelism = False + elif len(subdataset["Decision"].value_counts().tolist()) == 1: + final_decision = ( + subdataset["Decision"].value_counts().keys().tolist()[0] + ) # all items are equal in this case + terminateBuilding = True + elif subdataset.shape[1] == 1: # if decision cannot be made even though all columns dropped + final_decision = subdataset["Decision"].value_counts().idxmax() # get the most frequent one + terminateBuilding = True + elif algorithm == "Regression" and ( + subdataset.shape[0] < 5 or root >= max_depth + ): # pruning condition + final_decision = subdataset["Decision"].mean() # get average + terminateBuilding = True + elif algorithm in ["ID3", "C4.5", "CART", "CHAID"] and root >= max_depth: + final_decision = subdataset["Decision"].value_counts().idxmax() # get the most frequent one + terminateBuilding = True + + # ----------------------------------------------- + + if enableParallelism == True: + check_condition = "if" # TODO: elif checks might be above than if statements in parallel + else: + if branch_index == 0: + check_condition = "if" + else: + check_condition = "elif" + + check_rule = check_condition + " obj[" + str(winner_index) + "]" + compareTo + ":" + + leaf_id = str(uuid.uuid1()) + + if enableParallelism != True: + functions.storeRule(file, (functions.formatRule(root), "", check_rule)) + else: + sample_rule = {} + sample_rule["current_level"] = root + sample_rule["leaf_id"] = leaf_id + sample_rule["parents"] = parents + sample_rule["rule"] = check_rule + sample_rule["feature_idx"] = winner_index + sample_rule["feature_name"] = winner_name + sample_rule["instances"] = num_of_instances + sample_rule["metric"] = metric + sample_rule["return_statement"] = 0 + sample_rule["tree_id"] = tree_id + + # json to string + sample_rule = json.dumps(sample_rule) + + custom_rules.append(sample_rule) + + # ----------------------------------------------- + + if terminateBuilding == True: # check decision is made + parents = copy.copy(leaf_id) + leaf_id = str(uuid.uuid1()) + + decision_rule = "return " + charForResp + str(final_decision) + charForResp + + if enableParallelism != True: + # serial + functions.storeRule(file, (functions.formatRule(root + 1), decision_rule)) + else: + # parallel + sample_rule = {} + sample_rule["current_level"] = root + 1 + sample_rule["leaf_id"] = leaf_id + sample_rule["parents"] = parents + sample_rule["rule"] = decision_rule + sample_rule["feature_idx"] = winner_index + sample_rule["feature_name"] = winner_name + sample_rule["instances"] = num_of_instances + sample_rule["metric"] = 0 + sample_rule["return_statement"] = 1 + sample_rule["tree_id"] = tree_id + + # json to string + sample_rule = json.dumps(sample_rule) + + custom_rules.append(sample_rule) + + else: # decision is not made, continue to create branch and leafs + root = root + 1 # the following rule will be included by this rule. 
increase root + parents = copy.copy(leaf_id) + + results = buildDecisionTree( + subdataset, + root, + file, + config, + dataset_features, + root - 1, + leaf_id, + parents, + tree_id=tree_id, + main_process_id=main_process_id, + ) + + custom_rules = custom_rules + results + + root = tmp_root * 1 + parents = copy.copy(parents_raw) + + gc.collect() + + return custom_rules + + +def buildDecisionTree( + df, + root, + file, + config, + dataset_features, + parent_level=0, + leaf_id=0, + parents="root", + tree_id=0, + validation_df=None, + main_process_id=None, +): + models = [] + + decision_rules = [] + + feature_names = df.columns[0:-1] + + enableParallelism = config["enableParallelism"] + + json_file = file.split(".")[0] + ".json" + + # -------------------------------------- + + df_copy = df.copy() + + winner_name, num_of_instances, metric, _ = findDecision(df, config) + + # find winner index, this cannot be returned by find decision + # because columns dropped in previous steps + j = 0 + for i in dataset_features: + if i == winner_name: + winner_index = j + j = j + 1 + + numericColumn = False + if dataset_features[winner_name] != "object": + numericColumn = True + + # restoration + columns = df.shape[1] + for i in range(0, columns - 1): + column_name = df_copy.columns[i] + column_type = df_copy[column_name].dtypes + if column_type != "object" and column_name != winner_name: + df[column_name] = df_copy[column_name] + + classes = df[winner_name].value_counts().keys().tolist() + logger.debug(f"classes: {classes} in {winner_name}") + # ----------------------------------------------------- + + num_cores = config["num_cores"] + + input_params = [] + + # serial approach + for i, current_class in enumerate(classes): + subdataset = df[df[winner_name] == current_class] + subdataset = subdataset.drop(columns=[winner_name]) + branch_index = i * 1 + + # create branches serially + if enableParallelism != True: + if i == 0: + descriptor = { + "feature": winner_name, + "instances": num_of_instances, + # "metric_name": metric_name, + "metric_value": round(metric, 4), + "depth": parent_level + 1, + } + descriptor = "# " + json.dumps(descriptor) + + functions.storeRule(file, (functions.formatRule(root), "", descriptor)) + + results = createBranch( + config, + current_class, + subdataset, + numericColumn, + branch_index, + winner_name, + winner_index, + root, + parents, + file, + dataset_features, + num_of_instances, + metric, + tree_id=tree_id, + main_process_id=main_process_id, + ) + + decision_rules = decision_rules + results + + else: + input_params.append( + ( + config, + current_class, + subdataset, + numericColumn, + branch_index, + winner_name, + winner_index, + root, + parents, + file, + dataset_features, + num_of_instances, + metric, + tree_id, + main_process_id, + ) + ) + + # --------------------------- + # add else condition in the decision tree + + if df.Decision.dtypes == "object": # classification + pivot = pd.DataFrame(subdataset.Decision.value_counts()).reset_index() + pivot = pivot.rename(columns={"Decision": "Instances", "index": "Decision"}) + pivot = pivot.sort_values(by=["Instances"], ascending=False).reset_index() + + else_decision = f"return '{pivot.iloc[0].Decision}'" + + if enableParallelism != True: + functions.storeRule(file, (functions.formatRule(root), "else:")) + functions.storeRule(file, (functions.formatRule(root + 1), else_decision)) + else: # parallelism + leaf_id = str(uuid.uuid1()) + + check_rule = "else: " + else_decision + + sample_rule = {} + sample_rule["current_level"] = 
root + sample_rule["leaf_id"] = leaf_id + sample_rule["parents"] = parents + sample_rule["rule"] = check_rule + sample_rule["feature_idx"] = -1 + sample_rule["feature_name"] = "" + sample_rule["instances"] = df.shape[0] + sample_rule["metric"] = 0 + sample_rule["return_statement"] = 0 + sample_rule["tree_id"] = tree_id + + # json to string + sample_rule = json.dumps(sample_rule) + decision_rules.append(sample_rule) + + else: # regression + else_decision = f"return {subdataset.Decision.mean()}" + + if enableParallelism != True: + functions.storeRule(file, (functions.formatRule(root), "else:")) + functions.storeRule(file, (functions.formatRule(root + 1), else_decision)) + else: + leaf_id = str(uuid.uuid1()) + + check_rule = "else: " + else_decision + + sample_rule = {} + sample_rule["current_level"] = root + sample_rule["leaf_id"] = leaf_id + sample_rule["parents"] = parents + sample_rule["rule"] = check_rule + sample_rule["tree_id"] = tree_id + sample_rule["feature_name"] = "" + sample_rule["instances"] = 0 + sample_rule["metric"] = 0 + sample_rule["return_statement"] = 1 + + # json to string + sample_rule = json.dumps(sample_rule) + decision_rules.append(sample_rule) + + # --------------------------- + + try: + main_process = psutil.Process(main_process_id) + children = main_process.children(recursive=True) + active_processes = len(children) + 1 # plus parent + # active_processes = len(children) + except: + active_processes = 100 # set a large initial value + + results = [] + # create branches in parallel + if enableParallelism == True: + required_threads = active_processes + len(classes) - #all functions registered here + # if parent_level == 0 and random_forest_enabled != True: + if ( + main_process_id != None and num_cores >= required_threads + ): # len(classes) branches will be run in parallel + # POOL_SIZE = num_cores + POOL_SIZE = len(classes) + + # with closing(multiprocessing.Pool(POOL_SIZE)) as pool: + with closing(MyPool(POOL_SIZE)) as pool: + funclist = [] - for f in funclist: - branch_results = f.get(timeout = 100000) + for input_param in input_params: + f = pool.apply_async(createBranchWrapper, [createBranch, input_param]) + funclist.append(f) - for branch_result in branch_results: - results.append(branch_result) + # all functions registered here - pool.close() - pool.terminate() + for f in funclist: + branch_results = f.get(timeout=100000) - #-------------------------------- + for branch_result in branch_results: + results.append(branch_result) - else: #serial - for input_param in input_params: - sub_results = createBranchWrapper(createBranch, input_param) - for sub_result in sub_results: - results.append(sub_result) + pool.close() + pool.terminate() - #-------------------------------- + # -------------------------------- - decision_rules = decision_rules + results + else: # serial + for input_param in input_params: + sub_results = createBranchWrapper(createBranch, input_param) + for sub_result in sub_results: + results.append(sub_result) - #-------------------------------- + # -------------------------------- - if root != 1: #return children results until the root node - return decision_rules + decision_rules = decision_rules + results - #--------------------------------------------- + # -------------------------------- - if root == 1: + if root != 1: # return children results until the root node + return decision_rules - if enableParallelism == True: + # --------------------------------------------- - #custom rules are stored in decision_rules. 
merge them all in a json file first
+    if root == 1:
+        if enableParallelism == True:
+            # custom rules are stored in decision_rules. merge them all in a json file first
-        json_rules = "[\n" #initialize
+            json_rules = "[\n"  # initialize
-        file_index = 0
-        for custom_rule in decision_rules:
+            file_index = 0
+            for custom_rule in decision_rules:
+                json_rules += custom_rule
-            json_rules += custom_rule
+                if file_index < len(decision_rules) - 1:
+                    json_rules += ", "
-            if file_index < len(decision_rules) - 1:
-                json_rules += ", "
+                json_rules += "\n"
-            json_rules += "\n"
+                file_index = file_index + 1
-            file_index = file_index + 1
+            # -----------------------------------
-        #-----------------------------------
+            json_rules += "]"
+            functions.createFile(json_file, json_rules)
-        json_rules += "]"
-        functions.createFile(json_file, json_rules)
+            # -----------------------------------
+            # reconstruct rules from json to py
-        #-----------------------------------
-        #reconstruct rules from json to py
+            reconstructRules(json_file, feature_names)
-        reconstructRules(json_file, feature_names)
+            # -----------------------------------
-        #-----------------------------------
+        # is regular decision tree
+        if (
+            config["enableRandomForest"] != True
+            and config["enableGBM"] != True
+            and config["enableAdaboost"] != True
+        ):
+            # this is a regular decision tree. find accuracy here.
-        #is regular decision tree
-        if config['enableRandomForest'] != True and config['enableGBM'] != True and config['enableAdaboost'] != True:
-            #this is reguler decision tree. find accuracy here.
+            moduleName = "outputs/rules/rules"
+            fp, pathname, description = imp.find_module(moduleName)
+            myrules = imp.load_module(moduleName, fp, pathname, description)  # rules0
+            models.append(myrules)
-            moduleName = "outputs/rules/rules"
-            fp, pathname, description = imp.find_module(moduleName)
-            myrules = imp.load_module(moduleName, fp, pathname, description) #rules0
-            models.append(myrules)
+        return models
-        return models
 
 def findPrediction(row):
-    params = []
-    num_of_features = row.shape[0] - 1
-    for j in range(0, num_of_features):
-        params.append(row[j])
-
-    moduleName = "outputs/rules/rules"
-    fp, pathname, description = imp.find_module(moduleName)
-    myrules = imp.load_module(moduleName, fp, pathname, description) #rules0
-
-    prediction = myrules.findDecision(params)
-    return prediction
-
-"""
-If you set parelellisim True, then branches will be created parallel. Rules are stored in a json file randomly. This program reconstructs built rules in a tree form. In this way, we can build decision trees faster.
-"""
-
-def reconstructRules(source, feature_names, tree_id = 0):
+    params = []
+    num_of_features = row.shape[0] - 1
+    for j in range(0, num_of_features):
+        params.append(row[j])
-    #print("Reconstructing ",source)
+    moduleName = "outputs/rules/rules"
+    fp, pathname, description = imp.find_module(moduleName)
+    myrules = imp.load_module(moduleName, fp, pathname, description)  # rules0
+
+    prediction = myrules.findDecision(params)
+    return prediction
-    file_name = source.split(".json")[0]
-    file_name = file_name+".py"
-    #-----------------------------------
+# If you set parallelism True, then branches will be created in parallel. Rules are stored in a
+# json file randomly. This program reconstructs built rules in a tree form.
+# In this way, we can build decision trees faster.
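To make the reconstruction step above concrete: the sample_rule records built earlier land in the JSON file in arbitrary order, and reconstructRules (rewritten below) walks them by parent pointer to regenerate a nested findDecision function. A minimal standalone sketch of that idea, with hypothetical records (field names mirror sample_rule, values are illustrative):

    flat_rules = [
        {"current_level": 1, "leaf_id": "a", "parents": "root", "rule": "if obj[0] == 'Sunny':"},
        {"current_level": 2, "leaf_id": "b", "parents": "a", "rule": "return 'Yes'"},
        {"current_level": 1, "leaf_id": "c", "parents": "root", "rule": "else: return 'No'"},
    ]

    def rebuild(parent="root", level=1):
        # emit every rule whose parent matches, then recurse into its children
        lines = []
        for r in flat_rules:
            if r["parents"] == parent:
                lines.append("\t" * level + r["rule"])
                lines.extend(rebuild(r["leaf_id"], level + 1))
        return lines

    print("\n".join(["def findDecision(obj):"] + rebuild()))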
- constructor = "def findDecision(obj): #" - idx = 0 - for feature in feature_names: - constructor = constructor + "obj["+str(idx)+"]: "+feature - if idx < len(feature_names) - 1: - constructor = constructor+", " - idx = idx + 1 +def reconstructRules(source, feature_names, tree_id=0): + logger.debug(f"Reconstructing {source}") - functions.createFile(file_name, constructor+"\n") + file_name = source.split(".json")[0] + file_name = file_name + ".py" - #----------------------------------- + # ----------------------------------- - with open(source, 'r') as f: - rules = json.load(f) + constructor = "def findDecision(obj): #" + idx = 0 + for feature in feature_names: + constructor = constructor + "obj[" + str(idx) + "]: " + feature - #print(rules) + if idx < len(feature_names) - 1: + constructor = constructor + ", " + idx = idx + 1 - def padleft(rule, level): - for i in range(0, level): - rule = "\t"+rule - return rule + functions.createFile(file_name, constructor + "\n") - #print("def findDecision(obj):") + # ----------------------------------- - max_level = 0 + with open(source, "r", encoding="UTF-8") as f: + rules = json.load(f) - rule_set = [] - #json file might not store rules respectively - for instance in rules: - if len(instance) > 0: - rule = [] - rule.append(instance["current_level"]) - rule.append(instance["leaf_id"]) - rule.append(instance["parents"]) - rule.append(instance["rule"]) - rule.append(instance["feature_name"]) - rule.append(instance["instances"]) - rule.append(instance["metric"]) - rule.append(instance["return_statement"]) - rule_set.append(rule) - #print(padleft(instance["rule"], instance["current_level"])) + logger.debug(rules) - df = np.array(rule_set) + def padleft(rule, level): + for _ in range(0, level): + rule = "\t" + rule + return rule - def extractRules(df, parent = 'root', level=1): + logger.debug("def findDecision(obj):") - level_raw = level * 1; parent_raw = copy.copy(parent) + rule_set = [] + # json file might not store rules respectively + for instance in rules: + if len(instance) > 0: + rule = [] + rule.append(instance["current_level"]) + rule.append(instance["leaf_id"]) + rule.append(instance["parents"]) + rule.append(instance["rule"]) + rule.append(instance["feature_name"]) + rule.append(instance["instances"]) + rule.append(instance["metric"]) + rule.append(instance["return_statement"]) + rule_set.append(rule) + logger.debug(padleft(instance["rule"], instance["current_level"])) - else_rule = "" + df = np.array(rule_set) - leaf_idx = 0 - for i in range(0 ,df.shape[0]): - current_level = int(df[i][0]) - leaf_id = df[i][1] - parent_id = df[i][2] - rule = df[i][3] - feature_name = df[i][4] - instances = int(df[i][5]) - metric = float(df[i][6]) - return_statement = int(df[i][7]) + def extractRules(df, parent="root", level=1): + level_raw = level * 1 + parent_raw = copy.copy(parent) - if parent_id == parent: + else_rule = "" - if_statement = False - if rule[0:2] == "if": - if_statement = True + leaf_idx = 0 + for i in range(0, df.shape[0]): + current_level = int(df[i][0]) + leaf_id = df[i][1] + parent_id = df[i][2] + rule = df[i][3] + feature_name = df[i][4] + instances = int(df[i][5]) + metric = float(df[i][6]) + return_statement = int(df[i][7]) - else_statement = False - if rule[0:5] == "else:": - else_statement = True - else_rule = rule + if parent_id == parent: + if_statement = False + if rule[0:2] == "if": + if_statement = True - #------------------------ + else_statement = False + if rule[0:5] == "else:": + else_statement = True + else_rule = rule - if 
else_statement != True: + # ------------------------ - if if_statement == True and leaf_idx > 0: - rule = "el"+rule + if else_statement != True: + if if_statement == True and leaf_idx > 0: + rule = "el" + rule - #print(padleft(rule, level), "(", leaf_idx,")") + logger.debug(f"{padleft(rule, level)} ({leaf_idx})") - if leaf_idx == 0 and return_statement == 0: - explainer = {} - explainer["feature"] = feature_name - explainer["instances"] = instances - explainer["metric_value"] = round(metric, 4) - explainer["depth"] = current_level - explainer = "# "+json.dumps(explainer) - functions.storeRule(file_name, padleft(explainer, level)) + if leaf_idx == 0 and return_statement == 0: + explainer = {} + explainer["feature"] = feature_name + explainer["instances"] = instances + explainer["metric_value"] = round(metric, 4) + explainer["depth"] = current_level + explainer = "# " + json.dumps(explainer) + functions.storeRule(file_name, padleft(explainer, level)) - functions.storeRule(file_name, padleft(rule, level)) + functions.storeRule(file_name, padleft(rule, level)) - level = level + 1; parent = copy.copy(leaf_id) - extractRules(df, parent, level) - level = level_raw * 1; parent = copy.copy(parent_raw) #restore + level = level + 1 + parent = copy.copy(leaf_id) + extractRules(df, parent, level) + level = level_raw * 1 + parent = copy.copy(parent_raw) # restore - leaf_idx = leaf_idx + 1 + leaf_idx = leaf_idx + 1 - #add else statement + # add else statement - if else_rule != "": - #print(padleft(else_rule, level)) - functions.storeRule(file_name, padleft(else_rule, level)) + if else_rule != "": + logger.debug(padleft(else_rule, level)) + functions.storeRule(file_name, padleft(else_rule, level)) - #------------------------------------ + # ------------------------------------ - extractRules(df) + extractRules(df) - #------------------------------------ + # ------------------------------------ diff --git a/chefboost/tuning/adaboost.py b/chefboost/tuning/adaboost.py index 297fa7c..87b0dd3 100644 --- a/chefboost/tuning/adaboost.py +++ b/chefboost/tuning/adaboost.py @@ -1,136 +1,165 @@ +import imp # pylint: disable=deprecated-module +import math + import pandas as pd import numpy as np +from tqdm import tqdm -from chefboost.commons import functions, evaluate +from chefboost.commons import functions from chefboost.training import Training -from chefboost import Chefboost as cb +from chefboost.commons.logger import Logger -import imp -import math +# pylint: disable=unused-argument + +logger = Logger(module="chefboost/tuning/adaboost.py") -from tqdm import tqdm def findPrediction(row): - epoch = row['Epoch'] - row = row.drop(labels=['Epoch']) - columns = row.shape[0] - - params = [] - for j in range(0, columns-1): - params.append(row[j]) - - moduleName = "outputs/rules/rules_%d" % (epoch) - fp, pathname, description = imp.find_module(moduleName) - myrules = imp.load_module(moduleName, fp, pathname, description) - - prediction = functions.sign(myrules.findDecision(params)) - - return prediction - -def apply(df, config, header, dataset_features, validation_df = None, process_id = None): - - models = []; alphas = [] - - initializeAlphaFile() - - num_of_weak_classifier = config['num_of_weak_classifier'] - - #------------------------ - - rows = df.shape[0]; columns = df.shape[1] - final_predictions = pd.DataFrame(np.zeros([rows, 1]), columns=['prediction']) - - worksheet = df.copy() - worksheet['Weight'] = 1 / rows #uniform distribution initially - - final_predictions = pd.DataFrame(np.zeros((df.shape[0], 2)), 
columns = ['Prediction', 'Actual']) - final_predictions['Actual'] = df['Decision'] - - best_epoch_idx = 0; best_epoch_value = 1000000 - - #for i in range(0, num_of_weak_classifier): - pbar = tqdm(range(0, num_of_weak_classifier), desc='Adaboosting') - for i in pbar: - worksheet['Decision'] = worksheet['Weight'] * worksheet['Decision'] - - root = 1 - file = "outputs/rules/rules_"+str(i)+".py" - - functions.createFile(file, header) - - #print(worksheet) - Training.buildDecisionTree(worksheet.drop(columns=['Weight']) - , root, file, config, dataset_features - , parent_level = 0, leaf_id = 0, parents = 'root', main_process_id = process_id) - - #--------------------------------------- - - moduleName = "outputs/rules/rules_"+str(i) - fp, pathname, description = imp.find_module(moduleName) - myrules = imp.load_module(moduleName, fp, pathname, description) - models.append(myrules) - - #--------------------------------------- - - df['Epoch'] = i - worksheet['Prediction'] = df.apply(findPrediction, axis=1) - df = df.drop(columns = ['Epoch']) - - #--------------------------------------- - worksheet['Actual'] = df['Decision'] - worksheet['Loss'] = abs(worksheet['Actual'] - worksheet['Prediction'])/2 - worksheet['Weight_Times_Loss'] = worksheet['Loss'] * worksheet['Weight'] - - epsilon = worksheet['Weight_Times_Loss'].sum() - alpha = math.log((1 - epsilon)/epsilon)/2 #use alpha to update weights in the next round - alphas.append(alpha) - - #----------------------------- - - #store alpha - addEpochAlpha(i, alpha) - - #----------------------------- - - worksheet['Alpha'] = alpha - worksheet['New_Weights'] = worksheet['Weight'] * (-alpha * worksheet['Actual'] * worksheet['Prediction']).apply(math.exp) - - #normalize - worksheet['New_Weights'] = worksheet['New_Weights'] / worksheet['New_Weights'].sum() - worksheet['Weight'] = worksheet['New_Weights'] - worksheet['Decision'] = df['Decision'] - - final_predictions['Prediction'] = final_predictions['Prediction'] + worksheet['Alpha'] * worksheet['Prediction'] - #print(final_predictions) - worksheet = worksheet.drop(columns = ['New_Weights', 'Prediction', 'Actual', 'Loss', 'Weight_Times_Loss', 'Alpha']) - - mae = (np.abs(final_predictions['Prediction'].apply(functions.sign) - final_predictions['Actual'])/2).sum()/final_predictions.shape[0] - #print(mae) - - if mae < best_epoch_value: - best_epoch_value = mae * 1 - best_epoch_idx = i * 1 - - pbar.set_description("Epoch %d. Loss: %d. 
Process: " % (i+1, mae)) - - #------------------------------ - - print("The best epoch is ",best_epoch_idx," with the ",best_epoch_value," MAE score") - - models = models[0: best_epoch_idx+1] - alphas = alphas[0: best_epoch_idx+1] - - #------------------------------ - - return models, alphas + epoch = row["Epoch"] + row = row.drop(labels=["Epoch"]) + columns = row.shape[0] + + params = [] + for j in range(0, columns - 1): + params.append(row[j]) + + moduleName = f"outputs/rules/rules_{int(epoch)}" + fp, pathname, description = imp.find_module(moduleName) + myrules = imp.load_module(moduleName, fp, pathname, description) + + prediction = functions.sign(myrules.findDecision(params)) + + return prediction + + +def apply(df, config, header, dataset_features, validation_df=None, process_id=None): + models = [] + alphas = [] + + initializeAlphaFile() + + num_of_weak_classifier = config["num_of_weak_classifier"] + + # ------------------------ + + rows = df.shape[0] + final_predictions = pd.DataFrame(np.zeros([rows, 1]), columns=["prediction"]) + + worksheet = df.copy() + worksheet["Weight"] = 1 / rows # uniform distribution initially + + final_predictions = pd.DataFrame(np.zeros((df.shape[0], 2)), columns=["Prediction", "Actual"]) + final_predictions["Actual"] = df["Decision"] + + best_epoch_idx = 0 + best_epoch_value = 1000000 + + # for i in range(0, num_of_weak_classifier): + pbar = tqdm(range(0, num_of_weak_classifier), desc="Adaboosting") + for i in pbar: + worksheet["Decision"] = worksheet["Weight"] * worksheet["Decision"] + + root = 1 + file = "outputs/rules/rules_" + str(i) + ".py" + + functions.createFile(file, header) + + logger.debug(worksheet) + Training.buildDecisionTree( + worksheet.drop(columns=["Weight"]), + root, + file, + config, + dataset_features, + parent_level=0, + leaf_id=0, + parents="root", + main_process_id=process_id, + ) + + # --------------------------------------- + + moduleName = "outputs/rules/rules_" + str(i) + fp, pathname, description = imp.find_module(moduleName) + myrules = imp.load_module(moduleName, fp, pathname, description) + models.append(myrules) + + # --------------------------------------- + + df["Epoch"] = i + worksheet["Prediction"] = df.apply(findPrediction, axis=1) + df = df.drop(columns=["Epoch"]) + + # --------------------------------------- + worksheet["Actual"] = df["Decision"] + worksheet["Loss"] = abs(worksheet["Actual"] - worksheet["Prediction"]) / 2 + worksheet["Weight_Times_Loss"] = worksheet["Loss"] * worksheet["Weight"] + + epsilon = worksheet["Weight_Times_Loss"].sum() + alpha = ( + math.log((1 - epsilon) / epsilon) / 2 + ) # use alpha to update weights in the next round + alphas.append(alpha) + + # ----------------------------- + + # store alpha + addEpochAlpha(i, alpha) + + # ----------------------------- + + worksheet["Alpha"] = alpha + worksheet["New_Weights"] = worksheet["Weight"] * ( + -alpha * worksheet["Actual"] * worksheet["Prediction"] + ).apply(math.exp) + + # normalize + worksheet["New_Weights"] = worksheet["New_Weights"] / worksheet["New_Weights"].sum() + worksheet["Weight"] = worksheet["New_Weights"] + worksheet["Decision"] = df["Decision"] + + final_predictions["Prediction"] = ( + final_predictions["Prediction"] + worksheet["Alpha"] * worksheet["Prediction"] + ) + logger.debug(final_predictions) + worksheet = worksheet.drop( + columns=["New_Weights", "Prediction", "Actual", "Loss", "Weight_Times_Loss", "Alpha"] + ) + + mae = ( + np.abs( + final_predictions["Prediction"].apply(functions.sign) - final_predictions["Actual"] + 
) + / 2 + ).sum() / final_predictions.shape[0] + logger.debug(mae) + + if mae < best_epoch_value: + best_epoch_value = mae * 1 + best_epoch_idx = i * 1 + + pbar.set_description(f"Epoch {i + 1}. Loss: {mae}. Process: ") + + # ------------------------------ + + logger.info(f"The best epoch is {best_epoch_idx} with the {best_epoch_value} MAE score") + + models = models[0 : best_epoch_idx + 1] + alphas = alphas[0 : best_epoch_idx + 1] + + # ------------------------------ + + return models, alphas + def initializeAlphaFile(): - file = "outputs/rules/alphas.py" - header = "def findAlpha(epoch):\n" - functions.createFile(file, header) + file = "outputs/rules/alphas.py" + header = "def findAlpha(epoch):\n" + functions.createFile(file, header) + def addEpochAlpha(epoch, alpha): - file = "outputs/rules/alphas.py" - content = " if epoch == "+str(epoch)+":\n" - content += " return "+str(alpha) - functions.storeRule(file, content) + file = "outputs/rules/alphas.py" + content = " if epoch == " + str(epoch) + ":\n" + content += " return " + str(alpha) + functions.storeRule(file, content) diff --git a/chefboost/tuning/gbm.py b/chefboost/tuning/gbm.py index 0c99477..53a6098 100644 --- a/chefboost/tuning/gbm.py +++ b/chefboost/tuning/gbm.py @@ -1,314 +1,329 @@ +import imp # pylint: disable=deprecated-module +import gc + import pandas as pd import numpy as np +from tqdm import tqdm -import imp +from chefboost.commons import functions +from chefboost.training import Training +from chefboost.commons.logger import Logger -from chefboost.commons import functions, evaluate -from chefboost.training import Preprocess, Training -from chefboost import Chefboost as cb +# pylint: disable=unused-argument -from tqdm import tqdm +logger = Logger(module="chefboost/tuning/gbm.py") -import gc def findPrediction(row): - epoch = row['Epoch'] - row = row.drop(labels=['Epoch']) - columns = row.shape[0] - - params = [] - for j in range(0, columns-1): - params.append(row[j]) - - moduleName = "outputs/rules/rules%s" % (epoch-1) - fp, pathname, description = imp.find_module(moduleName) - myrules = imp.load_module(moduleName, fp, pathname, description) - - #prediction = int(myrules.findDecision(params)) - prediction = myrules.findDecision(params) - - return prediction - -def regressor(df, config, header, dataset_features, validation_df = None, process_id = None): - models = [] - - #we will update decisions in every epoch, this will be used to restore - base_actuals = df.Decision.values - - algorithm = config['algorithm'] - - enableRandomForest = config['enableRandomForest'] - num_of_trees = config['num_of_trees'] - enableMultitasking = config['enableMultitasking'] - - enableGBM = config['enableGBM'] - epochs = config['epochs'] - learning_rate = config['learning_rate'] - - enableAdaboost = config['enableAdaboost'] - - #------------------------------ - - boosted_from = 0; boosted_to = 0 - - #------------------------------ - - base_df = df.copy() - - #gbm will manipulate actuals. store its raw version. 
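Stepping back to the AdaBoost helpers just above: the alphas.py module written by initializeAlphaFile and addEpochAlpha is itself generated Python. After two boosting rounds it would look like this (alpha values illustrative):

    def findAlpha(epoch):
       if epoch == 0:
          return 0.8958797346140275
       if epoch == 1:
          return 0.6931471805599453

Each addEpochAlpha call appends one more if block, so findAlpha returns None for epochs that were never stored.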
- target_values = base_df['Decision'].values - num_of_instances = target_values.shape[0] - - root = 1 - file = "outputs/rules/rules0.py"; json_file = "outputs/rules/rules0.json" - functions.createFile(file, header) - functions.createFile(json_file, "[\n") - - Training.buildDecisionTree(df,root,file, config, dataset_features - , parent_level = 0, leaf_id = 0, parents = 'root') #generate rules0 - - #functions.storeRule(json_file," {}]") - - df = base_df.copy() - - base_df['Boosted_Prediction'] = 0 - - #------------------------------ - - best_epoch_idx = 0; best_epoch_loss = 1000000 - - pbar = tqdm(range(1, epochs+1), desc='Boosting') - - #for index in range(1,epochs+1): - #for index in tqdm(range(1,epochs+1), desc='Boosting'): - for index in pbar: - #print("epoch ",index," - ",end='') - loss = 0 - - #run data(i-1) and rules(i-1), save data1 - - #dynamic import - moduleName = "outputs/rules/rules%s" % (index-1) - fp, pathname, description = imp.find_module(moduleName) - myrules = imp.load_module(moduleName, fp, pathname, description) #rules0 - - models.append(myrules) - - new_data_set = "outputs/data/data%s.csv" % (index) - f = open(new_data_set, "w") - - #put header in the following file - columns = df.shape[1] - - mae = 0 - - #---------------------------------------- - - df['Epoch'] = index - df['Prediction'] = df.apply(findPrediction, axis=1) - - base_df['Boosted_Prediction'] += df['Prediction'] - - loss = (base_df['Boosted_Prediction'] - base_df['Decision']).pow(2).sum() - current_loss = loss / num_of_instances #mse - - if index == 1: - boosted_from = current_loss * 1 - elif index == epochs: - boosted_to = current_loss * 1 - - if current_loss < best_epoch_loss: - best_epoch_loss = current_loss * 1 - best_epoch_idx = index * 1 - - df['Decision'] = int(learning_rate)*(df['Decision'] - df['Prediction']) - df = df.drop(columns = ['Epoch', 'Prediction']) - - #--------------------------------- - - df.to_csv(new_data_set, index=False) - #data(i) created - - #--------------------------------- - - file = "outputs/rules/rules"+str(index)+".py" - json_file = "outputs/rules/rules"+str(index)+".json" - - functions.createFile(file, header) - functions.createFile(json_file, "[\n") - - current_df = df.copy() - Training.buildDecisionTree(df,root,file, config, dataset_features - , parent_level = 0, leaf_id = 0, parents = 'root', main_process_id = process_id) - - #functions.storeRule(json_file," {}]") - - df = current_df.copy() #numeric features require this restoration to apply findDecision function - - #rules(i) created - - loss = loss / num_of_instances - #print("epoch ",index," - loss: ",loss) - #print("loss: ",loss) - pbar.set_description("Epoch %d. Loss: %d. 
Process: " % (index, loss)) - - gc.collect() - - #--------------------------------- - - print("The best epoch is ", best_epoch_idx," with ", best_epoch_loss," loss value") - models = models[0:best_epoch_idx] - config["epochs"] = best_epoch_idx - - print("MSE of ",num_of_instances," instances are boosted from ",boosted_from," to ",best_epoch_loss," in ",epochs," epochs") - - return models - -def classifier(df, config, header, dataset_features, validation_df = None, process_id = None): - - models = [] - - print("gradient boosting for classification") - - epochs = config['epochs'] - enableParallelism = config['enableParallelism'] - - temp_df = df.copy() - original_dataset = df.copy() - worksheet = df.copy() - - classes = df['Decision'].unique() - - boosted_predictions = np.zeros([df.shape[0], len(classes)]) - - pbar = tqdm(range(0, epochs), desc='Boosting') - - #store actual set, we will use this to calculate loss - actual_set = pd.DataFrame(np.zeros([df.shape[0], len(classes)]), columns=classes) - for i in range(0, len(classes)): - current_class = classes[i] - actual_set[current_class] = np.where(df['Decision'] == current_class, 1, 0) - actual_set = actual_set.values #transform it to numpy array - - best_accuracy_idx = 0; best_accuracy_value = 0 - accuracies = [] - - #for epoch in range(0, epochs): - for epoch in pbar: - for i in range(0, len(classes)): - current_class = classes[i] - - if epoch == 0: - temp_df['Decision'] = np.where(df['Decision'] == current_class, 1, 0) - worksheet['Y_'+str(i)] = temp_df['Decision'] - else: - temp_df['Decision'] = worksheet['Y-P_'+str(i)] - - predictions = [] - - #change data type for decision column - temp_df[['Decision']].astype('int64') - - root = 1 - file_base = "outputs/rules/rules-for-"+current_class+"-round-"+str(epoch) - - file = file_base+".py" - functions.createFile(file, header) - - if enableParallelism == True: - json_file = file_base+".json" - functions.createFile(json_file, "[\n") - - Training.buildDecisionTree(temp_df, root, file, config, dataset_features - , parent_level = 0, leaf_id = 0, parents = 'root', main_process_id = process_id) - - #decision rules created - #---------------------------- - - #dynamic import - moduleName = "outputs/rules/rules-for-"+current_class+"-round-"+str(epoch) - fp, pathname, description = imp.find_module(moduleName) - myrules = imp.load_module(moduleName, fp, pathname, description) #rules0 - - models.append(myrules) - - num_of_columns = df.shape[1] - - for row, instance in df.iterrows(): - features = [] - for j in range(0, num_of_columns-1): #iterate on features - features.append(instance[j]) - - actual = temp_df.loc[row]['Decision'] - prediction = myrules.findDecision(features) - - predictions.append(prediction) - - #---------------------------- - if epoch == 0: - worksheet['F_'+str(i)] = 0 - else: - worksheet['F_'+str(i)] = pd.Series(predictions).values - - boosted_predictions[:,i] = boosted_predictions[:,i] + worksheet['F_'+str(i)].values.astype(np.float32) - - #print(boosted_predictions[0:5,:]) - - worksheet['P_'+str(i)] = 0 - - #---------------------------- - temp_df = df.copy() #restoration - - for row, instance in worksheet.iterrows(): - f_scores = [] - for i in range(0, len(classes)): - f_scores.append(instance['F_'+str(i)]) - - probabilities = functions.softmax(f_scores) - - for j in range(0, len(probabilities)): - instance['P_'+str(j)] = probabilities[j] - - worksheet.loc[row] = instance - - for i in range(0, len(classes)): - worksheet['Y-P_'+str(i)] = worksheet['Y_'+str(i)] - worksheet['P_'+str(i)] - 
- prediction_set = np.zeros([df.shape[0], len(classes)]) - for i in range(0, boosted_predictions.shape[0]): - predicted_index = np.argmax(boosted_predictions[i]) - prediction_set[i][predicted_index] = 1 - - #---------------------------- - #find loss for this epoch: prediction_set vs actual_set - classified = 0 - for i in range(0, actual_set.shape[0]): - actual = np.argmax(actual_set[i]) - prediction = np.argmax(prediction_set[i]) - #print("actual: ",actual," - prediction: ",prediction) - - if actual == prediction: - classified = classified + 1 - - accuracy = 100 * classified / actual_set.shape[0] - accuracies.append(accuracy) - - if accuracy > best_accuracy_value: - best_accuracy_value = accuracy * 1 - best_accuracy_idx = epoch * 1 - - #---------------------------- - - #print(worksheet.head()) - #print("round ",epoch+1) - pbar.set_description("Epoch %d. Accuracy: %d. Process: " % (epoch+1, accuracy)) - - gc.collect() - - #-------------------------------- - - print("The best accuracy got in ",best_accuracy_idx," epoch with the score ", best_accuracy_value) - - models = models[0: best_accuracy_idx * len(classes) + len(classes)] - - return models, classes + epoch = row["Epoch"] + row = row.drop(labels=["Epoch"]) + columns = row.shape[0] + + params = [] + for j in range(0, columns - 1): + params.append(row[j]) + + moduleName = f"outputs/rules/rules{epoch - 1}" + fp, pathname, description = imp.find_module(moduleName) + myrules = imp.load_module(moduleName, fp, pathname, description) + + # prediction = int(myrules.findDecision(params)) + prediction = myrules.findDecision(params) + + return prediction + + +def regressor(df, config, header, dataset_features, validation_df=None, process_id=None): + models = [] + + # we will update decisions in every epoch, this will be used to restore + epochs = config["epochs"] + learning_rate = config["learning_rate"] + + boosted_from = 0 + boosted_to = 0 + + base_df = df.copy() + + # gbm will manipulate actuals. store its raw version. 
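The comment above is the crux of the regressor: every epoch overwrites the Decision column with the current residuals, so the raw targets have to be stored once up front to measure loss. A toy sketch of that loop, with a stand-in predictor instead of a fitted tree and learning_rate assumed to be 1:

    import numpy as np

    actuals = np.array([10.0, 14.0, 20.0])  # raw targets, stored once
    residuals = actuals.copy()  # plays the role of the Decision column
    boosted = np.zeros_like(actuals)

    for epoch in range(3):
        prediction = 0.5 * residuals  # a "tree" that captures half of the residual
        boosted += prediction  # accumulates like Boosted_Prediction
        residuals = residuals - prediction  # next epoch fits these residuals
        print(epoch, ((boosted - actuals) ** 2).mean())  # MSE shrinks: 58.0, 14.5, 3.625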
+ target_values = base_df["Decision"].values + num_of_instances = target_values.shape[0] + + root = 1 + file = "outputs/rules/rules0.py" + json_file = "outputs/rules/rules0.json" + functions.createFile(file, header) + functions.createFile(json_file, "[\n") + + Training.buildDecisionTree( + df, root, file, config, dataset_features, parent_level=0, leaf_id=0, parents="root" + ) # generate rules0 + + # functions.storeRule(json_file," {}]") + + df = base_df.copy() + + base_df["Boosted_Prediction"] = 0 + + # ------------------------------ + + best_epoch_idx = 0 + best_epoch_loss = 1000000 + + pbar = tqdm(range(1, epochs + 1), desc="Boosting") + + # for index in range(1,epochs+1): + # for index in tqdm(range(1,epochs+1), desc='Boosting'): + for index in pbar: + logger.debug(f"epoch {index} - ") + loss = 0 + + # run data(i-1) and rules(i-1), save data1 + + # dynamic import + moduleName = f"outputs/rules/rules{index - 1}" + fp, pathname, description = imp.find_module(moduleName) + myrules = imp.load_module(moduleName, fp, pathname, description) # rules0 + + models.append(myrules) + + new_data_set = f"outputs/data/data{index}.csv" + with open(new_data_set, "w", encoding="UTF-8"): + pass + + # ---------------------------------------- + + df["Epoch"] = index + df["Prediction"] = df.apply(findPrediction, axis=1) + + base_df["Boosted_Prediction"] += df["Prediction"] + + loss = (base_df["Boosted_Prediction"] - base_df["Decision"]).pow(2).sum() + current_loss = loss / num_of_instances # mse + + if index == 1: + boosted_from = current_loss * 1 + elif index == epochs: + boosted_to = current_loss * 1 + logger.debug(f"Boosted to {boosted_to}") + + if current_loss < best_epoch_loss: + best_epoch_loss = current_loss * 1 + best_epoch_idx = index * 1 + + df["Decision"] = int(learning_rate) * (df["Decision"] - df["Prediction"]) + df = df.drop(columns=["Epoch", "Prediction"]) + + # --------------------------------- + + df.to_csv(new_data_set, index=False) + # data(i) created + + # --------------------------------- + + file = "outputs/rules/rules" + str(index) + ".py" + json_file = "outputs/rules/rules" + str(index) + ".json" + + functions.createFile(file, header) + functions.createFile(json_file, "[\n") + + current_df = df.copy() + Training.buildDecisionTree( + df, + root, + file, + config, + dataset_features, + parent_level=0, + leaf_id=0, + parents="root", + main_process_id=process_id, + ) + + # functions.storeRule(json_file," {}]") + + df = ( + current_df.copy() + ) # numeric features require this restoration to apply findDecision function + + # rules(i) created + + loss = loss / num_of_instances + logger.debug(f"epoch {index} - loss: {loss}") + logger.debug(f"loss: {loss}") + pbar.set_description(f"Epoch {index}. Loss: {loss}. 
Process: ") + + gc.collect() + + # --------------------------------- + + logger.info(f"The best epoch is {best_epoch_idx} with {best_epoch_loss} loss value") + models = models[0:best_epoch_idx] + config["epochs"] = best_epoch_idx + + logger.info( + f"MSE of {num_of_instances} instances are boosted from {boosted_from}" + f"to {best_epoch_loss} in {epochs} epochs" + ) + + return models + + +def classifier(df, config, header, dataset_features, validation_df=None, process_id=None): + models = [] + + logger.info("gradient boosting for classification") + + epochs = config["epochs"] + enableParallelism = config["enableParallelism"] + + temp_df = df.copy() + worksheet = df.copy() + + classes = df["Decision"].unique() + + boosted_predictions = np.zeros([df.shape[0], len(classes)]) + + pbar = tqdm(range(0, epochs), desc="Boosting") + + # store actual set, we will use this to calculate loss + actual_set = pd.DataFrame(np.zeros([df.shape[0], len(classes)]), columns=classes) + for current_class in classes: + actual_set[current_class] = np.where(df["Decision"] == current_class, 1, 0) + actual_set = actual_set.values # transform it to numpy array + + best_accuracy_idx = 0 + best_accuracy_value = 0 + accuracies = [] + + # for epoch in range(0, epochs): + for epoch in pbar: + for i, current_class in enumerate(classes): + + if epoch == 0: + temp_df["Decision"] = np.where(df["Decision"] == current_class, 1, 0) + worksheet["Y_" + str(i)] = temp_df["Decision"] + else: + temp_df["Decision"] = worksheet["Y-P_" + str(i)] + + predictions = [] + + # change data type for decision column + temp_df[["Decision"]].astype("int64") + + root = 1 + file_base = "outputs/rules/rules-for-" + current_class + "-round-" + str(epoch) + + file = file_base + ".py" + functions.createFile(file, header) + + if enableParallelism == True: + json_file = file_base + ".json" + functions.createFile(json_file, "[\n") + + Training.buildDecisionTree( + temp_df, + root, + file, + config, + dataset_features, + parent_level=0, + leaf_id=0, + parents="root", + main_process_id=process_id, + ) + + # decision rules created + # ---------------------------- + + # dynamic import + moduleName = "outputs/rules/rules-for-" + current_class + "-round-" + str(epoch) + fp, pathname, description = imp.find_module(moduleName) + myrules = imp.load_module(moduleName, fp, pathname, description) # rules0 + + models.append(myrules) + + num_of_columns = df.shape[1] + + for row, instance in df.iterrows(): + features = [] + for j in range(0, num_of_columns - 1): # iterate on features + features.append(instance[j]) + + actual = temp_df.loc[row]["Decision"] + prediction = myrules.findDecision(features) + + predictions.append(prediction) + + # ---------------------------- + if epoch == 0: + worksheet["F_" + str(i)] = 0 + else: + worksheet["F_" + str(i)] = pd.Series(predictions).values + + boosted_predictions[:, i] = boosted_predictions[:, i] + worksheet[ + "F_" + str(i) + ].values.astype(np.float32) + + logger.debug(boosted_predictions[0:5, :]) + + worksheet["P_" + str(i)] = 0 + + # ---------------------------- + temp_df = df.copy() # restoration + + for row, instance in worksheet.iterrows(): + f_scores = [] + for i in range(0, len(classes)): + f_scores.append(instance["F_" + str(i)]) + + probabilities = functions.softmax(f_scores) + + for j, current_prob in enumerate(probabilities): + instance["P_" + str(j)] = current_prob + + worksheet.loc[row] = instance + + for i in range(0, len(classes)): + worksheet["Y-P_" + str(i)] = worksheet["Y_" + str(i)] - worksheet["P_" + 
+
+        prediction_set = np.zeros([df.shape[0], len(classes)])
+        for i in range(0, boosted_predictions.shape[0]):
+            predicted_index = np.argmax(boosted_predictions[i])
+            prediction_set[i][predicted_index] = 1
+
+        # ----------------------------
+        # find loss for this epoch: prediction_set vs actual_set
+        classified = 0
+        for i in range(0, actual_set.shape[0]):
+            actual = np.argmax(actual_set[i])
+            prediction = np.argmax(prediction_set[i])
+            logger.debug(f"actual: {actual} - prediction: {prediction}")
+
+            if actual == prediction:
+                classified = classified + 1
+
+        accuracy = 100 * classified / actual_set.shape[0]
+        accuracies.append(accuracy)
+
+        if accuracy > best_accuracy_value:
+            best_accuracy_value = accuracy * 1
+            best_accuracy_idx = epoch * 1
+
+        # ----------------------------
+
+        logger.debug(worksheet.head())
+        logger.debug(f"round {epoch + 1}")
+        pbar.set_description(f"Epoch {epoch + 1}. Accuracy: {accuracy}. Process: ")
+
+        gc.collect()
+
+    # --------------------------------
+
+    logger.info(
+        f"The best accuracy is achieved in epoch {best_accuracy_idx} with the score {best_accuracy_value}"
+    )
+
+    models = models[0 : best_accuracy_idx * len(classes) + len(classes)]
+
+    return models, classes
diff --git a/chefboost/tuning/randomforest.py b/chefboost/tuning/randomforest.py
index 163c8a5..743a8f0 100644
--- a/chefboost/tuning/randomforest.py
+++ b/chefboost/tuning/randomforest.py
@@ -1,92 +1,123 @@
-import pandas as pd
-import numpy as np
-from multiprocessing import Pool
 import multiprocessing
 from contextlib import closing
-from chefboost.commons import functions, evaluate
-from chefboost.training import Training
-from chefboost import Chefboost as cb
-from tqdm import tqdm
-import imp
-import os
-
-def apply(df, config, header, dataset_features, validation_df = None, process_id = None):
-
-    models = []
-
-    num_of_trees = config['num_of_trees']
-
-    parallelism_on = config["enableParallelism"]
-
-    #TODO: is this logical for 48x2 cores?
-    #config["enableParallelism"] = False #run each tree in parallel but each branch in serial
-
-    #TODO: reconstruct for parallel run is problematic. you should reconstruct based on tree id.
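One detail worth noting before the rewritten apply below: both the old and new versions draw each sub-tree's training set with df.sample(frac=1 / num_of_trees), that is an independent draw of roughly 1/num_of_trees of the rows without replacement per tree, rather than classical bootstrap bagging. A minimal sketch of that sampling:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"x": np.arange(12), "Decision": np.arange(12) % 2})
    num_of_trees = 3

    for i in range(num_of_trees):
        subset = df.sample(frac=1 / num_of_trees)  # ~4 of 12 rows, drawn independently per tree
        print(f"tree {i} trains on rows {sorted(subset.index.tolist())}")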
- - input_params = [] - - pbar = tqdm(range(0, num_of_trees), desc='Bagging') - for i in pbar: - pbar.set_description("Sub decision tree %d is processing" % (i+1)) - subset = df.sample(frac=1/num_of_trees) - - root = 1 - - moduleName = "outputs/rules/rule_"+str(i) - file = moduleName+".py" +import imp # pylint: disable=deprecated-module - functions.createFile(file, header) - - if parallelism_on: #parallel run - input_params.append((subset, root, file, config, dataset_features, 0, 0, 'root', i, None, process_id)) - - else: #serial run - Training.buildDecisionTree(subset,root, file, config, dataset_features, parent_level = 0, leaf_id = 0, parents = 'root', tree_id = i, main_process_id = process_id) - - #------------------------------- - - if parallelism_on: - num_cores = config["num_cores"] - - #--------------------------------- - - if num_of_trees <= num_cores: - POOL_SIZE = num_of_trees - else: - POOL_SIZE = num_cores - - with closing(multiprocessing.Pool(POOL_SIZE)) as pool: - funclist = [] - for input_param in input_params: - f = pool.apply_async(buildDecisionTree, [*input_param]) - funclist.append(f) - - #all functions registered here - #results = [] - for f in tqdm(funclist): - branch_results = f.get(timeout = 100000) - #results.append(branch_results) - - pool.close() - pool.terminate() +from tqdm import tqdm - #------------------------------- - #collect models for both serial and parallel here - for i in range(0, num_of_trees): - moduleName = "outputs/rules/rule_"+str(i) - fp, pathname, description = imp.find_module(moduleName) - myrules = imp.load_module(moduleName, fp, pathname, description) - models.append(myrules) +from chefboost.commons import functions +from chefboost.training import Training - #------------------------------- +# pylint: disable=unused-argument - return models -#wrapper for parallel run -def buildDecisionTree(df, root, file, config, dataset_features, parent_level, leaf_id, parents, tree_id, validation_df = None, process_id = None): - Training.buildDecisionTree(df, root, file, config, dataset_features, parent_level = parent_level, leaf_id =leaf_id, parents = parents, tree_id = tree_id, main_process_id = process_id) +def apply(df, config, header, dataset_features, validation_df=None, process_id=None): + models = [] -""" -def buildDecisionTreeWrapper(func, args): - return func(*args) -""" + num_of_trees = config["num_of_trees"] + + parallelism_on = config["enableParallelism"] + + # TODO: is this logical for 48x2 cores? + # config["enableParallelism"] = False #run each tree in parallel but each branch in serial + + # TODO: reconstruct for parallel run is problematic. you should reconstruct based on tree id. 
+ + input_params = [] + + pbar = tqdm(range(0, num_of_trees), desc="Bagging") + for i in pbar: + pbar.set_description(f"Sub decision tree {i + 1} is processing") + subset = df.sample(frac=1 / num_of_trees) + + root = 1 + + moduleName = "outputs/rules/rule_" + str(i) + file = moduleName + ".py" + + functions.createFile(file, header) + + if parallelism_on: # parallel run + input_params.append( + (subset, root, file, config, dataset_features, 0, 0, "root", i, None, process_id) + ) + + else: # serial run + Training.buildDecisionTree( + subset, + root, + file, + config, + dataset_features, + parent_level=0, + leaf_id=0, + parents="root", + tree_id=i, + main_process_id=process_id, + ) + + # ------------------------------- + + if parallelism_on: + num_cores = config["num_cores"] + + # --------------------------------- + + if num_of_trees <= num_cores: + POOL_SIZE = num_of_trees + else: + POOL_SIZE = num_cores + + with closing(multiprocessing.Pool(POOL_SIZE)) as pool: + funclist = [] + for input_param in input_params: + f = pool.apply_async(buildDecisionTree, [*input_param]) + funclist.append(f) + + # all functions registered here + # results = [] + for f in tqdm(funclist): + _ = f.get(timeout=100000) # this was branch_results + # results.append(branch_results) + + pool.close() + pool.terminate() + + # ------------------------------- + # collect models for both serial and parallel here + for i in range(0, num_of_trees): + moduleName = "outputs/rules/rule_" + str(i) + fp, pathname, description = imp.find_module(moduleName) + myrules = imp.load_module(moduleName, fp, pathname, description) + models.append(myrules) + + # ------------------------------- + + return models + + +# wrapper for parallel run +def buildDecisionTree( + df, + root, + file, + config, + dataset_features, + parent_level, + leaf_id, + parents, + tree_id, + validation_df=None, + process_id=None, +): + Training.buildDecisionTree( + df, + root, + file, + config, + dataset_features, + parent_level=parent_level, + leaf_id=leaf_id, + parents=parents, + tree_id=tree_id, + main_process_id=process_id, + ) diff --git a/requirements.txt b/requirements.txt index 845b072..ccd2cff 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -pip install pandas==0.22.0 -pip install numpy==1.14.0 -pip install tqdm==4.30.0 -pip install psutil==5.4.3 \ No newline at end of file +pandas>=0.22.0 +numpy>=1.14.0 +tqdm>=4.30.0 +psutil>=5.4.3 \ No newline at end of file diff --git a/scripts/push-release.sh b/scripts/push-release.sh new file mode 100644 index 0000000..5b3e6fa --- /dev/null +++ b/scripts/push-release.sh @@ -0,0 +1,11 @@ +cd .. 
+ +echo "deleting existing release related files" +rm -rf dist/* +rm -rf build/* + +echo "creating a package for current release - pypi compatible" +python setup.py sdist bdist_wheel + +echo "pushing the release to pypi" +python -m twine upload dist/* \ No newline at end of file diff --git a/setup.py b/setup.py index b2594b9..d157070 100644 --- a/setup.py +++ b/setup.py @@ -3,6 +3,9 @@ with open("README.md", "r", encoding="utf-8") as fh: long_description = fh.read() +with open("requirements.txt", "r", encoding="utf-8") as f: + requirements = f.read().split("\n") + setuptools.setup( name="chefboost", version="0.0.18", @@ -19,5 +22,5 @@ "Operating System :: OS Independent", ], python_requires='>=3.6', - install_requires=["pandas>=0.22.0", "numpy>=1.14.0", "tqdm>=4.30.0", "psutil>=5.4.3"] + install_requires=requirements ) diff --git a/tests/global-unit-test.py b/tests/global-unit-test.py index ba513ff..a519828 100644 --- a/tests/global-unit-test.py +++ b/tests/global-unit-test.py @@ -1,309 +1,339 @@ +import gc import pandas as pd -import sys from chefboost import Chefboost as cb -import gc +from chefboost.commons.logger import Logger -pd.set_option('display.max_rows', 500) -pd.set_option('display.max_columns', 500) -pd.set_option('display.width', 1000) +pd.set_option("display.max_rows", 500) +pd.set_option("display.max_columns", 500) +pd.set_option("display.width", 1000) -#---------------------------------------------- +logger = Logger(module="tests/global-unit-test.py") + +# ---------------------------------------------- parallelism_cases = [True] -#parallelism_cases = [False] -#parallelism_cases = [False, True] +# parallelism_cases = [False] +# parallelism_cases = [False, True] + +if __name__ == "__main__": -if __name__ == '__main__': + for enableParallelism in parallelism_cases: - for enableParallelism in parallelism_cases: + logger.info("*************************") + logger.info(f"enableParallelism is set to {enableParallelism}") + logger.info("*************************") - print("*************************") - print("enableParallelism is set to ",enableParallelism) - print("*************************") + logger.info("no config passed") + df = pd.read_csv("dataset/golf.txt") + model = cb.fit(df) - print("no config passed ") - df = pd.read_csv("dataset/golf.txt") - model = cb.fit(df) + gc.collect() - gc.collect() + logger.info("-------------------------") - print("-------------------------") + logger.info("Validation set case") - print("Validation set case") + df = pd.read_csv("dataset/golf.txt") + validation_df = pd.read_csv("dataset/golf.txt") + config = {"algorithm": "ID3", "enableParallelism": enableParallelism} + model = cb.fit(df, config, validation_df=validation_df) - df = pd.read_csv("dataset/golf.txt") - validation_df = pd.read_csv("dataset/golf.txt") - config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism} - model = cb.fit(df, config, validation_df = validation_df) + gc.collect() - gc.collect() + logger.info("-------------------------") - print("-------------------------") + logger.info("Feature importance") + # decision_rules = model["trees"][0].__dict__["__name__"]+".py" + decision_rules = model["trees"][0].__dict__["__spec__"].origin + logger.info(cb.feature_importance(decision_rules)) - print("Feature importance") - #decision_rules = model["trees"][0].__dict__["__name__"]+".py" - decision_rules = model["trees"][0].__dict__["__spec__"].origin - print(cb.feature_importance(decision_rules)) + logger.info("-------------------------") - 
print("-------------------------") + logger.info("ID3 for nominal features and nominal target:") + df = pd.read_csv("dataset/golf.txt") - print("ID3 for nominal features and nominal target:") - df = pd.read_csv("dataset/golf.txt") + config = {"algorithm": "ID3", "enableParallelism": enableParallelism} + model = cb.fit(df, config) - config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism} - model = cb.fit(df, config) + validation_df = pd.read_csv("dataset/golf.txt") - validation_df = pd.read_csv("dataset/golf.txt") + logger.info("External validation") + cb.evaluate(model, validation_df) - print("External validation") - cb.evaluate(model, validation_df) + cb.save_model(model) + logger.info("built model is saved to model.pkl") - cb.save_model(model) - print("built model is saved to model.pkl") + restored_model = cb.load_model("model.pkl") + logger.info("built model is restored from model.pkl") - restored_model = cb.load_model("model.pkl") - print("built model is restored from model.pkl") + instance = ["Sunny", "Hot", "High", "Weak"] + prediction = cb.predict(restored_model, instance) - instance = ['Sunny', 'Hot', 'High', 'Weak'] - prediction = cb.predict(restored_model, instance) + logger.info(f"prediction for {instance} is {prediction}") - print("prediction for ", instance, "is ", prediction) + gc.collect() - gc.collect() + logger.info("-------------------------") - print("-------------------------") + logger.info("ID3 for nominal/numeric features and nominal target:") + config = {"algorithm": "ID3", "enableParallelism": enableParallelism} + model = cb.fit(pd.read_csv("dataset/golf2.txt"), config) - print("ID3 for nominal/numeric features and nominal target:") - config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism} - model = cb.fit(pd.read_csv("dataset/golf2.txt"), config) + instance = ["Sunny", 85, 85, "Weak"] + prediction = cb.predict(model, instance) + logger.info(f"prediction for {instance} is {prediction}") - instance = ['Sunny', 85, 85, 'Weak'] - prediction = cb.predict(model, instance) - print("prediction for ", instance, "is ", prediction) + gc.collect() - gc.collect() + logger.info("-------------------------") - print("-------------------------") + logger.info("C4.5 for nominal/numeric features and nominal target:") + config = {"algorithm": "C4.5", "enableParallelism": enableParallelism} + cb.fit(pd.read_csv("dataset/golf2.txt"), config) - print("C4.5 for nominal/numeric features and nominal target:") - config = {'algorithm': 'C4.5', 'enableParallelism': enableParallelism} - cb.fit(pd.read_csv("dataset/golf2.txt"), config) + gc.collect() - gc.collect() + logger.info("-------------------------") - print("-------------------------") + logger.info("CART for nominal/numeric features and nominal target:") + config = {"algorithm": "CART", "enableParallelism": enableParallelism} + cb.fit(pd.read_csv("dataset/golf2.txt"), config) - print("CART for nominal/numeric features and nominal target:") - config = {'algorithm': 'CART', 'enableParallelism': enableParallelism} - cb.fit(pd.read_csv("dataset/golf2.txt"), config) + gc.collect() - gc.collect() + logger.info("-------------------------") - print("-------------------------") + logger.info("CHAID for nominal features and nominal target:") + config = {"algorithm": "CHAID", "enableParallelism": enableParallelism} + cb.fit(pd.read_csv("dataset/golf.txt"), config) - print("CHAID for nominal features and nominal target:") - config = {'algorithm': 'CHAID', 'enableParallelism': enableParallelism} - 
cb.fit(pd.read_csv("dataset/golf.txt"), config) + gc.collect() - gc.collect() + logger.info("-------------------------") - print("-------------------------") + logger.info("CHAID for nominal/numeric features and nominal target:") + config = {"algorithm": "CHAID", "enableParallelism": enableParallelism} + cb.fit(pd.read_csv("dataset/golf2.txt"), config) - print("CHAID for nominal/numeric features and nominal target:") - config = {'algorithm': 'CHAID', 'enableParallelism': enableParallelism} - cb.fit(pd.read_csv("dataset/golf2.txt"), config) + gc.collect() - gc.collect() + logger.info("-------------------------") - print("-------------------------") + logger.info("regression tree for nominal features, numeric target") + config = {"algorithm": "Regression", "enableParallelism": enableParallelism} + cb.fit(pd.read_csv("dataset/golf3.txt"), config) - print("regression tree for nominal features, numeric target") - config = {'algorithm': 'Regression', 'enableParallelism': enableParallelism} - cb.fit(pd.read_csv("dataset/golf3.txt"), config) + gc.collect() - gc.collect() + logger.info("-------------------------") - print("-------------------------") + logger.info("regression tree for nominal/numeric features, numeric target") + config = {"algorithm": "Regression", "enableParallelism": enableParallelism} + cb.fit(pd.read_csv("dataset/golf4.txt"), config) - print("regression tree for nominal/numeric features, numeric target") - config = {'algorithm': 'Regression', 'enableParallelism': enableParallelism} - cb.fit(pd.read_csv("dataset/golf4.txt"), config) + gc.collect() - gc.collect() + logger.info("-------------------------") - print("-------------------------") + logger.info( + "algorithm must be regression tree for numetic target. set any other algorithm." + ) + config = {"algorithm": "ID3", "enableParallelism": enableParallelism} + cb.fit(pd.read_csv("dataset/golf4.txt"), config) - print("algorithm must be regression tree for numetic target. 
set any other algorithm.")
-    config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
-    cb.fit(pd.read_csv("dataset/golf4.txt"), config)
-    gc.collect()
+    gc.collect()
-    print("-------------------------")
+    logger.info("-------------------------")
-    print("ID3 for nominal features and target (large data set)")
-    config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
-    model = cb.fit(pd.read_csv("dataset/car.data"), config)
+    logger.info("ID3 for nominal features and target (large data set)")
+    config = {"algorithm": "ID3", "enableParallelism": enableParallelism}
+    model = cb.fit(pd.read_csv("dataset/car.data"), config)
-    instance = ['vhigh','vhigh',2,'2','small','low']
-    prediction = cb.predict(model, instance)
-    print(prediction)
+    instance = ["vhigh", "vhigh", 2, "2", "small", "low"]
+    prediction = cb.predict(model, instance)
+    logger.info(prediction)
-    instance = ['high','high','4','more','big','high']
-    prediction = cb.predict(model, instance)
-    print(prediction)
+    instance = ["high", "high", "4", "more", "big", "high"]
+    prediction = cb.predict(model, instance)
+    logger.info(prediction)
-    gc.collect()
+    gc.collect()
-    print("-------------------------")
+    logger.info("-------------------------")
-    print("C4.5 for nominal features and target (large data set)")
-    config = {'algorithm': 'C4.5', 'enableParallelism': enableParallelism}
-    cb.fit(pd.read_csv("dataset/car.data"), config)
+    logger.info("C4.5 for nominal features and target (large data set)")
+    config = {"algorithm": "C4.5", "enableParallelism": enableParallelism}
+    cb.fit(pd.read_csv("dataset/car.data"), config)
-    gc.collect()
+    gc.collect()
-    print("-------------------------")
+    logger.info("-------------------------")
-    print("CART for nominal features and target (large data set)")
-    config = {'algorithm': 'CART', 'enableParallelism': enableParallelism}
-    cb.fit(pd.read_csv("dataset/car.data"), config)
+    logger.info("CART for nominal features and target (large data set)")
+    config = {"algorithm": "CART", "enableParallelism": enableParallelism}
+    cb.fit(pd.read_csv("dataset/car.data"), config)
-    gc.collect()
+    gc.collect()
-    print("-------------------------")
+    logger.info("-------------------------")
-    print("CHAID for nominal features and target (large data set)")
-    config = {'algorithm': 'CHAID', 'enableParallelism': enableParallelism}
-    df = pd.read_csv("dataset/car.data")
-    cb.fit(df, config)
+    logger.info("CHAID for nominal features and target (large data set)")
+    config = {"algorithm": "CHAID", "enableParallelism": enableParallelism}
+    df = pd.read_csv("dataset/car.data")
+    cb.fit(df, config)
-    gc.collect()
+    gc.collect()
-    print("-------------------------")
+    logger.info("-------------------------")
-    print("Iris with regular decision tree")
-    config = {'algorithm': 'ID3'}
-    df = pd.read_csv("dataset/iris.data", names=["Sepal length", "Sepal width", "Petal length", "Petal width", "Decision"])
-    model = cb.fit(df, config)
+    logger.info("Iris with regular decision tree")
+    config = {"algorithm": "ID3"}
+    df = pd.read_csv(
+        "dataset/iris.data",
+        names=["Sepal length", "Sepal width", "Petal length", "Petal width", "Decision"],
+    )
+    model = cb.fit(df, config)
-    gc.collect()
+    gc.collect()
-    print("-------------------------")
+    logger.info("-------------------------")
-    print("Adaboost")
-    config = {'algorithm': 'ID3', 'enableAdaboost': True, 'num_of_weak_classifier': 10, 'enableParallelism': False}
-    df = pd.read_csv("dataset/adaboost.txt")
-    validation_df = df.copy()
+    logger.info("Adaboost")
+    config = {
+        "algorithm": "ID3",
+        "enableAdaboost": True,
+        "num_of_weak_classifier": 10,
+        "enableParallelism": False,
+    }
+    df = pd.read_csv("dataset/adaboost.txt")
+    validation_df = df.copy()
-    model = cb.fit(df, config
-        , validation_df = validation_df
-    )
+    model = cb.fit(df, config, validation_df=validation_df)
-    instance = [4, 3.5]
-    #prediction = cb.predict(model, instance)
-    #print("prediction for ",instance," is ",prediction)
+    instance = [4, 3.5]
-    gc.collect()
+    gc.collect()
-    print("-------------------------")
+    logger.info("-------------------------")
-    print("Regular GBM")
-    config = {'algorithm': 'CART', 'enableGBM': True, 'epochs': 10, 'learning_rate': 1, 'enableParallelism': enableParallelism}
-    df = pd.read_csv("dataset/golf4.txt")
-    validation_df = pd.read_csv("dataset/golf4.txt")
-    model = cb.fit(df, config
-        , validation_df = validation_df
-    )
+    logger.info("Regular GBM")
+    config = {
+        "algorithm": "CART",
+        "enableGBM": True,
+        "epochs": 10,
+        "learning_rate": 1,
+        "enableParallelism": enableParallelism,
+    }
+    df = pd.read_csv("dataset/golf4.txt")
+    validation_df = pd.read_csv("dataset/golf4.txt")
+    model = cb.fit(df, config, validation_df=validation_df)
-    instance = ['Sunny',85,85,'Weak']
-    prediction = cb.predict(model, instance)
-    print("prediction for ",instance," is ",prediction)
+    instance = ["Sunny", 85, 85, "Weak"]
+    prediction = cb.predict(model, instance)
+    logger.info(f"prediction for {instance} is {prediction}")
-    gc.collect()
+    gc.collect()
-    print("-------------------------")
+    logger.info("-------------------------")
-    print("GBM for classification")
-    config = {'algorithm': 'ID3', 'enableGBM': True, 'epochs': 10, 'learning_rate': 1, 'enableParallelism': enableParallelism}
+    logger.info("GBM for classification")
+    config = {
+        "algorithm": "ID3",
+        "enableGBM": True,
+        "epochs": 10,
+        "learning_rate": 1,
+        "enableParallelism": enableParallelism,
+    }
-    df = pd.read_csv("dataset/iris.data", names=["Sepal length", "Sepal width", "Petal length", "Petal width", "Decision"])
-    validation_df = df.copy()
+    df = pd.read_csv(
+        "dataset/iris.data",
+        names=["Sepal length", "Sepal width", "Petal length", "Petal width", "Decision"],
+    )
+    validation_df = df.copy()
-    model = cb.fit(df, config
-        , validation_df = validation_df
-    )
+    model = cb.fit(df, config, validation_df=validation_df)
-    instance = [7.0,3.2,4.7,1.4]
-    prediction = cb.predict(model, instance)
-    print("prediction for ",instance," is ",prediction)
+    instance = [7.0, 3.2, 4.7, 1.4]
+    prediction = cb.predict(model, instance)
+    logger.info(f"prediction for {instance} is {prediction}")
-    gc.collect()
+    gc.collect()
-    print("-------------------------")
+    logger.info("-------------------------")
-    print("Random forest")
-    config = {'algorithm': 'ID3', 'enableRandomForest': True, 'num_of_trees': 3
-        , 'enableParallelism': enableParallelism
-    }
-    df = pd.read_csv("dataset/car.data")
-    validation_df = pd.read_csv("dataset/car.data")
-    model = cb.fit(pd.read_csv("dataset/car.data"), config
-        #, validation_df = validation_df
-    )
+    logger.info("Random forest")
+    config = {
+        "algorithm": "ID3",
+        "enableRandomForest": True,
+        "num_of_trees": 3,
+        "enableParallelism": enableParallelism,
+    }
+    df = pd.read_csv("dataset/car.data")
+    validation_df = pd.read_csv("dataset/car.data")
+    model = cb.fit(
+        pd.read_csv("dataset/car.data"),
+        config
+        # , validation_df = validation_df
+    )
-    print("Feature importance of random forest")
-    decision_rules = []
-    for tree in model["trees"]:
-        decision_rule = tree.__dict__["__spec__"].origin
-        decision_rules.append(decision_rule)
+    logger.info("Feature importance of random forest")
+    decision_rules = []
+    for tree in model["trees"]:
+        decision_rule = tree.__dict__["__spec__"].origin
+        decision_rules.append(decision_rule)
-    df = cb.feature_importance(decision_rules)
-    print(df)
+    df = cb.feature_importance(decision_rules)
+    logger.info(df)
-    instance = ['vhigh','vhigh',2,'2','small','low']
-    prediction = cb.predict(model, instance)
-    print("prediction for ",instance," is ",prediction)
+    instance = ["vhigh", "vhigh", 2, "2", "small", "low"]
+    prediction = cb.predict(model, instance)
+    logger.info(f"prediction for {instance} is {prediction}")
-    instance = ['high','high',4,'more','big','high']
-    prediction = cb.predict(model, instance)
-    print("prediction for ",instance," is ",prediction)
+    instance = ["high", "high", 4, "more", "big", "high"]
+    prediction = cb.predict(model, instance)
+    logger.info(f"prediction for {instance} is {prediction}")
-    gc.collect()
+    gc.collect()
-    print("-------------------------")
+    logger.info("-------------------------")
-    print("Random forest for regression")
-    config = {'algorithm': 'ID3', 'enableRandomForest': True, 'num_of_trees': 5, 'enableMultitasking': False, 'enableParallelism': enableParallelism}
-    df = pd.read_csv("dataset/car_reg.data")
-    model = cb.fit(pd.read_csv("dataset/car_reg.data"), config)
-    validation_df = pd.read_csv("dataset/car_reg.data")
-    cb.evaluate(model, validation_df)
-    instance = ['high','high',4,'more','big','high']
-    prediction = cb.predict(model, instance)
-    print("prediction for ",instance," is ",prediction)
+    logger.info("Random forest for regression")
+    config = {
+        "algorithm": "ID3",
+        "enableRandomForest": True,
+        "num_of_trees": 5,
+        "enableMultitasking": False,
+        "enableParallelism": enableParallelism,
+    }
+    df = pd.read_csv("dataset/car_reg.data")
+    model = cb.fit(pd.read_csv("dataset/car_reg.data"), config)
+    validation_df = pd.read_csv("dataset/car_reg.data")
+    cb.evaluate(model, validation_df)
+    instance = ["high", "high", 4, "more", "big", "high"]
+    prediction = cb.predict(model, instance)
+    logger.info(f"prediction for {instance} is {prediction}")
-    gc.collect()
+    gc.collect()
-    print("-------------------------")
+    logger.info("-------------------------")
-
-    print("Is there any none predictions?")
-    config = {'algorithm': 'C4.5', 'enableParallelism': enableParallelism}
-    model = cb.fit(pd.read_csv("dataset/none_train.txt"), config)
-    test_set = pd.read_csv("dataset/none_test.txt")
-    instance = test_set.iloc[3]
-    print(instance.values, "->", cb.predict(model, instance))
+    logger.info("Is there any none predictions?")
+    config = {"algorithm": "C4.5", "enableParallelism": enableParallelism}
+    model = cb.fit(pd.read_csv("dataset/none_train.txt"), config)
+    test_set = pd.read_csv("dataset/none_test.txt")
+    instance = test_set.iloc[3]
+    logger.info(f"{instance.values} -> {cb.predict(model, instance)}")
-    gc.collect()
+    gc.collect()
-    print("-------------------------")
+    logger.info("-------------------------")
-    print("-------------------------")
-    print("unit tests completed successfully...")
+    logger.info("-------------------------")
+    logger.info("unit tests completed successfully...")
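The first patch above routes all test output through the module-level logger that the new chefboost/commons/logger.py provides, instead of bare print calls. Below is a minimal sketch of that pattern for reference; it assumes the new helper is a thin wrapper over Python's stdlib logging module, and build_logger is a hypothetical stand-in since the helper's actual interface is not shown in this series.

    # Minimal sketch only -- not the real chefboost/commons/logger.py.
    # Assumes a thin wrapper over the stdlib logging module; build_logger is hypothetical.
    import logging

    def build_logger(name: str = "chefboost") -> logging.Logger:
        logger = logging.getLogger(name)
        if not logger.handlers:  # avoid attaching duplicate handlers on repeated imports
            handler = logging.StreamHandler()
            handler.setFormatter(logging.Formatter("%(asctime)s - %(message)s"))
            logger.addHandler(handler)
        logger.setLevel(logging.INFO)
        return logger

    logger = build_logger()
    logger.info("unit tests completed successfully...")  # replaces the old print call
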
From 1531ff73f8663df69190916296b13f087701c415 Mon Sep 17 00:00:00 2001
From: Sefik Ilkin Serengil
Date: Sat, 23 Dec 2023 11:07:32 +0000
Subject: [PATCH 2/3] print df in actions

---
 tests/global-unit-test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/global-unit-test.py b/tests/global-unit-test.py
index a519828..defa3a0 100644
--- a/tests/global-unit-test.py
+++ b/tests/global-unit-test.py
@@ -25,6 +25,7 @@
     logger.info("no config passed")
     df = pd.read_csv("dataset/golf.txt")
+    logger.info(df.head())
     model = cb.fit(df)
     gc.collect()

From b0c06a97fd426c69d238e670d368daa148019632 Mon Sep 17 00:00:00 2001
From: Sefik Ilkin Serengil
Date: Sat, 23 Dec 2023 11:12:08 +0000
Subject: [PATCH 3/3] enforce dependencies

---
 .github/workflows/tests.yml | 2 +-
 tests/global-unit-test.py   | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 65d670b..5a5350b 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -34,7 +34,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install pytest
+          pip install pandas==1.3.5 numpy==1.22.3 tqdm==4.62.3 psutil==5.9.0
           pip install .

       - name: Test with pytest

diff --git a/tests/global-unit-test.py b/tests/global-unit-test.py
index defa3a0..a519828 100644
--- a/tests/global-unit-test.py
+++ b/tests/global-unit-test.py
@@ -25,7 +25,6 @@
     logger.info("no config passed")
     df = pd.read_csv("dataset/golf.txt")
-    logger.info(df.head())
     model = cb.fit(df)
     gc.collect()
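The third patch pins the CI test environment to exact dependency versions (pandas 1.3.5, numpy 1.22.3, tqdm 4.62.3, psutil 5.9.0) instead of whatever pip would resolve, and drops the temporary df.head() logging again. The flow the test script exercises is unchanged; condensed below from the ID3/car.data section of the first patch, assuming the package is installed and the script runs from the tests/ directory so the dataset path resolves.

    import logging

    import pandas as pd
    from chefboost import Chefboost as cb

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Train a plain ID3 decision tree on the car evaluation data set.
    config = {"algorithm": "ID3", "enableParallelism": False}
    model = cb.fit(pd.read_csv("dataset/car.data"), config)

    # Predict one instance; feature values follow the training column order.
    instance = ["vhigh", "vhigh", 2, "2", "small", "low"]
    prediction = cb.predict(model, instance)
    logger.info(f"prediction for {instance} is {prediction}")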