From 03db9081da53123f1da5a451a6fbbe7807c92973 Mon Sep 17 00:00:00 2001
From: Anass Yarroudh
Date: Tue, 18 Apr 2023 23:22:28 +0200
Subject: [PATCH] New updates

---
 README.md                         |  38 +-
 semseg.py => SemanticML/main.py   | 582 +++++++++++++++---------------
 config.json => config/config.json |  96 ++---
 setup.py                          |  25 ++
 4 files changed, 389 insertions(+), 352 deletions(-)
 rename semseg.py => SemanticML/main.py (97%)
 rename config.json => config/config.json (97%)
 create mode 100644 setup.py

diff --git a/README.md b/README.md
index dada055..11e2287 100644
--- a/README.md
+++ b/README.md
@@ -15,14 +15,26 @@ Semantic segmentation of point clouds is the process of classifying each point i

 ## Installation

-The easiest way to install Semseg on Windows is to use the binary package on the [Release page](https://github.com/Yarroudh/Semseg/releases/tag/Semseg). In case you can not use the Windows installer, or if you are using a different operating system, you can build everything from source.
+You can install SemanticML from PyPI by running:
+
+```bash
+pip install semanticml
+```
+
+You can also build everything from source:
+
+```bash
+git clone https://github.com/Yarroudh/SemanticML
+cd SemanticML
+python setup.py install
+```

 ## Usage of the CLI

-After installation, you have a small program called semseg. Use semseg --help to see the detailed help:
+After installation, you have a small program called sml. Use sml --help to see the detailed help:

 ```
-Usage: semseg [OPTIONS] COMMAND [ARGS]...
+Usage: sml [OPTIONS] COMMAND [ARGS]...

   CLI tool to perform semantic segmentation of 3D point clouds using Machine
   Learning algorithms.
@@ -41,10 +53,10 @@ The process consists of two distinct steps or commands :

 Model training is the process of using a set of labeled data, known as the training dataset, to adjust the parameters of the Random Forest algorithm so that it can make accurate predictions on new, unseen data. Training a model involves providing it with input-output pairs, where the input represents the features of the data and the output represents the desired label or prediction. The model then adjusts its internal parameters to minimize the difference between its predictions and the true labels. The goal is to find the set of parameters that yields the lowest prediction error on the training data.

-This is done using the first command train. Use semseg train --help to see the detailed help:
+This is done using the first command train. Use sml train --help to see the detailed help:

 ```
-Usage: semseg train [OPTIONS] CONFIG
+Usage: sml train [OPTIONS] CONFIG

   Train the model for semantic segmentation of 3D point clouds.

@@ -112,19 +124,19 @@ The input data is a LAS file with specified features and classification

 The output is the trained model saved as a pickle file in ./output/model.

@@ -134,10 +146,10 @@ Pickling a model and saving it to disk allows you to save the state of the model

 Once the model is trained, it can be used to make predictions on new, unseen data. This is done by providing the model with input data, and the model generates an output, which is a LAS file with classification as a new scalar field.

-This is done using the second command predict. Use semseg predict --help to see the detailed help:
+This is done using the second command predict. Use sml predict --help to see the detailed help:

 ```
-Usage: semseg predict [OPTIONS] CONFIG POINTCLOUD MODEL
+Usage: sml predict [OPTIONS] CONFIG POINTCLOUD MODEL

   Perform semantic segmentation using pre-trained model.
@@ -154,7 +166,7 @@ Options:

 #### Basic usage

 ```
-semseg predict config.json unclassified.las ./output/model/ne60_mdNone.pkl --filename classified.las
+sml predict config.json unclassified.las ./output/model/ne60_mdNone.pkl --filename classified.las
 ```

 This uses the trained model stored as pickle file ne60_mdNone.pkl to perform semantic segmentation on unclassified.las. The output is named classified.las and can be found in the folder ./output/prediction.

@@ -168,7 +180,7 @@ The rendering of the Random Forest can be improved by an algorithm that reduces

 This updates the classification of specific points to a value determined by a K-nearest neighbors vote, as sketched after the example below.

 ```
-semseg predict config.json unclassified.las ./output/model/ne60_mdNone.pkl --filename classified.las --regularize True -k 30
+sml predict config.json unclassified.las ./output/model/ne60_mdNone.pkl --filename classified.las --regularize True -k 30
 ```

 In this example, regularization is enabled and the number of neighbors to use is 30.
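+Under the hood, this regularization is a K-nearest neighbors vote over the initial predictions. A minimal sketch of the idea with scikit-learn, where X and Y stand in for the point features and the labels predicted by the model:
+
+```python
+import numpy as np
+from sklearn.neighbors import KNeighborsClassifier
+
+X = np.random.rand(1000, 3).astype(np.float32)  # stand-in point features
+Y = np.random.randint(0, 6, size=1000)          # stand-in predicted classes
+
+# Index the points with their predicted labels, then re-assign each point
+# the majority label among its 30 nearest neighbors.
+neigh = KNeighborsClassifier(n_neighbors=30, algorithm='kd_tree', n_jobs=-1)
+neigh.fit(X, Y)
+Y_regularized = neigh.predict(X)
+```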
@@ -192,5 +204,5 @@ If you find our work useful in your research, please consider citing:
 }
 ```

-## About Semseg
+## About SemanticML

 This software was developed by [Kharroubi Abderrazzaq](https://github.com/akharroubi) and [Anass Yarroudh](https://github.com/Yarroudh), researchers at the Geomatics Unit of the University of Liege. For more detailed information please contact us; we are pleased to send you the necessary information.

diff --git a/semseg.py b/SemanticML/main.py
similarity index 97%
rename from semseg.py
rename to SemanticML/main.py
index de38746..b98c320 100644
--- a/semseg.py
+++ b/SemanticML/main.py
@@ -1,291 +1,291 @@
-import click
-import collections
-import os
-import json
-import laspy
-import time
-import pickle
-import itertools
-import numpy as np
-import matplotlib.pyplot as plt
-from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
-from sklearn.model_selection import train_test_split
-from sklearn.metrics import accuracy_score
-from sklearn.metrics import f1_score
-from sklearn.metrics import confusion_matrix
-from sklearn.metrics import jaccard_score
-from sklearn.metrics import recall_score
-from sklearn.metrics import precision_score
-from sklearn.preprocessing import StandardScaler
-from sklearn.neighbors import KNeighborsClassifier
-
-def train_model(method, X_train, Y_train, **kwargs):
-    '''
-    Train the model with the specified parameters and return it.
-    '''
-    if (method == "RandomForest"):
-        n_estimators = kwargs.get('n_estimators', None)
-        max_depth = kwargs.get('max_depth', None)
-        n_jobs = kwargs.get('n_jobs', None)
-        criterion = kwargs.get('criterion', None)
-        min_samples_split = kwargs.get('min_samples_split', None)
-        min_samples_leaf = kwargs.get('min_samples_leaf', None)
-        min_weight_fraction_leaf = kwargs.get('min_weight_fraction_leaf', None)
-        max_features = kwargs.get('max_features', None)
-        max_leaf_nodes = kwargs.get('max_leaf_nodes', None)
-        min_impurity_decrease = kwargs.get('min_impurity_decrease', None)
-        bootstrap = kwargs.get('bootstrap', None)
-        oob_score = kwargs.get('oob_score', None)
-        random_state = kwargs.get('random_state', None)
-        verbose = kwargs.get('verbose', None)
-        warm_start = kwargs.get('warm_start', None)
-        class_weight = kwargs.get('class_weight', None)
-        ccp_alpha = kwargs.get('ccp_alpha', None)
-        max_samples = kwargs.get('max_samples', None)
-
-        model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, max_leaf_nodes=max_leaf_nodes, min_impurity_decrease=min_impurity_decrease, bootstrap=bootstrap, oob_score=oob_score, n_jobs=n_jobs, random_state=random_state, verbose=verbose, warm_start=warm_start, class_weight=class_weight, ccp_alpha=ccp_alpha, max_samples=max_samples)
-
-    elif (method == "GradientBoosting"):
-        loss = kwargs.get('loss', None)
-        learning_rate = kwargs.get('learning_rate', None)
-        n_estimators = kwargs.get('n_estimators', None)
-        subsample = kwargs.get('subsample', None)
-        criterion = kwargs.get('criterion', None)
-        min_samples_split = kwargs.get('min_samples_split', None)
-        min_samples_leaf = kwargs.get('min_samples_leaf', None)
-        min_weight_fraction_leaf = kwargs.get('min_weight_fraction_leaf', None)
-        max_depth = kwargs.get('max_depths', None)
-        min_impurity_decrease = kwargs.get('min_impurity_decrease', None)
-        init = kwargs.get('init', None)
-        random_state = kwargs.get('random_state', None)
-        max_features = kwargs.get('max_features', None)
-        verbose = kwargs.get('verbose', None)
-        max_leaf_nodes = kwargs.get('max_leaf_nodes', None)
-        warm_start = kwargs.get('warm_start', None)
-        validation_fraction = kwargs.get('validation_fraction', None)
-        n_iter_no_change = kwargs.get('n_iter_no_change', None)
-        tol = kwargs.get('tol', None)
-        ccp_alpha = kwargs.get('ccp_alpha', None)
-
-        model = GradientBoostingClassifier(loss=loss, learning_rate=learning_rate, n_estimators=n_estimators, subsample=subsample, criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_depth=max_depth, min_impurity_decrease=min_impurity_decrease, init=init, random_state=random_state, max_features=max_features, verbose=verbose, max_leaf_nodes=max_leaf_nodes, warm_start=warm_start, validation_fraction=validation_fraction, n_iter_no_change=n_iter_no_change, tol=tol, ccp_alpha=ccp_alpha)
-
-    model.fit(X_train, Y_train)
-    return model
-
-def save_model(model, filename):
-    ''' Save the trained machine learning model as .pkl file
-    Attribures:
-        model (np.RandomForestClassifier) : Model to save
-        filename (string) : Model output file
-    '''
-    with open(filename, 'wb') as out:
-        pickle.dump(model, out, pickle.HIGHEST_PROTOCOL)
-
-def read_model(filepath):
-    ''' Read the Random Forest model from a .pkl file
-    Attributes:
-        filepath (string) : Path to the .pkl file
-    '''
-    return pickle.load(open(filepath, 'rb'))
-
-class OrderedGroup(click.Group):
-    def __init__(self, name=None, commands=None, **attrs):
-        super(OrderedGroup, self).__init__(name, commands, **attrs)
-        self.commands = commands or collections.OrderedDict()
-
-    def list_commands(self, ctx):
-        return self.commands
-
-@click.group(cls=OrderedGroup, help="CLI tool to perform semantic segmentation of 3D point clouds using Random Forest algorithm.")
-def cli():
-    pass
-
-@click.command()
-@click.argument('config', type=click.Path(exists=True), required=True)
-@click.option('--method', help='Learning method for classification.', type=click.Choice(['RandomForest', 'GradientBoosting']), default="RandomForest", required=False, show_default=True)
-
-def train(config, method):
-    '''
-    Train the model for semantic segmentation of 3D point clouds.
-    '''
-    if (os.path.exists("./output")==False):
-        os.mkdir("./output")
-
-    if (os.path.exists("./output/model")==False):
-        os.mkdir("./output/model")
-
-    with open(config) as file:
-        configuration = json.load(file)
-
-
-    # Read train and validation data from a file
-    debug = True
-
-    file = laspy.read(configuration["training_data"])
-    features = configuration["features"]
-
-    fields = [field.name for field in file.point_format]
-    if ('classification' in fields):
-        Y = np.asarray(file.classification, dtype=np.float32)
-        fields.remove('classification')
-
-    X = np.asarray(np.column_stack([getattr(file, field) for field in features]), dtype=np.float32)
-
-    # Load data
-    print('\nLoading data')
-    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
-    print('\tTraining samples: {}\n\tTesting samples: {}\n\tUsing features: {}'.format(len(Y_train), len(Y_test), features))
-
-    if (method == "RandomForest"):
-        # RF parameters
-        n_estimators = configuration["parameters"]["RandomForest"]["n_estimators"]
-        criterion = configuration["parameters"]["RandomForest"]["criterion"]
-        max_depths = [configuration["parameters"]["RandomForest"]["max_depths"]]
-        min_samples_split = configuration["parameters"]["RandomForest"]["min_samples_split"]
-        min_samples_leaf = configuration["parameters"]["RandomForest"]["min_samples_leaf"]
-        min_weight_fraction_leaf = configuration["parameters"]["RandomForest"]["min_weight_fraction_leaf"]
-        max_features = configuration["parameters"]["RandomForest"]["max_features"]
-        max_leaf_nodes = configuration["parameters"]["RandomForest"]["max_leaf_nodes"]
-        min_impurity_decrease = configuration["parameters"]["RandomForest"]["min_impurity_decrease"]
-        bootstrap = configuration["parameters"]["RandomForest"]["bootstrap"]
-        oob_score = configuration["parameters"]["RandomForest"]["oob_score"]
-        n_jobs = configuration["parameters"]["RandomForest"]["n_jobs"]
-        random_state = configuration["parameters"]["RandomForest"]["random_state"]
-        verbose = configuration["parameters"]["RandomForest"]["verbose"]
-        warm_start = configuration["parameters"]["RandomForest"]["warm_start"]
-        class_weight = configuration["parameters"]["RandomForest"]["class_weight"]
-        ccp_alpha = configuration["parameters"]["RandomForest"]["ccp_alpha"]
-        max_samples = configuration["parameters"]["RandomForest"]["max_samples"]
-
-    elif (method == "GradientBoosting"):
-        n_estimators = configuration["parameters"]["GradientBoosting"]["n_estimators"]
-        loss = configuration["parameters"]["GradientBoosting"]["loss"]
-        learning_rate = configuration["parameters"]["GradientBoosting"]["learning_rate"]
-        subsample = configuration["parameters"]["GradientBoosting"]["subsample"]
configuration["parameters"]["GradientBoosting"]["subsample"] - criterion = configuration["parameters"]["GradientBoosting"]["criterion"] - min_samples_split = configuration["parameters"]["GradientBoosting"]["min_samples_split"] - min_samples_leaf = configuration["parameters"]["GradientBoosting"]["min_samples_leaf"] - min_weight_fraction_leaf = configuration["parameters"]["GradientBoosting"]["min_weight_fraction_leaf"] - max_depths = configuration["parameters"]["GradientBoosting"]["max_depths"] - min_impurity_decrease = configuration["parameters"]["GradientBoosting"]["min_impurity_decrease"] - init = configuration["parameters"]["GradientBoosting"]["init"] - random_state = configuration["parameters"]["GradientBoosting"]["random_state"] - max_features = configuration["parameters"]["GradientBoosting"]["max_features"] - verbose = configuration["parameters"]["GradientBoosting"]["verbose"] - max_leaf_nodes = configuration["parameters"]["GradientBoosting"]["max_leaf_nodes"] - warm_start = configuration["parameters"]["GradientBoosting"]["warm_start"] - validation_fraction = configuration["parameters"]["GradientBoosting"]["validation_fraction"] - n_iter_no_change = configuration["parameters"]["GradientBoosting"]["n_iter_no_change"] - tol = configuration["parameters"]["GradientBoosting"]["tol"] - ccp_alpha = configuration["parameters"]["GradientBoosting"]["ccp_alpha"] - - # Scale the features using StandardScaler - scaler = StandardScaler() - X_train = scaler.fit_transform(X_train) - X_test = scaler.transform(X_test) - - # Train the model - print('\nTraining the model') - best_conf = {'ne' : 0, 'md' : 0} # Best configuration initialisation - best_f1 = 0 - f1_results=[] - start = time.time() - for ne, md in list(itertools.product(n_estimators, max_depths)): # Train the model with different parameters and pick the one having the maximum f1-score on the test-set - # Train the model - if (method == "RandomForest"): - model = train_model(method, X_train, Y_train, n_estimators=ne, max_depth=md, criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, max_leaf_nodes=max_leaf_nodes, min_impurity_decrease=min_impurity_decrease, bootstrap=bootstrap, oob_score=oob_score, n_jobs=n_jobs, random_state=random_state, verbose=verbose, warm_start=warm_start, class_weight=class_weight, ccp_alpha=ccp_alpha, max_samples=max_samples) - elif (method == "GradientBoosting"): - model = train_model(method, X_train, Y_train, n_estimators=ne, max_depth=md, loss=loss, learning_rate=learning_rate, subsample=subsample, criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, min_impurity_decrease=min_impurity_decrease, init=init, random_state=random_state, max_features=max_features, verbose=verbose, max_leaf_nodes=max_leaf_nodes, warm_start=warm_start, validation_fraction=validation_fraction, n_iter_no_change=n_iter_no_change, tol=tol, ccp_alpha=ccp_alpha) - - Y_test_pred = model.predict(X_test) # Test the model, using only the specified features - - acc = accuracy_score(Y_test, Y_test_pred) # Compute metrics and update best model - f1 = f1_score(Y_test, Y_test_pred, average='weighted') - f1_results.append(f1) - recall=recall_score(Y_test, Y_test_pred, average='weighted') - precision= precision_score(Y_test, Y_test_pred, average='weighted') - js=jaccard_score(Y_test, Y_test_pred, average='weighted') - - if f1 > best_f1: # Update best 
-            best_conf['ne'] = ne
-            best_conf['md'] = md
-            best_f1 = f1
-
-        if debug: print('\tne: {}, md: {} - acc: {} f1: {} precision:{} recall:{} js: {} oob_score: {}'.format(ne, md, acc, f1, precision, recall, js, model.oob_score))
-
-    if (len(n_estimators) == 1):
-        pass
-    else:
-        model = train_model(X_train, Y_train, best_conf['ne'], best_conf['md'], criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, max_leaf_nodes=max_leaf_nodes, min_impurity_decrease=min_impurity_decrease, bootstrap=bootstrap, oob_score=oob_score, n_jobs=n_jobs, random_state=random_state, verbose=verbose, warm_start=warm_start, class_weight=class_weight, ccp_alpha=ccp_alpha, max_samples=max_samples)
-
-    save_model(model, './output/model/ne{}_md{}.pkl'.format(best_conf['ne'], best_conf['md']))
-
-    end = time.time()
-    processTime = end - start
-
-    print('\n\tBest parameters: ne: {}, md: {}'.format(n_estimators, max_depths))
-    print('\tFeature importance:\n{}'.format(model.feature_importances_))
-    print('\tConfusion matrix:\n{}'.format(confusion_matrix(Y_test, Y_test_pred)))
-    print('\tTraining time: {} seconds'.format(time.strftime("%H:%M:%S", time.gmtime(processTime))))
-
-
-@click.command()
-@click.argument('config', type=click.Path(exists=True), required=True)
-@click.argument('pointcloud', type=click.Path(exists=True), required=True)
-@click.argument('model', type=click.Path(exists=True), required=True)
-@click.option('--regularize', help='If checked the input data will be regularized.', type=bool, default=False, required=False, show_default=True)
-@click.option('-k', help='Number of neighbors to use if regularization is set.', type=click.INT, default=10, required=False, show_default=True)
-@click.option('--filename', help='Write the classified point cloud in a .LAS file.', type=click.Path(exists=False), required=True, show_default=True)
-
-def predict(config, pointcloud, model, regularize, k, filename):
-    '''
-    Perform semantic segmentation using pre-trained model.
-    '''
-    if (os.path.exists("./output")==False):
-        os.mkdir("./output")
-
-    if (os.path.exists("./output/prediction")==False):
-        os.mkdir("./output/prediction")
-
-    with open(config) as file:
-        configuration = json.load(file)
-
-    features = configuration["features"]
-
-    start = time.time()
-    # Load the model
-    model = read_model(model)
-
-    # Read data
-    file = laspy.read(pointcloud)
-    X = np.asarray(np.column_stack([getattr(file, field) for field in features]), dtype=np.float32)
-
-    # Perform semantic segmentation
-    print ('Classifying the dataset')
-    Y = model.predict(X)
-
-    # Regularization
-    if (regularize):
-        neigh = KNeighborsClassifier(n_neighbors=k, algorithm='kd_tree', n_jobs=-1)
-        neigh.fit(X, Y)
-        Y = neigh.predict(X)
-
-    # Save the results in LAS file
-    header = file.header
-    las = laspy.LasData(header)
-    las.points = file.points
-    las.classification = Y
-
-    # Export results
-    las.write("./output/prediction/{}".format(filename))
-
-    end = time.time()
-    processTime = end - start
-    print('Data classified in: {}'.format(time.strftime("%H:%M:%S", time.gmtime(processTime))))
-
-
-cli.add_command(train)
-cli.add_command(predict)
-
-if __name__ == '__main__':
-    cli(prog_name='semseg')
\ No newline at end of file
+import click
+import collections
+import os
+import json
+import laspy
+import time
+import pickle
+import itertools
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score
+from sklearn.metrics import f1_score
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import jaccard_score
+from sklearn.metrics import recall_score
+from sklearn.metrics import precision_score
+from sklearn.preprocessing import StandardScaler
+from sklearn.neighbors import KNeighborsClassifier
+
+def train_model(method, X_train, Y_train, **kwargs):
+    '''
+    Train the model with the specified parameters and return it.
+    '''
+    if (method == "RandomForest"):
+        n_estimators = kwargs.get('n_estimators', None)
+        max_depth = kwargs.get('max_depth', None)
+        n_jobs = kwargs.get('n_jobs', None)
+        criterion = kwargs.get('criterion', None)
+        min_samples_split = kwargs.get('min_samples_split', None)
+        min_samples_leaf = kwargs.get('min_samples_leaf', None)
+        min_weight_fraction_leaf = kwargs.get('min_weight_fraction_leaf', None)
+        max_features = kwargs.get('max_features', None)
+        max_leaf_nodes = kwargs.get('max_leaf_nodes', None)
+        min_impurity_decrease = kwargs.get('min_impurity_decrease', None)
+        bootstrap = kwargs.get('bootstrap', None)
+        oob_score = kwargs.get('oob_score', None)
+        random_state = kwargs.get('random_state', None)
+        verbose = kwargs.get('verbose', None)
+        warm_start = kwargs.get('warm_start', None)
+        class_weight = kwargs.get('class_weight', None)
+        ccp_alpha = kwargs.get('ccp_alpha', None)
+        max_samples = kwargs.get('max_samples', None)
+
+        model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, max_leaf_nodes=max_leaf_nodes, min_impurity_decrease=min_impurity_decrease, bootstrap=bootstrap, oob_score=oob_score, n_jobs=n_jobs, random_state=random_state, verbose=verbose, warm_start=warm_start, class_weight=class_weight, ccp_alpha=ccp_alpha, max_samples=max_samples)
+
+    elif (method == "GradientBoosting"):
+        loss = kwargs.get('loss', None)
+        learning_rate = kwargs.get('learning_rate', None)
+        n_estimators = kwargs.get('n_estimators', None)
+        subsample = kwargs.get('subsample', None)
+        criterion = kwargs.get('criterion', None)
+        min_samples_split = kwargs.get('min_samples_split', None)
+        min_samples_leaf = kwargs.get('min_samples_leaf', None)
+        min_weight_fraction_leaf = kwargs.get('min_weight_fraction_leaf', None)
+        max_depth = kwargs.get('max_depth', None)
+        min_impurity_decrease = kwargs.get('min_impurity_decrease', None)
+        init = kwargs.get('init', None)
+        random_state = kwargs.get('random_state', None)
+        max_features = kwargs.get('max_features', None)
+        verbose = kwargs.get('verbose', None)
+        max_leaf_nodes = kwargs.get('max_leaf_nodes', None)
+        warm_start = kwargs.get('warm_start', None)
+        validation_fraction = kwargs.get('validation_fraction', None)
+        n_iter_no_change = kwargs.get('n_iter_no_change', None)
+        tol = kwargs.get('tol', None)
+        ccp_alpha = kwargs.get('ccp_alpha', None)
+
+        model = GradientBoostingClassifier(loss=loss, learning_rate=learning_rate, n_estimators=n_estimators, subsample=subsample, criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_depth=max_depth, min_impurity_decrease=min_impurity_decrease, init=init, random_state=random_state, max_features=max_features, verbose=verbose, max_leaf_nodes=max_leaf_nodes, warm_start=warm_start, validation_fraction=validation_fraction, n_iter_no_change=n_iter_no_change, tol=tol, ccp_alpha=ccp_alpha)
+
+    model.fit(X_train, Y_train)
+    return model
+
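+# Illustrative call (hypothetical values, not part of the CLI flow):
+#   model = train_model("RandomForest", X_train, Y_train, n_estimators=50,
+#                       max_depth=None, criterion="entropy", min_samples_split=4,
+#                       min_samples_leaf=4, min_weight_fraction_leaf=0,
+#                       max_features="sqrt", max_leaf_nodes=None,
+#                       min_impurity_decrease=0.0, bootstrap=True, oob_score=False,
+#                       n_jobs=-1, random_state=None, verbose=0, warm_start=False,
+#                       class_weight=None, ccp_alpha=0.0, max_samples=None)
+# Note: missing kwargs default to None and are passed straight to the
+# scikit-learn constructor, so every parameter should be supplied explicitly.
+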
+def save_model(model, filename):
+    ''' Save the trained machine learning model as a .pkl file
+    Attributes:
+        model : Object to pickle (the train command saves a (scaler, model) pair)
+        filename (string) : Model output file
+    '''
+    with open(filename, 'wb') as out:
+        pickle.dump(model, out, pickle.HIGHEST_PROTOCOL)
+
+def read_model(filepath):
+    ''' Read the trained model from a .pkl file
+    Attributes:
+        filepath (string) : Path to the .pkl file
+    '''
+    with open(filepath, 'rb') as file:
+        return pickle.load(file)
+
+class OrderedGroup(click.Group):
+    def __init__(self, name=None, commands=None, **attrs):
+        super(OrderedGroup, self).__init__(name, commands, **attrs)
+        self.commands = commands or collections.OrderedDict()
+
+    def list_commands(self, ctx):
+        return self.commands
+
+@click.group(cls=OrderedGroup, help="CLI tool to perform semantic segmentation of 3D point clouds using Machine Learning algorithms.")
+def cli():
+    pass
+
+@click.command()
+@click.argument('config', type=click.Path(exists=True), required=True)
+@click.option('--method', help='Learning method for classification.', type=click.Choice(['RandomForest', 'GradientBoosting']), default="RandomForest", required=False, show_default=True)
+
+def train(config, method):
+    '''
+    Train the model for semantic segmentation of 3D point clouds.
+    '''
+    if (os.path.exists("./output")==False):
+        os.mkdir("./output")
+
+    if (os.path.exists("./output/model")==False):
+        os.mkdir("./output/model")
+
+    with open(config) as file:
+        configuration = json.load(file)
+
+    # Read train and validation data from a file
+    debug = True
+
+    file = laspy.read(configuration["training_data"])
+    features = configuration["features"]
+
+    fields = [field.name for field in file.point_format]
+    if ('classification' in fields):
+        Y = np.asarray(file.classification, dtype=np.float32)
+        fields.remove('classification')
+    else:
+        raise click.ClickException("The training data has no 'classification' field.")
+
+    X = np.asarray(np.column_stack([getattr(file, field) for field in features]), dtype=np.float32)
+
+    # Load data
+    print('\nLoading data')
+    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
+    print('\tTraining samples: {}\n\tTesting samples: {}\n\tUsing features: {}'.format(len(Y_train), len(Y_test), features))
+
+    if (method == "RandomForest"):
+        # RF parameters
+        n_estimators = configuration["parameters"]["RandomForest"]["n_estimators"]
+        criterion = configuration["parameters"]["RandomForest"]["criterion"]
+        max_depths = [configuration["parameters"]["RandomForest"]["max_depths"]]
+        min_samples_split = configuration["parameters"]["RandomForest"]["min_samples_split"]
+        min_samples_leaf = configuration["parameters"]["RandomForest"]["min_samples_leaf"]
+        min_weight_fraction_leaf = configuration["parameters"]["RandomForest"]["min_weight_fraction_leaf"]
+        max_features = configuration["parameters"]["RandomForest"]["max_features"]
+        max_leaf_nodes = configuration["parameters"]["RandomForest"]["max_leaf_nodes"]
+        min_impurity_decrease = configuration["parameters"]["RandomForest"]["min_impurity_decrease"]
+        bootstrap = configuration["parameters"]["RandomForest"]["bootstrap"]
+        oob_score = configuration["parameters"]["RandomForest"]["oob_score"]
+        n_jobs = configuration["parameters"]["RandomForest"]["n_jobs"]
+        random_state = configuration["parameters"]["RandomForest"]["random_state"]
+        verbose = configuration["parameters"]["RandomForest"]["verbose"]
+        warm_start = configuration["parameters"]["RandomForest"]["warm_start"]
+        class_weight = configuration["parameters"]["RandomForest"]["class_weight"]
+        ccp_alpha = configuration["parameters"]["RandomForest"]["ccp_alpha"]
+        max_samples = configuration["parameters"]["RandomForest"]["max_samples"]
+
+    elif (method == "GradientBoosting"):
+        n_estimators = configuration["parameters"]["GradientBoosting"]["n_estimators"]
+        loss = configuration["parameters"]["GradientBoosting"]["loss"]
+        learning_rate = configuration["parameters"]["GradientBoosting"]["learning_rate"]
+        subsample = configuration["parameters"]["GradientBoosting"]["subsample"]
configuration["parameters"]["GradientBoosting"]["subsample"] + criterion = configuration["parameters"]["GradientBoosting"]["criterion"] + min_samples_split = configuration["parameters"]["GradientBoosting"]["min_samples_split"] + min_samples_leaf = configuration["parameters"]["GradientBoosting"]["min_samples_leaf"] + min_weight_fraction_leaf = configuration["parameters"]["GradientBoosting"]["min_weight_fraction_leaf"] + max_depths = configuration["parameters"]["GradientBoosting"]["max_depths"] + min_impurity_decrease = configuration["parameters"]["GradientBoosting"]["min_impurity_decrease"] + init = configuration["parameters"]["GradientBoosting"]["init"] + random_state = configuration["parameters"]["GradientBoosting"]["random_state"] + max_features = configuration["parameters"]["GradientBoosting"]["max_features"] + verbose = configuration["parameters"]["GradientBoosting"]["verbose"] + max_leaf_nodes = configuration["parameters"]["GradientBoosting"]["max_leaf_nodes"] + warm_start = configuration["parameters"]["GradientBoosting"]["warm_start"] + validation_fraction = configuration["parameters"]["GradientBoosting"]["validation_fraction"] + n_iter_no_change = configuration["parameters"]["GradientBoosting"]["n_iter_no_change"] + tol = configuration["parameters"]["GradientBoosting"]["tol"] + ccp_alpha = configuration["parameters"]["GradientBoosting"]["ccp_alpha"] + + # Scale the features using StandardScaler + scaler = StandardScaler() + X_train = scaler.fit_transform(X_train) + X_test = scaler.transform(X_test) + + # Train the model + print('\nTraining the model') + best_conf = {'ne' : 0, 'md' : 0} # Best configuration initialisation + best_f1 = 0 + f1_results=[] + start = time.time() + for ne, md in list(itertools.product(n_estimators, max_depths)): # Train the model with different parameters and pick the one having the maximum f1-score on the test-set + # Train the model + if (method == "RandomForest"): + model = train_model(method, X_train, Y_train, n_estimators=ne, max_depth=md, criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, max_leaf_nodes=max_leaf_nodes, min_impurity_decrease=min_impurity_decrease, bootstrap=bootstrap, oob_score=oob_score, n_jobs=n_jobs, random_state=random_state, verbose=verbose, warm_start=warm_start, class_weight=class_weight, ccp_alpha=ccp_alpha, max_samples=max_samples) + elif (method == "GradientBoosting"): + model = train_model(method, X_train, Y_train, n_estimators=ne, max_depth=md, loss=loss, learning_rate=learning_rate, subsample=subsample, criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, min_impurity_decrease=min_impurity_decrease, init=init, random_state=random_state, max_features=max_features, verbose=verbose, max_leaf_nodes=max_leaf_nodes, warm_start=warm_start, validation_fraction=validation_fraction, n_iter_no_change=n_iter_no_change, tol=tol, ccp_alpha=ccp_alpha) + + Y_test_pred = model.predict(X_test) # Test the model, using only the specified features + + acc = accuracy_score(Y_test, Y_test_pred) # Compute metrics and update best model + f1 = f1_score(Y_test, Y_test_pred, average='weighted') + f1_results.append(f1) + recall=recall_score(Y_test, Y_test_pred, average='weighted') + precision= precision_score(Y_test, Y_test_pred, average='weighted') + js=jaccard_score(Y_test, Y_test_pred, average='weighted') + + if f1 > best_f1: # Update best 
+            best_conf['ne'] = ne
+            best_conf['md'] = md
+            best_f1 = f1
+            best_model = model
+
+        if debug: print('\tne: {}, md: {} - acc: {} f1: {} precision: {} recall: {} js: {} oob_score: {}'.format(ne, md, acc, f1, precision, recall, js, getattr(model, 'oob_score_', None)))
+
+    # Reuse the best model found during the search instead of retraining
+    model = best_model
+    Y_test_pred = model.predict(X_test)
+
+    # Persist the scaler together with the model so predict can apply the same transform
+    save_model((scaler, model), './output/model/ne{}_md{}.pkl'.format(best_conf['ne'], best_conf['md']))
+
+    end = time.time()
+    processTime = end - start
+
+    print('\n\tBest parameters: ne: {}, md: {}'.format(best_conf['ne'], best_conf['md']))
+    print('\tFeature importance:\n{}'.format(model.feature_importances_))
+    print('\tConfusion matrix:\n{}'.format(confusion_matrix(Y_test, Y_test_pred)))
+    print('\tTraining time: {}'.format(time.strftime("%H:%M:%S", time.gmtime(processTime))))
+
+
+@click.command()
+@click.argument('config', type=click.Path(exists=True), required=True)
+@click.argument('pointcloud', type=click.Path(exists=True), required=True)
+@click.argument('model', type=click.Path(exists=True), required=True)
+@click.option('--regularize', help='If set, the classification is regularized using a K-nearest neighbors vote.', type=bool, default=False, required=False, show_default=True)
+@click.option('-k', help='Number of neighbors to use if regularization is set.', type=click.INT, default=10, required=False, show_default=True)
+@click.option('--filename', help='Name of the output .LAS file with the classified point cloud.', type=click.Path(exists=False), required=True, show_default=True)
+
+def predict(config, pointcloud, model, regularize, k, filename):
+    '''
+    Perform semantic segmentation using pre-trained model.
+    '''
+    if (os.path.exists("./output")==False):
+        os.mkdir("./output")
+
+    if (os.path.exists("./output/prediction")==False):
+        os.mkdir("./output/prediction")
+
+    with open(config) as file:
+        configuration = json.load(file)
+
+    features = configuration["features"]
+
+    start = time.time()
+    # Load the scaler and the model
+    scaler, model = read_model(model)
+
+    # Read data
+    file = laspy.read(pointcloud)
+    X = np.asarray(np.column_stack([getattr(file, field) for field in features]), dtype=np.float32)
+    X = scaler.transform(X) # Apply the same standardization used at training time
+
+    # Perform semantic segmentation
+    print('Classifying the dataset')
+    Y = model.predict(X)
+
+    # Regularization
+    if (regularize):
+        neigh = KNeighborsClassifier(n_neighbors=k, algorithm='kd_tree', n_jobs=-1)
+        neigh.fit(X, Y)
+        Y = neigh.predict(X)
+
+    # Save the results in a LAS file
+    header = file.header
+    las = laspy.LasData(header)
+    las.points = file.points
+    las.classification = Y.astype(np.uint8)
+
+    # Export results
+    las.write("./output/prediction/{}".format(filename))
+
+    end = time.time()
+    processTime = end - start
+    print('Data classified in: {}'.format(time.strftime("%H:%M:%S", time.gmtime(processTime))))
+
+
+cli.add_command(train)
+cli.add_command(predict)
+
+if __name__ == '__main__':
+    cli(prog_name='sml')
\ No newline at end of file
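For users who prefer scripting over the CLI, the saved .pkl can be loaded directly. A minimal sketch, where the file names and the feature list are illustrative and the pickle is assumed to hold the (scaler, model) pair saved by the train command above:

```python
# Minimal sketch: classify a point cloud without the CLI.
# The features must match the ones listed in the config used for training.
import pickle
import laspy
import numpy as np

with open("./output/model/ne50_mdNone.pkl", "rb") as f:
    scaler, model = pickle.load(f)   # (StandardScaler, classifier) pair saved by train

las = laspy.read("unclassified.las")
features = ["red", "green", "blue"]
X = np.column_stack([np.asarray(getattr(las, name)) for name in features]).astype(np.float32)

las.classification = model.predict(scaler.transform(X)).astype(np.uint8)
las.write("classified.las")
```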
diff --git a/config.json b/config/config.json
similarity index 97%
rename from config.json
rename to config/config.json
index d3b1a4e..26b6ae0 100644
--- a/config.json
+++ b/config/config.json
@@ -1,49 +1,49 @@
-{
-    "features": ["red", "blue", "green", "Verticality16", "Verticality8", "Linearity16", "Linearity8", "Planarity16", "Planarity8", "Surfacevariation5", "Numberneighbors10"],
-    "label": ["Ground", "Vegetation", "Rail", "Catenary pole", "Cable", "Infrastructure"],
-    "training_data": "C:/Users/Administrateur/Desktop/railway.las",
-    "parameters": {
-        "RandomForest": {
-            "n_estimators": [50],
-            "criterion": "entropy",
-            "max_depths": null,
-            "min_samples_split": 4,
-            "min_samples_leaf": 4,
-            "min_weight_fraction_leaf": 0,
-            "max_features": "sqrt",
-            "max_leaf_nodes": null,
-            "min_impurity_decrease": 0.0,
-            "bootstrap": true,
-            "oob_score": false,
-            "n_jobs": -1,
-            "random_state": null,
-            "verbose": 0,
-            "warm_start": false,
-            "class_weight": null,
-            "ccp_alpha": 0.0,
-            "max_samples": null
-        },
-        "GradientBoosting": {
-            "n_estimators": [50],
-            "loss":"log_loss",
-            "learning_rate":0.1,
-            "subsample": 1.0,
-            "criterion": "friedman_mse",
-            "min_samples_split": 2,
-            "min_samples_leaf": 1,
-            "min_weight_fraction_leaf": 0.0,
-            "max_depths": [3],
-            "min_impurity_decrease": 0.0,
-            "init": null,
-            "random_state": null,
-            "max_features": null,
-            "verbose": 0,
-            "max_leaf_nodes": null,
-            "warm_start": false,
-            "validation_fraction": 0.1,
-            "n_iter_no_change": null,
-            "tol": 1e-4,
-            "ccp_alpha": 0.0
-        }
-    }
-}
+{
+    "features": ["red", "blue", "green", "Verticality16", "Verticality8", "Linearity16", "Linearity8", "Planarity16", "Planarity8", "Surfacevariation5", "Numberneighbors10"],
+    "label": ["Ground", "Vegetation", "Rail", "Catenary pole", "Cable", "Infrastructure"],
+    "training_data": "C:/Users/Administrateur/Desktop/railway.las",
+    "parameters": {
+        "RandomForest": {
+            "n_estimators": [50],
+            "criterion": "entropy",
+            "max_depths": null,
+            "min_samples_split": 4,
+            "min_samples_leaf": 4,
+            "min_weight_fraction_leaf": 0,
+            "max_features": "sqrt",
+            "max_leaf_nodes": null,
+            "min_impurity_decrease": 0.0,
+            "bootstrap": true,
+            "oob_score": false,
+            "n_jobs": -1,
+            "random_state": null,
+            "verbose": 0,
+            "warm_start": false,
+            "class_weight": null,
+            "ccp_alpha": 0.0,
+            "max_samples": null
+        },
+        "GradientBoosting": {
+            "n_estimators": [50],
+            "loss": "log_loss",
+            "learning_rate": 0.1,
+            "subsample": 1.0,
+            "criterion": "friedman_mse",
+            "min_samples_split": 2,
+            "min_samples_leaf": 1,
+            "min_weight_fraction_leaf": 0.0,
+            "max_depths": [3],
+            "min_impurity_decrease": 0.0,
+            "init": null,
+            "random_state": null,
+            "max_features": null,
+            "verbose": 0,
+            "max_leaf_nodes": null,
+            "warm_start": false,
+            "validation_fraction": 0.1,
+            "n_iter_no_change": null,
+            "tol": 1e-4,
+            "ccp_alpha": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..78a1222
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,25 @@
+from setuptools import setup, find_packages
+
+setup(
+    name='SemanticML',
+    version='0.1.1',
+    description="CLI tool to perform semantic segmentation of 3D point clouds using Machine Learning algorithms.",
+    long_description=open('README.md').read(),
+    long_description_content_type='text/markdown',
+    author='Anass Yarroudh',
+    author_email='ayarroudh@uliege.be',
+    url='https://github.com/Yarroudh/SemanticML',
+    packages=find_packages(),
+    include_package_data=True,
+    install_requires=[
+        'click==8.1.3',
+        'numpy==1.24.2',
+        'matplotlib==3.7.1',
+        'scikit-learn==1.2.2',
+        'laspy==2.4.1'
+    ],
+    entry_points='''
+        [console_scripts]
+        sml=SemanticML.main:cli
+    '''
+)
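The train command reads every key under parameters[method] in the configuration, so the easiest way to build a config for a new dataset is to adapt the shipped config/config.json rather than write one from scratch. A minimal sketch, with illustrative paths and feature names:

```python
# Minimal sketch: adapt the shipped config to a new dataset.
import json

with open("config/config.json") as f:
    config = json.load(f)

config["training_data"] = "data/my_area.las"                          # labeled LAS file
config["features"] = ["red", "green", "blue", "Planarity8"]           # fields present in the file
config["parameters"]["RandomForest"]["n_estimators"] = [25, 50, 100]  # grid searched, best f1 kept

with open("my_config.json", "w") as f:
    json.dump(config, f, indent=4)
```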