From 03db9081da53123f1da5a451a6fbbe7807c92973 Mon Sep 17 00:00:00 2001
From: Anass Yarroudh
Date: Tue, 18 Apr 2023 23:22:28 +0200
Subject: [PATCH] New updates

---
 README.md                         |  38 +-
 semseg.py => SemanticML/main.py   | 582 +++++++++++++++---------------
 config.json => config/config.json |  96 ++---
 setup.py                          |  25 ++
 4 files changed, 389 insertions(+), 352 deletions(-)
 rename semseg.py => SemanticML/main.py (97%)
 rename config.json => config/config.json (97%)
 create mode 100644 setup.py

diff --git a/README.md b/README.md
index dada055..11e2287 100644
--- a/README.md
+++ b/README.md
@@ -15,14 +15,26 @@ Semantic segmentation of point clouds is the process of classifying each point i

 ## Installation

-The easiest way to install Semseg on Windows is to use the binary package on the [Release page](https://github.com/Yarroudh/Semseg/releases/tag/Semseg). In case you can not use the Windows installer, or if you are using a different operating system, you can build everything from source.
+You can install SemanticML from PyPI by running:
+
+```bash
+pip install semanticml
+```
+
+You can also build everything from source:
+
+```bash
+git clone https://github.com/Yarroudh/SemanticML
+cd SemanticML
+python setup.py install
+```

 ## Usage of the CLI

-After installation, you have a small program called semseg. Use semseg --help to see the detailed help:
+After installation, you have a small program called sml. Use sml --help to see the detailed help:

 ```
-Usage: semseg [OPTIONS] COMMAND [ARGS]...
+Usage: sml [OPTIONS] COMMAND [ARGS]...

   CLI tool to perform semantic segmentation of 3D point clouds using Machine
   Learning algorithms.
@@ -41,10 +53,10 @@ The process consists of two distinct steps or commands :

 Model training is the process of using a set of labeled data, known as the training dataset, to adjust the parameters of the Random Forest algorithm so that it can make accurate predictions on new, unseen data. Training a model involves providing it with input-output pairs, where the input represents the features of the data and the output represents the desired label or prediction. The model then adjusts its internal parameters to minimize the difference between its predictions and the true labels. The goal is to find the set of parameters that yields the lowest prediction error on the training data.

-This is done using the first command train. Use semseg train --help to see the detailed help:
+This is done using the first command train. Use sml train --help to see the detailed help:

 ```
-Usage: semseg train [OPTIONS] CONFIG
+Usage: sml train [OPTIONS] CONFIG

   Train the model for semantic segmentation of 3D point clouds.

@@ -112,19 +124,19 @@ The input data is a LAS file with specified features and classification

 The output is the trained model saved as a pickle file in ./output/model.

@@ -134,10 +146,10 @@ Pickling a model and saving it to disk allows you to save the state of the model

 Once the model is trained, it can be used to make predictions on new, unseen data. This is done by providing the model with input data, and the model generates an output, which is a LAS file with classification as a new scalar field.

-This is done using the second command predict. Use semseg predict --help to see the detailed help:
+This is done using the second command predict. Use sml predict --help to see the detailed help:

 ```
-Usage: semseg predict [OPTIONS] CONFIG POINTCLOUD MODEL
+Usage: sml predict [OPTIONS] CONFIG POINTCLOUD MODEL

   Perform semantic segmentation using pre-trained model.
@@ -154,7 +166,7 @@ Options:

 #### Basic usage

 ```
-semseg predict config.json unclassified.las ./output/model/ne60_mdNone.pkl --filename classified.las
+sml predict config.json unclassified.las ./output/model/ne60_mdNone.pkl --filename classified.las
 ```

 This uses the trained model stored as pickle file ne60_mdNone.pkl to perform semantic segmentation on unclassified.las. The output is named classified.las and can be found in the folder ./output/prediction.

@@ -168,7 +180,7 @@ The rendering of the Random Forest can be improved by an algorithm that reduces

 This updates the classification of specific points to a value determined by a K-nearest neighbors vote, as sketched after the example below.

 ```
-semseg predict config.json unclassified.las ./output/model/ne60_mdNone.pkl --filename classified.las --regularize True -k 30
+sml predict config.json unclassified.las ./output/model/ne60_mdNone.pkl --filename classified.las --regularize True -k 30
 ```

 In this example, regularization is enabled and the number of neighbors to use is 30.
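+Under the hood, this regularization is a K-nearest neighbors vote over the initial predictions. A minimal sketch of the idea with scikit-learn, where X and Y stand in for the point features and the labels predicted by the model:
+
+```python
+import numpy as np
+from sklearn.neighbors import KNeighborsClassifier
+
+X = np.random.rand(1000, 3).astype(np.float32)  # stand-in point features
+Y = np.random.randint(0, 6, size=1000)          # stand-in predicted classes
+
+# Index the points with their predicted labels, then re-assign each point
+# the majority label among its 30 nearest neighbors.
+neigh = KNeighborsClassifier(n_neighbors=30, algorithm='kd_tree', n_jobs=-1)
+neigh.fit(X, Y)
+Y_regularized = neigh.predict(X)
+```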
@@ -192,5 +204,5 @@ If you find our work useful in your research, please consider citing:
 }
 ```

-## About Semseg
+## About SemanticML

 This software was developed by [Kharroubi Abderrazzaq](https://github.com/akharroubi) and [Anass Yarroudh](https://github.com/Yarroudh), researchers at the Geomatics Unit of the University of Liege. For more detailed information please contact us; we are pleased to send you the necessary information.

diff --git a/semseg.py b/SemanticML/main.py
similarity index 97%
rename from semseg.py
rename to SemanticML/main.py
index de38746..b98c320 100644
--- a/semseg.py
+++ b/SemanticML/main.py
@@ -1,291 +1,291 @@
-import click
-import collections
-import os
-import json
-import laspy
-import time
-import pickle
-import itertools
-import numpy as np
-import matplotlib.pyplot as plt
-from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
-from sklearn.model_selection import train_test_split
-from sklearn.metrics import accuracy_score
-from sklearn.metrics import f1_score
-from sklearn.metrics import confusion_matrix
-from sklearn.metrics import jaccard_score
-from sklearn.metrics import recall_score
-from sklearn.metrics import precision_score
-from sklearn.preprocessing import StandardScaler
-from sklearn.neighbors import KNeighborsClassifier
-
-def train_model(method, X_train, Y_train, **kwargs):
-    '''
-    Train the model with the specified parameters and return it.
-    '''
-    if (method == "RandomForest"):
-        n_estimators = kwargs.get('n_estimators', None)
-        max_depth = kwargs.get('max_depth', None)
-        n_jobs = kwargs.get('n_jobs', None)
-        criterion = kwargs.get('criterion', None)
-        min_samples_split = kwargs.get('min_samples_split', None)
-        min_samples_leaf = kwargs.get('min_samples_leaf', None)
-        min_weight_fraction_leaf = kwargs.get('min_weight_fraction_leaf', None)
-        max_features = kwargs.get('max_features', None)
-        max_leaf_nodes = kwargs.get('max_leaf_nodes', None)
-        min_impurity_decrease = kwargs.get('min_impurity_decrease', None)
-        bootstrap = kwargs.get('bootstrap', None)
-        oob_score = kwargs.get('oob_score', None)
-        random_state = kwargs.get('random_state', None)
-        verbose = kwargs.get('verbose', None)
-        warm_start = kwargs.get('warm_start', None)
-        class_weight = kwargs.get('class_weight', None)
-        ccp_alpha = kwargs.get('ccp_alpha', None)
-        max_samples = kwargs.get('max_samples', None)
-
-        model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, max_leaf_nodes=max_leaf_nodes, min_impurity_decrease=min_impurity_decrease, bootstrap=bootstrap, oob_score=oob_score, n_jobs=n_jobs, random_state=random_state, verbose=verbose, warm_start=warm_start, class_weight=class_weight, ccp_alpha=ccp_alpha, max_samples=max_samples)
-
-    elif (method == "GradientBoosting"):
-        loss = kwargs.get('loss', None)
-        learning_rate = kwargs.get('learning_rate', None)
-        n_estimators = kwargs.get('n_estimators', None)
-        subsample = kwargs.get('subsample', None)
-        criterion = kwargs.get('criterion', None)
-        min_samples_split = kwargs.get('min_samples_split', None)
-        min_samples_leaf = kwargs.get('min_samples_leaf', None)
-        min_weight_fraction_leaf = kwargs.get('min_weight_fraction_leaf', None)
-        max_depth = kwargs.get('max_depths', None)
-        min_impurity_decrease = kwargs.get('min_impurity_decrease', None)
-        init = kwargs.get('init', None)
-        random_state = kwargs.get('random_state', None)
-        max_features = kwargs.get('max_features', None)
-        verbose = kwargs.get('verbose', None)
-        max_leaf_nodes = kwargs.get('max_leaf_nodes', None)
-        warm_start = kwargs.get('warm_start', None)
-        validation_fraction = kwargs.get('validation_fraction', None)
-        n_iter_no_change = kwargs.get('n_iter_no_change', None)
-        tol = kwargs.get('tol', None)
-        ccp_alpha = kwargs.get('ccp_alpha', None)
-
-        model = GradientBoostingClassifier(loss=loss, learning_rate=learning_rate, n_estimators=n_estimators, subsample=subsample, criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_depth=max_depth, min_impurity_decrease=min_impurity_decrease, init=init, random_state=random_state, max_features=max_features, verbose=verbose, max_leaf_nodes=max_leaf_nodes, warm_start=warm_start, validation_fraction=validation_fraction, n_iter_no_change=n_iter_no_change, tol=tol, ccp_alpha=ccp_alpha)
-
-    model.fit(X_train, Y_train)
-    return model
-
-def save_model(model, filename):
-    ''' Save the trained machine learning model as .pkl file
-    Attribures:
-        model (np.RandomForestClassifier) : Model to save
-        filename (string) : Model output file
-    '''
-    with open(filename, 'wb') as out:
-        pickle.dump(model, out, pickle.HIGHEST_PROTOCOL)
-
-def read_model(filepath):
-    ''' Read the Random Forest model from a .pkl file
-    Attributes:
-        filepath (string) : Path to the .pkl file
-    '''
-    return pickle.load(open(filepath, 'rb'))
-
-class OrderedGroup(click.Group):
-    def __init__(self, name=None, commands=None, **attrs):
-        super(OrderedGroup, self).__init__(name, commands, **attrs)
-        self.commands = commands or collections.OrderedDict()
-
-    def list_commands(self, ctx):
-        return self.commands
-
-@click.group(cls=OrderedGroup, help="CLI tool to perform semantic segmentation of 3D point clouds using Random Forest algorithm.")
-def cli():
-    pass
-
-@click.command()
-@click.argument('config', type=click.Path(exists=True), required=True)
-@click.option('--method', help='Learning method for classification.', type=click.Choice(['RandomForest', 'GradientBoosting']), default="RandomForest", required=False, show_default=True)
-
-def train(config, method):
-    '''
-    Train the model for semantic segmentation of 3D point clouds.
-    '''
-    if (os.path.exists("./output")==False):
-        os.mkdir("./output")
-
-    if (os.path.exists("./output/model")==False):
-        os.mkdir("./output/model")
-
-    with open(config) as file:
-        configuration = json.load(file)
-
-
-    # Read train and validation data from a file
-    debug = True
-
-    file = laspy.read(configuration["training_data"])
-    features = configuration["features"]
-
-    fields = [field.name for field in file.point_format]
-    if ('classification' in fields):
-        Y = np.asarray(file.classification, dtype=np.float32)
-        fields.remove('classification')
-
-    X = np.asarray(np.column_stack([getattr(file, field) for field in features]), dtype=np.float32)
-
-    # Load data
-    print('\nLoading data')
-    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
-    print('\tTraining samples: {}\n\tTesting samples: {}\n\tUsing features: {}'.format(len(Y_train), len(Y_test), features))
-
-    if (method == "RandomForest"):
-        # RF parameters
-        n_estimators = configuration["parameters"]["RandomForest"]["n_estimators"]
-        criterion = configuration["parameters"]["RandomForest"]["criterion"]
-        max_depths = [configuration["parameters"]["RandomForest"]["max_depths"]]
-        min_samples_split = configuration["parameters"]["RandomForest"]["min_samples_split"]
-        min_samples_leaf = configuration["parameters"]["RandomForest"]["min_samples_leaf"]
-        min_weight_fraction_leaf = configuration["parameters"]["RandomForest"]["min_weight_fraction_leaf"]
-        max_features = configuration["parameters"]["RandomForest"]["max_features"]
-        max_leaf_nodes = configuration["parameters"]["RandomForest"]["max_leaf_nodes"]
-        min_impurity_decrease = configuration["parameters"]["RandomForest"]["min_impurity_decrease"]
-        bootstrap = configuration["parameters"]["RandomForest"]["bootstrap"]
-        oob_score = configuration["parameters"]["RandomForest"]["oob_score"]
-        n_jobs = configuration["parameters"]["RandomForest"]["n_jobs"]
-        random_state = configuration["parameters"]["RandomForest"]["random_state"]
-        verbose = configuration["parameters"]["RandomForest"]["verbose"]
-        warm_start = configuration["parameters"]["RandomForest"]["warm_start"]
-        class_weight = configuration["parameters"]["RandomForest"]["class_weight"]
-        ccp_alpha = configuration["parameters"]["RandomForest"]["ccp_alpha"]
-        max_samples = configuration["parameters"]["RandomForest"]["max_samples"]
-
-    elif (method == "GradientBoosting"):
-        n_estimators = configuration["parameters"]["GradientBoosting"]["n_estimators"]
-        loss = configuration["parameters"]["GradientBoosting"]["loss"]
-        learning_rate = configuration["parameters"]["GradientBoosting"]["learning_rate"]
-        subsample = configuration["parameters"]["GradientBoosting"]["subsample"]
configuration["parameters"]["GradientBoosting"]["subsample"] - criterion = configuration["parameters"]["GradientBoosting"]["criterion"] - min_samples_split = configuration["parameters"]["GradientBoosting"]["min_samples_split"] - min_samples_leaf = configuration["parameters"]["GradientBoosting"]["min_samples_leaf"] - min_weight_fraction_leaf = configuration["parameters"]["GradientBoosting"]["min_weight_fraction_leaf"] - max_depths = configuration["parameters"]["GradientBoosting"]["max_depths"] - min_impurity_decrease = configuration["parameters"]["GradientBoosting"]["min_impurity_decrease"] - init = configuration["parameters"]["GradientBoosting"]["init"] - random_state = configuration["parameters"]["GradientBoosting"]["random_state"] - max_features = configuration["parameters"]["GradientBoosting"]["max_features"] - verbose = configuration["parameters"]["GradientBoosting"]["verbose"] - max_leaf_nodes = configuration["parameters"]["GradientBoosting"]["max_leaf_nodes"] - warm_start = configuration["parameters"]["GradientBoosting"]["warm_start"] - validation_fraction = configuration["parameters"]["GradientBoosting"]["validation_fraction"] - n_iter_no_change = configuration["parameters"]["GradientBoosting"]["n_iter_no_change"] - tol = configuration["parameters"]["GradientBoosting"]["tol"] - ccp_alpha = configuration["parameters"]["GradientBoosting"]["ccp_alpha"] - - # Scale the features using StandardScaler - scaler = StandardScaler() - X_train = scaler.fit_transform(X_train) - X_test = scaler.transform(X_test) - - # Train the model - print('\nTraining the model') - best_conf = {'ne' : 0, 'md' : 0} # Best configuration initialisation - best_f1 = 0 - f1_results=[] - start = time.time() - for ne, md in list(itertools.product(n_estimators, max_depths)): # Train the model with different parameters and pick the one having the maximum f1-score on the test-set - # Train the model - if (method == "RandomForest"): - model = train_model(method, X_train, Y_train, n_estimators=ne, max_depth=md, criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, max_leaf_nodes=max_leaf_nodes, min_impurity_decrease=min_impurity_decrease, bootstrap=bootstrap, oob_score=oob_score, n_jobs=n_jobs, random_state=random_state, verbose=verbose, warm_start=warm_start, class_weight=class_weight, ccp_alpha=ccp_alpha, max_samples=max_samples) - elif (method == "GradientBoosting"): - model = train_model(method, X_train, Y_train, n_estimators=ne, max_depth=md, loss=loss, learning_rate=learning_rate, subsample=subsample, criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, min_impurity_decrease=min_impurity_decrease, init=init, random_state=random_state, max_features=max_features, verbose=verbose, max_leaf_nodes=max_leaf_nodes, warm_start=warm_start, validation_fraction=validation_fraction, n_iter_no_change=n_iter_no_change, tol=tol, ccp_alpha=ccp_alpha) - - Y_test_pred = model.predict(X_test) # Test the model, using only the specified features - - acc = accuracy_score(Y_test, Y_test_pred) # Compute metrics and update best model - f1 = f1_score(Y_test, Y_test_pred, average='weighted') - f1_results.append(f1) - recall=recall_score(Y_test, Y_test_pred, average='weighted') - precision= precision_score(Y_test, Y_test_pred, average='weighted') - js=jaccard_score(Y_test, Y_test_pred, average='weighted') - - if f1 > best_f1: # Update best 
-            best_conf['ne'] = ne
-            best_conf['md'] = md
-            best_f1 = f1
-
-        if debug: print('\tne: {}, md: {} - acc: {} f1: {} precision:{} recall:{} js: {} oob_score: {}'.format(ne, md, acc, f1, precision, recall, js, model.oob_score))
-
-    if (len(n_estimators) == 1):
-        pass
-    else:
-        model = train_model(X_train, Y_train, best_conf['ne'], best_conf['md'], criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, max_leaf_nodes=max_leaf_nodes, min_impurity_decrease=min_impurity_decrease, bootstrap=bootstrap, oob_score=oob_score, n_jobs=n_jobs, random_state=random_state, verbose=verbose, warm_start=warm_start, class_weight=class_weight, ccp_alpha=ccp_alpha, max_samples=max_samples)
-
-    save_model(model, './output/model/ne{}_md{}.pkl'.format(best_conf['ne'], best_conf['md']))
-
-    end = time.time()
-    processTime = end - start
-
-    print('\n\tBest parameters: ne: {}, md: {}'.format(n_estimators, max_depths))
-    print('\tFeature importance:\n{}'.format(model.feature_importances_))
-    print('\tConfusion matrix:\n{}'.format(confusion_matrix(Y_test, Y_test_pred)))
-    print('\tTraining time: {} seconds'.format(time.strftime("%H:%M:%S", time.gmtime(processTime))))
-
-
-@click.command()
-@click.argument('config', type=click.Path(exists=True), required=True)
-@click.argument('pointcloud', type=click.Path(exists=True), required=True)
-@click.argument('model', type=click.Path(exists=True), required=True)
-@click.option('--regularize', help='If checked the input data will be regularized.', type=bool, default=False, required=False, show_default=True)
-@click.option('-k', help='Number of neighbors to use if regularization is set.', type=click.INT, default=10, required=False, show_default=True)
-@click.option('--filename', help='Write the classified point cloud in a .LAS file.', type=click.Path(exists=False), required=True, show_default=True)
-
-def predict(config, pointcloud, model, regularize, k, filename):
-    '''
-    Perform semantic segmentation using pre-trained model.
-    '''
-    if (os.path.exists("./output")==False):
-        os.mkdir("./output")
-
-    if (os.path.exists("./output/prediction")==False):
-        os.mkdir("./output/prediction")
-
-    with open(config) as file:
-        configuration = json.load(file)
-
-    features = configuration["features"]
-
-    start = time.time()
-    # Load the model
-    model = read_model(model)
-
-    # Read data
-    file = laspy.read(pointcloud)
-    X = np.asarray(np.column_stack([getattr(file, field) for field in features]), dtype=np.float32)
-
-    # Perform semantic segmentation
-    print ('Classifying the dataset')
-    Y = model.predict(X)
-
-    # Regularization
-    if (regularize):
-        neigh = KNeighborsClassifier(n_neighbors=k, algorithm='kd_tree', n_jobs=-1)
-        neigh.fit(X, Y)
-        Y = neigh.predict(X)
-
-    # Save the results in LAS file
-    header = file.header
-    las = laspy.LasData(header)
-    las.points = file.points
-    las.classification = Y
-
-    # Export results
-    las.write("./output/prediction/{}".format(filename))
-
-    end = time.time()
-    processTime = end - start
-    print('Data classified in: {}'.format(time.strftime("%H:%M:%S", time.gmtime(processTime))))
-
-
-cli.add_command(train)
-cli.add_command(predict)
-
-if __name__ == '__main__':
-    cli(prog_name='semseg')
\ No newline at end of file
+import click
+import collections
+import os
+import json
+import laspy
+import time
+import pickle
+import itertools
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score
+from sklearn.metrics import f1_score
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import jaccard_score
+from sklearn.metrics import recall_score
+from sklearn.metrics import precision_score
+from sklearn.preprocessing import StandardScaler
+from sklearn.neighbors import KNeighborsClassifier
+
+def train_model(method, X_train, Y_train, **kwargs):
+    '''
+    Train the model with the specified parameters and return it.
+    '''
+    if (method == "RandomForest"):
+        n_estimators = kwargs.get('n_estimators', None)
+        max_depth = kwargs.get('max_depth', None)
+        n_jobs = kwargs.get('n_jobs', None)
+        criterion = kwargs.get('criterion', None)
+        min_samples_split = kwargs.get('min_samples_split', None)
+        min_samples_leaf = kwargs.get('min_samples_leaf', None)
+        min_weight_fraction_leaf = kwargs.get('min_weight_fraction_leaf', None)
+        max_features = kwargs.get('max_features', None)
+        max_leaf_nodes = kwargs.get('max_leaf_nodes', None)
+        min_impurity_decrease = kwargs.get('min_impurity_decrease', None)
+        bootstrap = kwargs.get('bootstrap', None)
+        oob_score = kwargs.get('oob_score', None)
+        random_state = kwargs.get('random_state', None)
+        verbose = kwargs.get('verbose', None)
+        warm_start = kwargs.get('warm_start', None)
+        class_weight = kwargs.get('class_weight', None)
+        ccp_alpha = kwargs.get('ccp_alpha', None)
+        max_samples = kwargs.get('max_samples', None)
+
+        model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, max_leaf_nodes=max_leaf_nodes, min_impurity_decrease=min_impurity_decrease, bootstrap=bootstrap, oob_score=oob_score, n_jobs=n_jobs, random_state=random_state, verbose=verbose, warm_start=warm_start, class_weight=class_weight, ccp_alpha=ccp_alpha, max_samples=max_samples)
+
+    elif (method == "GradientBoosting"):
+        loss = kwargs.get('loss', None)
+        learning_rate = kwargs.get('learning_rate', None)
+        n_estimators = kwargs.get('n_estimators', None)
+        subsample = kwargs.get('subsample', None)
+        criterion = kwargs.get('criterion', None)
+        min_samples_split = kwargs.get('min_samples_split', None)
+        min_samples_leaf = kwargs.get('min_samples_leaf', None)
+        min_weight_fraction_leaf = kwargs.get('min_weight_fraction_leaf', None)
+        max_depth = kwargs.get('max_depth', None)
+        min_impurity_decrease = kwargs.get('min_impurity_decrease', None)
+        init = kwargs.get('init', None)
+        random_state = kwargs.get('random_state', None)
+        max_features = kwargs.get('max_features', None)
+        verbose = kwargs.get('verbose', None)
+        max_leaf_nodes = kwargs.get('max_leaf_nodes', None)
+        warm_start = kwargs.get('warm_start', None)
+        validation_fraction = kwargs.get('validation_fraction', None)
+        n_iter_no_change = kwargs.get('n_iter_no_change', None)
+        tol = kwargs.get('tol', None)
+        ccp_alpha = kwargs.get('ccp_alpha', None)
+
+        model = GradientBoostingClassifier(loss=loss, learning_rate=learning_rate, n_estimators=n_estimators, subsample=subsample, criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_depth=max_depth, min_impurity_decrease=min_impurity_decrease, init=init, random_state=random_state, max_features=max_features, verbose=verbose, max_leaf_nodes=max_leaf_nodes, warm_start=warm_start, validation_fraction=validation_fraction, n_iter_no_change=n_iter_no_change, tol=tol, ccp_alpha=ccp_alpha)
+
+    model.fit(X_train, Y_train)
+    return model
+
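+# Illustrative call (hypothetical values, not part of the CLI flow):
+#   model = train_model("RandomForest", X_train, Y_train, n_estimators=50,
+#                       max_depth=None, criterion="entropy", min_samples_split=4,
+#                       min_samples_leaf=4, min_weight_fraction_leaf=0,
+#                       max_features="sqrt", max_leaf_nodes=None,
+#                       min_impurity_decrease=0.0, bootstrap=True, oob_score=False,
+#                       n_jobs=-1, random_state=None, verbose=0, warm_start=False,
+#                       class_weight=None, ccp_alpha=0.0, max_samples=None)
+# Note: missing kwargs default to None and are passed straight to the
+# scikit-learn constructor, so every parameter should be supplied explicitly.
+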
+def save_model(model, filename):
+    ''' Save the trained machine learning model as a .pkl file
+    Attributes:
+        model : Object to pickle (the train command saves a (scaler, model) pair)
+        filename (string) : Model output file
+    '''
+    with open(filename, 'wb') as out:
+        pickle.dump(model, out, pickle.HIGHEST_PROTOCOL)
+
+def read_model(filepath):
+    ''' Read the trained model from a .pkl file
+    Attributes:
+        filepath (string) : Path to the .pkl file
+    '''
+    with open(filepath, 'rb') as file:
+        return pickle.load(file)
+
+class OrderedGroup(click.Group):
+    def __init__(self, name=None, commands=None, **attrs):
+        super(OrderedGroup, self).__init__(name, commands, **attrs)
+        self.commands = commands or collections.OrderedDict()
+
+    def list_commands(self, ctx):
+        return self.commands
+
+@click.group(cls=OrderedGroup, help="CLI tool to perform semantic segmentation of 3D point clouds using Machine Learning algorithms.")
+def cli():
+    pass
+
+@click.command()
+@click.argument('config', type=click.Path(exists=True), required=True)
+@click.option('--method', help='Learning method for classification.', type=click.Choice(['RandomForest', 'GradientBoosting']), default="RandomForest", required=False, show_default=True)
+
+def train(config, method):
+    '''
+    Train the model for semantic segmentation of 3D point clouds.
+    '''
+    if (os.path.exists("./output")==False):
+        os.mkdir("./output")
+
+    if (os.path.exists("./output/model")==False):
+        os.mkdir("./output/model")
+
+    with open(config) as file:
+        configuration = json.load(file)
+
+    # Read train and validation data from a file
+    debug = True
+
+    file = laspy.read(configuration["training_data"])
+    features = configuration["features"]
+
+    fields = [field.name for field in file.point_format]
+    if ('classification' in fields):
+        Y = np.asarray(file.classification, dtype=np.float32)
+        fields.remove('classification')
+    else:
+        raise click.ClickException("The training data has no 'classification' field.")
+
+    X = np.asarray(np.column_stack([getattr(file, field) for field in features]), dtype=np.float32)
+
+    # Load data
+    print('\nLoading data')
+    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
+    print('\tTraining samples: {}\n\tTesting samples: {}\n\tUsing features: {}'.format(len(Y_train), len(Y_test), features))
+
+    if (method == "RandomForest"):
+        # RF parameters
+        n_estimators = configuration["parameters"]["RandomForest"]["n_estimators"]
+        criterion = configuration["parameters"]["RandomForest"]["criterion"]
+        max_depths = [configuration["parameters"]["RandomForest"]["max_depths"]]
+        min_samples_split = configuration["parameters"]["RandomForest"]["min_samples_split"]
+        min_samples_leaf = configuration["parameters"]["RandomForest"]["min_samples_leaf"]
+        min_weight_fraction_leaf = configuration["parameters"]["RandomForest"]["min_weight_fraction_leaf"]
+        max_features = configuration["parameters"]["RandomForest"]["max_features"]
+        max_leaf_nodes = configuration["parameters"]["RandomForest"]["max_leaf_nodes"]
+        min_impurity_decrease = configuration["parameters"]["RandomForest"]["min_impurity_decrease"]
+        bootstrap = configuration["parameters"]["RandomForest"]["bootstrap"]
+        oob_score = configuration["parameters"]["RandomForest"]["oob_score"]
+        n_jobs = configuration["parameters"]["RandomForest"]["n_jobs"]
+        random_state = configuration["parameters"]["RandomForest"]["random_state"]
+        verbose = configuration["parameters"]["RandomForest"]["verbose"]
+        warm_start = configuration["parameters"]["RandomForest"]["warm_start"]
+        class_weight = configuration["parameters"]["RandomForest"]["class_weight"]
+        ccp_alpha = configuration["parameters"]["RandomForest"]["ccp_alpha"]
+        max_samples = configuration["parameters"]["RandomForest"]["max_samples"]
+
+    elif (method == "GradientBoosting"):
+        n_estimators = configuration["parameters"]["GradientBoosting"]["n_estimators"]
+        loss = configuration["parameters"]["GradientBoosting"]["loss"]
+        learning_rate = configuration["parameters"]["GradientBoosting"]["learning_rate"]
+        subsample = configuration["parameters"]["GradientBoosting"]["subsample"]
configuration["parameters"]["GradientBoosting"]["subsample"] + criterion = configuration["parameters"]["GradientBoosting"]["criterion"] + min_samples_split = configuration["parameters"]["GradientBoosting"]["min_samples_split"] + min_samples_leaf = configuration["parameters"]["GradientBoosting"]["min_samples_leaf"] + min_weight_fraction_leaf = configuration["parameters"]["GradientBoosting"]["min_weight_fraction_leaf"] + max_depths = configuration["parameters"]["GradientBoosting"]["max_depths"] + min_impurity_decrease = configuration["parameters"]["GradientBoosting"]["min_impurity_decrease"] + init = configuration["parameters"]["GradientBoosting"]["init"] + random_state = configuration["parameters"]["GradientBoosting"]["random_state"] + max_features = configuration["parameters"]["GradientBoosting"]["max_features"] + verbose = configuration["parameters"]["GradientBoosting"]["verbose"] + max_leaf_nodes = configuration["parameters"]["GradientBoosting"]["max_leaf_nodes"] + warm_start = configuration["parameters"]["GradientBoosting"]["warm_start"] + validation_fraction = configuration["parameters"]["GradientBoosting"]["validation_fraction"] + n_iter_no_change = configuration["parameters"]["GradientBoosting"]["n_iter_no_change"] + tol = configuration["parameters"]["GradientBoosting"]["tol"] + ccp_alpha = configuration["parameters"]["GradientBoosting"]["ccp_alpha"] + + # Scale the features using StandardScaler + scaler = StandardScaler() + X_train = scaler.fit_transform(X_train) + X_test = scaler.transform(X_test) + + # Train the model + print('\nTraining the model') + best_conf = {'ne' : 0, 'md' : 0} # Best configuration initialisation + best_f1 = 0 + f1_results=[] + start = time.time() + for ne, md in list(itertools.product(n_estimators, max_depths)): # Train the model with different parameters and pick the one having the maximum f1-score on the test-set + # Train the model + if (method == "RandomForest"): + model = train_model(method, X_train, Y_train, n_estimators=ne, max_depth=md, criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, max_leaf_nodes=max_leaf_nodes, min_impurity_decrease=min_impurity_decrease, bootstrap=bootstrap, oob_score=oob_score, n_jobs=n_jobs, random_state=random_state, verbose=verbose, warm_start=warm_start, class_weight=class_weight, ccp_alpha=ccp_alpha, max_samples=max_samples) + elif (method == "GradientBoosting"): + model = train_model(method, X_train, Y_train, n_estimators=ne, max_depth=md, loss=loss, learning_rate=learning_rate, subsample=subsample, criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, min_impurity_decrease=min_impurity_decrease, init=init, random_state=random_state, max_features=max_features, verbose=verbose, max_leaf_nodes=max_leaf_nodes, warm_start=warm_start, validation_fraction=validation_fraction, n_iter_no_change=n_iter_no_change, tol=tol, ccp_alpha=ccp_alpha) + + Y_test_pred = model.predict(X_test) # Test the model, using only the specified features + + acc = accuracy_score(Y_test, Y_test_pred) # Compute metrics and update best model + f1 = f1_score(Y_test, Y_test_pred, average='weighted') + f1_results.append(f1) + recall=recall_score(Y_test, Y_test_pred, average='weighted') + precision= precision_score(Y_test, Y_test_pred, average='weighted') + js=jaccard_score(Y_test, Y_test_pred, average='weighted') + + if f1 > best_f1: # Update best 
+            best_conf['ne'] = ne
+            best_conf['md'] = md
+            best_f1 = f1
+            best_model = model
+
+        if debug: print('\tne: {}, md: {} - acc: {} f1: {} precision: {} recall: {} js: {} oob_score: {}'.format(ne, md, acc, f1, precision, recall, js, getattr(model, 'oob_score_', None)))
+
+    # Reuse the best model found during the search instead of retraining
+    model = best_model
+    Y_test_pred = model.predict(X_test)
+
+    # Persist the scaler together with the model so predict can apply the same transform
+    save_model((scaler, model), './output/model/ne{}_md{}.pkl'.format(best_conf['ne'], best_conf['md']))
+
+    end = time.time()
+    processTime = end - start
+
+    print('\n\tBest parameters: ne: {}, md: {}'.format(best_conf['ne'], best_conf['md']))
+    print('\tFeature importance:\n{}'.format(model.feature_importances_))
+    print('\tConfusion matrix:\n{}'.format(confusion_matrix(Y_test, Y_test_pred)))
+    print('\tTraining time: {}'.format(time.strftime("%H:%M:%S", time.gmtime(processTime))))
+
+
+@click.command()
+@click.argument('config', type=click.Path(exists=True), required=True)
+@click.argument('pointcloud', type=click.Path(exists=True), required=True)
+@click.argument('model', type=click.Path(exists=True), required=True)
+@click.option('--regularize', help='If set, the classification is regularized using a K-nearest neighbors vote.', type=bool, default=False, required=False, show_default=True)
+@click.option('-k', help='Number of neighbors to use if regularization is set.', type=click.INT, default=10, required=False, show_default=True)
+@click.option('--filename', help='Name of the output .LAS file with the classified point cloud.', type=click.Path(exists=False), required=True, show_default=True)
+
+def predict(config, pointcloud, model, regularize, k, filename):
+    '''
+    Perform semantic segmentation using pre-trained model.
+    '''
+    if (os.path.exists("./output")==False):
+        os.mkdir("./output")
+
+    if (os.path.exists("./output/prediction")==False):
+        os.mkdir("./output/prediction")
+
+    with open(config) as file:
+        configuration = json.load(file)
+
+    features = configuration["features"]
+
+    start = time.time()
+    # Load the scaler and the model
+    scaler, model = read_model(model)
+
+    # Read data
+    file = laspy.read(pointcloud)
+    X = np.asarray(np.column_stack([getattr(file, field) for field in features]), dtype=np.float32)
+    X = scaler.transform(X) # Apply the same standardization used at training time
+
+    # Perform semantic segmentation
+    print('Classifying the dataset')
+    Y = model.predict(X)
+
+    # Regularization
+    if (regularize):
+        neigh = KNeighborsClassifier(n_neighbors=k, algorithm='kd_tree', n_jobs=-1)
+        neigh.fit(X, Y)
+        Y = neigh.predict(X)
+
+    # Save the results in a LAS file
+    header = file.header
+    las = laspy.LasData(header)
+    las.points = file.points
+    las.classification = Y.astype(np.uint8)
+
+    # Export results
+    las.write("./output/prediction/{}".format(filename))
+
+    end = time.time()
+    processTime = end - start
+    print('Data classified in: {}'.format(time.strftime("%H:%M:%S", time.gmtime(processTime))))
+
+
+cli.add_command(train)
+cli.add_command(predict)
+
+if __name__ == '__main__':
+    cli(prog_name='sml')
\ No newline at end of file
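For users who prefer scripting over the CLI, the saved .pkl can be loaded directly. A minimal sketch, where the file names and the feature list are illustrative and the pickle is assumed to hold the (scaler, model) pair saved by the train command above:

```python
# Minimal sketch: classify a point cloud without the CLI.
# The features must match the ones listed in the config used for training.
import pickle
import laspy
import numpy as np

with open("./output/model/ne50_mdNone.pkl", "rb") as f:
    scaler, model = pickle.load(f)   # (StandardScaler, classifier) pair saved by train

las = laspy.read("unclassified.las")
features = ["red", "green", "blue"]
X = np.column_stack([np.asarray(getattr(las, name)) for name in features]).astype(np.float32)

las.classification = model.predict(scaler.transform(X)).astype(np.uint8)
las.write("classified.las")
```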
diff --git a/config.json b/config/config.json
similarity index 97%
rename from config.json
rename to config/config.json
index d3b1a4e..26b6ae0 100644
--- a/config.json
+++ b/config/config.json
@@ -1,49 +1,49 @@
-{
-    "features": ["red", "blue", "green", "Verticality16", "Verticality8", "Linearity16", "Linearity8", "Planarity16", "Planarity8", "Surfacevariation5", "Numberneighbors10"],
-    "label": ["Ground", "Vegetation", "Rail", "Catenary pole", "Cable", "Infrastructure"],
-    "training_data": "C:/Users/Administrateur/Desktop/railway.las",
-    "parameters": {
-        "RandomForest": {
-            "n_estimators": [50],
-            "criterion": "entropy",
-            "max_depths": null,
-            "min_samples_split": 4,
-            "min_samples_leaf": 4,
-            "min_weight_fraction_leaf": 0,
-            "max_features": "sqrt",
-            "max_leaf_nodes": null,
-            "min_impurity_decrease": 0.0,
-            "bootstrap": true,
-            "oob_score": false,
-            "n_jobs": -1,
-            "random_state": null,
-            "verbose": 0,
-            "warm_start": false,
-            "class_weight": null,
-            "ccp_alpha": 0.0,
-            "max_samples": null
-        },
-        "GradientBoosting": {
-            "n_estimators": [50],
-            "loss":"log_loss",
-            "learning_rate":0.1,
-            "subsample": 1.0,
-            "criterion": "friedman_mse",
-            "min_samples_split": 2,
-            "min_samples_leaf": 1,
-            "min_weight_fraction_leaf": 0.0,
-            "max_depths": [3],
-            "min_impurity_decrease": 0.0,
-            "init": null,
-            "random_state": null,
-            "max_features": null,
-            "verbose": 0,
-            "max_leaf_nodes": null,
-            "warm_start": false,
-            "validation_fraction": 0.1,
-            "n_iter_no_change": null,
-            "tol": 1e-4,
-            "ccp_alpha": 0.0
-        }
-    }
-}
+{
+    "features": ["red", "blue", "green", "Verticality16", "Verticality8", "Linearity16", "Linearity8", "Planarity16", "Planarity8", "Surfacevariation5", "Numberneighbors10"],
+    "label": ["Ground", "Vegetation", "Rail", "Catenary pole", "Cable", "Infrastructure"],
+    "training_data": "C:/Users/Administrateur/Desktop/railway.las",
+    "parameters": {
+        "RandomForest": {
+            "n_estimators": [50],
+            "criterion": "entropy",
+            "max_depths": null,
+            "min_samples_split": 4,
+            "min_samples_leaf": 4,
+            "min_weight_fraction_leaf": 0,
+            "max_features": "sqrt",
+            "max_leaf_nodes": null,
+            "min_impurity_decrease": 0.0,
+            "bootstrap": true,
+            "oob_score": false,
+            "n_jobs": -1,
+            "random_state": null,
+            "verbose": 0,
+            "warm_start": false,
+            "class_weight": null,
+            "ccp_alpha": 0.0,
+            "max_samples": null
+        },
+        "GradientBoosting": {
+            "n_estimators": [50],
+            "loss": "log_loss",
+            "learning_rate": 0.1,
+            "subsample": 1.0,
+            "criterion": "friedman_mse",
+            "min_samples_split": 2,
+            "min_samples_leaf": 1,
+            "min_weight_fraction_leaf": 0.0,
+            "max_depths": [3],
+            "min_impurity_decrease": 0.0,
+            "init": null,
+            "random_state": null,
+            "max_features": null,
+            "verbose": 0,
+            "max_leaf_nodes": null,
+            "warm_start": false,
+            "validation_fraction": 0.1,
+            "n_iter_no_change": null,
+            "tol": 1e-4,
+            "ccp_alpha": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..78a1222
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,25 @@
+from setuptools import setup, find_packages
+
+setup(
+    name='SemanticML',
+    version='0.1.1',
+    description="CLI tool to perform semantic segmentation of 3D point clouds using Machine Learning algorithms.",
+    long_description=open('README.md').read(),
+    long_description_content_type='text/markdown',
+    author='Anass Yarroudh',
+    author_email='ayarroudh@uliege.be',
+    url='https://github.com/Yarroudh/SemanticML',
+    packages=find_packages(),
+    include_package_data=True,
+    install_requires=[
+        'click==8.1.3',
+        'numpy==1.24.2',
+        'matplotlib==3.7.1',
+        'scikit-learn==1.2.2',
+        'laspy==2.4.1'
+    ],
+    entry_points='''
+        [console_scripts]
+        sml=SemanticML.main:cli
+    '''
+)
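The train command reads every key under parameters[method] in the configuration, so the easiest way to build a config for a new dataset is to adapt the shipped config/config.json rather than write one from scratch. A minimal sketch, with illustrative paths and feature names:

```python
# Minimal sketch: adapt the shipped config to a new dataset.
import json

with open("config/config.json") as f:
    config = json.load(f)

config["training_data"] = "data/my_area.las"                          # labeled LAS file
config["features"] = ["red", "green", "blue", "Planarity8"]           # fields present in the file
config["parameters"]["RandomForest"]["n_estimators"] = [25, 50, 100]  # grid searched, best f1 kept

with open("my_config.json", "w") as f:
    json.dump(config, f, indent=4)
```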