"""
Extract per-pole point clouds from tiled LiDAR (.laz) files.

The module processes a collection of .laz files located in a directory. For
every tile it looks up the poles listed in an external Excel sheet, builds a
bounding box around each pole and writes the points inside that box to a new
.laz file.

Key features:
1. Reads .laz files from a directory and extracts the two numbers embedded in
   each file name (``final_<a>_<b>.laz``). Multiplied by the tile size (50),
   they give the base X/Y coordinates of the tile.
2. Filters the Excel sheet down to the poles whose ground and top coordinates
   both lie inside that tile.
3. For each pole, applies a bounding box to the tile's point cloud.
4. Writes the selected points to ``<output_base_filename>_<index>.laz``, where
   ``<index>`` is the pole's row index in the sheet.

Usage:
- Requires the laspy, pandas and numpy packages.
- Input directory, output base name and sheet path are configured in the
  ``__main__`` section of the script.
"""

import os
import re
import sys

import numpy as np  # noqa: F401  (kept: file-level import of the original module)
import pandas as pd

# NOTE: ``laspy`` is imported inside the functions that actually read or write
# LAZ data, so the file-name and coordinate helpers stay importable (and
# testable) on machines without the LAZ backend installed.


# ---------------------------------------------------------------------------
# src/utils/bb_extraction.py
# ---------------------------------------------------------------------------

# Edge length of one tile; also the factor applied to the numbers in a tile's
# file name to obtain its base coordinates.
_TILE_SIZE = 50

# The dot is escaped on purpose: an unescaped '.' also accepts names such as
# 'final_1_2xlaz'.
_TILE_NAME_PATTERN = re.compile(r'final_(\d+)_(\d+)\.laz')

# Columns handed on to the bounding-box step.
_POLE_COLUMNS = ['Grond X', 'Top X', 'Grond Y', 'Top Y', 'Grond Z', 'Top Z']


def extract_numbers_from_string(s):
    """
    Extract the two tile numbers from a name like ``final_<a>_<b>.laz``.

    Parameters:
    s (str): The string (typically a file name) to search.

    Returns:
    tuple of int: ``(a * 50, b * 50)`` when the pattern is found.
    None: when the pattern does not occur in ``s``.
    """
    match = _TILE_NAME_PATTERN.search(s)
    if match is None:
        return None
    num1, num2 = match.groups()
    return int(num1) * _TILE_SIZE, int(num2) * _TILE_SIZE


def _parse_decimal(value):
    """Convert a cell that may use a decimal comma (e.g. ``'12,5'``) to float."""
    return float(str(value).replace(',', '.'))


def _load_pole_sheet(filename):
    """Read the Excel sheet and normalise all pole coordinate columns to float."""
    df = pd.read_excel(filename)
    # The Z columns are normalised too: they are used arithmetically when the
    # bounding box is built, so a decimal-comma string there would fail later.
    for column in _POLE_COLUMNS:
        df[column] = df[column].apply(_parse_decimal)
    return df


def _select_poles_in_tile(df, base_x, base_y):
    """Return the pole columns of the rows lying inside the tile at (base_x, base_y)."""
    in_tile = pd.Series(True, index=df.index, dtype=bool)
    # ``between`` is inclusive on both ends, i.e. base <= value <= base + 50.
    for column in ('Grond X', 'Top X'):
        in_tile &= df[column].between(base_x, base_x + _TILE_SIZE)
    for column in ('Grond Y', 'Top Y'):
        in_tile &= df[column].between(base_y, base_y + _TILE_SIZE)
    return df.loc[in_tile, _POLE_COLUMNS]


def filter_pole_coordinates(filename, base_x, base_y):
    """
    Read pole coordinates from an Excel file and keep those inside one tile.

    Parameters:
    filename (str): Path to the Excel file.
    base_x (float): Lower X bound of the tile.
    base_y (float): Lower Y bound of the tile.

    Returns:
    pandas.DataFrame: The columns 'Grond X', 'Top X', 'Grond Y', 'Top Y',
                      'Grond Z' and 'Top Z' (as floats) for the rows where both
                      X values lie in [base_x, base_x + 50] and both Y values
                      lie in [base_y, base_y + 50].
    """
    return _select_poles_in_tile(_load_pole_sheet(filename), base_x, base_y)


def create_bounding_box_laz(input_laz_file, base_filename, poles_df, box_size=1, min_z_offset=2):
    """
    Write one LAZ file per pole containing the points inside the pole's bounding box.

    Parameters:
    input_laz_file (str): Path to the input LAZ file with the original point cloud.
    base_filename (str): Base path and file name for the output files.
    poles_df (pandas.DataFrame): One row per pole with the columns 'Grond X',
                                 'Top X', 'Grond Y', 'Top Y', 'Grond Z', 'Top Z'.
    box_size (float, optional): Margin added around the pole on every axis.
                                Defaults to 1.
    min_z_offset (float, optional): Amount by which the lower Z bound is raised.
                                    Defaults to 2, the value that used to be
                                    hard-coded. Presumably this trims ground
                                    returns - TODO confirm.

    Each output file is named ``<base_filename>_<index>.laz`` where ``<index>``
    is the row's index in ``poles_df``.

    NOTE(review): the name depends only on the row index. Tile bounds are
    inclusive, so a pole lying exactly on a shared tile edge is selected for
    both tiles and the later write replaces the earlier one.
    """
    if poles_df.empty:
        # Nothing to extract: do not read (and decompress) the tile at all.
        return

    import laspy

    # Read the original LAZ file once; every pole is cut from the same data.
    with laspy.open(input_laz_file) as file:
        las = file.read()

    for index, row in poles_df.iterrows():
        # A pole may lean in either direction, so the horizontal extent is
        # taken from whichever of ground/top is smaller or larger before the
        # margin is applied.
        min_x = min(row['Grond X'], row['Top X']) - box_size
        max_x = max(row['Grond X'], row['Top X']) + box_size
        min_y = min(row['Grond Y'], row['Top Y']) - box_size
        max_y = max(row['Grond Y'], row['Top Y']) + box_size
        min_z = row['Grond Z'] - box_size + min_z_offset
        max_z = row['Top Z'] + box_size

        mask = (
            (las.x >= min_x) & (las.x <= max_x) &
            (las.y >= min_y) & (las.y <= max_y) &
            (las.z >= min_z) & (las.z <= max_z)
        )

        # New LasData object built on the source header, holding only the
        # points selected by the mask.
        new_las = laspy.LasData(las.header)
        new_las.points = las.points[mask]
        new_las.write(f"{base_filename}_{index}.laz")


def main(directory, output_base_filename, sheet):
    """
    Cut out the poles of every tile found in ``directory``.

    For each ``.laz`` file the base X/Y coordinates are derived from the file
    name, the poles inside that tile are selected from the Excel sheet, and one
    output file per pole is written.

    Parameters:
    directory (str): Directory containing the original .laz files.
    output_base_filename (str): Base path and file name for the output .laz
                                files; its directory is created when missing.
    sheet (str): Path to the Excel file with the pole coordinates.
    """
    poles = None
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".laz"):
            continue

        tile_origin = extract_numbers_from_string(filename)
        if tile_origin is None:
            # Without this guard the tuple unpacking below raises TypeError
            # for any .laz file that is not named 'final_<a>_<b>.laz'.
            print(f"Skipping {filename}: name does not match 'final_<a>_<b>.laz'",
                  file=sys.stderr)
            continue
        base_x, base_y = tile_origin

        if poles is None:
            # First usable tile: load the sheet once instead of once per tile
            # and make sure the output directory exists.
            poles = _load_pole_sheet(sheet)
            output_dir = os.path.dirname(output_base_filename)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

        filtered_data = _select_poles_in_tile(poles, base_x, base_y)
        create_bounding_box_laz(os.path.join(directory, filename),
                                output_base_filename, filtered_data)


if __name__ == "__main__":
    directory = 'data/laz_pc_data'
    output_base_filename = 'data/bb_extracted_data_unlabelled/bb'
    sheet = 'data/sheets/fused_coordinates_poles.xlsx'
    main(directory, output_base_filename, sheet)


# ---------------------------------------------------------------------------
# src/utils/preprocessing_sara_data.py (renamed from src/preprocessing_sara_data.py)
# ---------------------------------------------------------------------------
# Only a fragment of ``add_label_streetlight_to_ply`` is visible in this
# change, so the function is not reproduced here. The visible modification
# makes the derived ``label_streetlight`` value 1 where ``label == 2`` and 0
# otherwise (previously 2 and 1).


# ---------------------------------------------------------------------------
# src/utils/utils.py
# ---------------------------------------------------------------------------

def average_points_in_laz_files(directory):
    """
    Compute the average number of points over all .laz files in a directory.

    Files that cannot be opened are reported on stdout and left out of the
    average.

    Parameters:
    directory (str): Directory to scan (non-recursively) for ``.laz`` files.

    Returns:
    float: Total number of points divided by the number of readable files.
    str: The message "No LAZ files found or readable." when no file could be
         counted.
    """
    laz_names = sorted(name for name in os.listdir(directory) if name.endswith(".laz"))
    if not laz_names:
        return "No LAZ files found or readable."

    import laspy

    total_points = 0
    file_count = 0
    for filename in laz_names:
        file_path = os.path.join(directory, filename)
        try:
            with laspy.open(file_path) as laz_file:
                # The header already stores the point count, so the point
                # records do not have to be read and decompressed.
                total_points += laz_file.header.point_count
            file_count += 1
        except Exception as e:
            # Deliberate best-effort: one unreadable file must not abort the scan.
            print(f"Error processing file {filename}: {e}")

    if file_count > 0:
        return total_points / file_count
    return "No LAZ files found or readable."


if __name__ == "__main__":
    # Guarded so that importing this module no longer scans the data directory
    # and prints as a side effect.
    directory = "data/bb_extracted_data_labelled"
    average = average_points_in_laz_files(directory)
    print(f"Average number of points per point cloud: {average}")