Skip to content

Commit

Permalink
structure of the repo as well as new code for extraction of bounding …
Browse files Browse the repository at this point in the history
…boxes
  • Loading branch information
adamvln committed Mar 25, 2024
1 parent 83fafaa commit 289448f
Show file tree
Hide file tree
Showing 3 changed files with 190 additions and 1 deletion.
160 changes: 160 additions & 0 deletions src/utils/bb_extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
"""
This script is designed to process a collection of .laz files (containing LiDAR point cloud data) located in a specified directory.
The primary function of the script is to create bounding boxes around certain points of interest (poles) identified in an external Excel file
and extract the relevant point cloud data from each .laz file into new, separate .laz files.
Key Features:
1. Reads .laz files from a given directory and extracts numerical values from their filenames. These numbers, once identified, are used to
determine specific sections of an Excel file (which contains coordinates of interest) to focus on.
2. Filters the coordinates in the Excel file based on the extracted values from each .laz file's name. This filtering narrows down the relevant
point coordinates around which bounding boxes will be created.
3. For each point of interest (pole), a bounding box is defined and applied to the corresponding .laz file to isolate the relevant points in the point cloud.
4. Extracts and writes these filtered points into new .laz files, each named using the output base filename followed by an underscore and the index of the pole in the data frame.
Usage:
- The script requires laspy, re, pandas, numpy, and os libraries.
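- The input directory, output base filename, and Excel sheet path are set in the `__main__` block at the bottom of the script.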
"""

import laspy
import re
import pandas as pd
import numpy as np
import os
import sys


def extract_numbers_from_string(s):
    """
    Extract the two numbers embedded in a LAZ filename and multiply each by 50.

    The function searches ``s`` for the pattern 'final_<number>_<number>.laz'.
    The dot before 'laz' is matched literally, so look-alike names such as
    'final_1_2xlaz' are rejected.

    Parameters:
    s (str): The string (typically a filename) from which to extract the numbers.

    Returns:
    tuple of int: A tuple containing the two extracted numbers multiplied by 50.
    Returns None if the pattern does not match.
    """
    # The dot is escaped on purpose: an unescaped '.' matches any character,
    # which would accept strings that do not really end in '.laz'.
    match = re.search(r'final_(\d+)_(\d+)\.laz', s)
    if match is None:
        return None

    num1, num2 = match.groups()
    return int(num1) * 50, int(num2) * 50

def filter_pole_coordinates(filename, base_x, base_y):
    """
    Load pole coordinates from an Excel file and keep the poles lying inside one 50 x 50 tile.

    A pole is kept only when both its ground point and its top point fall inside
    the tile, i.e. 'Grond X' and 'Top X' are within [base_x, base_x + 50] and
    'Grond Y' and 'Top Y' are within [base_y, base_y + 50] (bounds inclusive).

    Parameters:
    filename (str): Path to the Excel file.
    base_x (float): Lower X bound of the tile.
    base_y (float): Lower Y bound of the tile.

    Returns:
    pandas.DataFrame: The matching rows, restricted to the columns
    'Grond X', 'Top X', 'Grond Y', 'Top Y', 'Grond Z', 'Top Z'.
    The original row index of the sheet is preserved.
    """

    def _to_float(value):
        # Values may use a comma as decimal separator; normalise to a dot first.
        return float(str(value).replace(',', '.'))

    poles = pd.read_excel(filename)

    # NOTE(review): only the X/Y columns are normalised here; 'Grond Z' and 'Top Z'
    # are returned exactly as read from the sheet - confirm they are already numeric.
    for column in ('Grond X', 'Top X', 'Grond Y', 'Top Y'):
        poles[column] = poles[column].apply(_to_float)

    max_x = base_x + 50
    max_y = base_y + 50

    # Series.between is inclusive on both ends, matching ">= lower and <= upper".
    inside_tile = (
        poles['Grond X'].between(base_x, max_x)
        & poles['Top X'].between(base_x, max_x)
        & poles['Grond Y'].between(base_y, max_y)
        & poles['Top Y'].between(base_y, max_y)
    )

    wanted_columns = ['Grond X', 'Top X', 'Grond Y', 'Top Y', 'Grond Z', 'Top Z']
    return poles.loc[inside_tile, wanted_columns]

def create_bounding_box_laz(input_laz_file, base_filename, poles_df, box_size=1):
    """
    Cut one axis-aligned bounding box per pole out of a LAZ file and write each to its own LAZ file.

    For every row of ``poles_df`` a box is built around the segment between the
    pole's ground point and its top point, padded by ``box_size`` in X and Y.
    The X/Y extent uses the minimum and maximum of the two endpoints, so the
    box is correct regardless of the direction in which the pole leans.

    Parameters:
    input_laz_file (str): Path to the input LAZ file containing the original point cloud data.
    base_filename (str): Base path and filename for the output LAZ files.
    poles_df (pandas.DataFrame): DataFrame containing the coordinates
        ('Grond X', 'Top X', 'Grond Y', 'Top Y', 'Grond Z', 'Top Z') for each pole.
    box_size (float, optional): Padding added around each pole. Defaults to 1.

    Each output file is named ``f"{base_filename}_{index}.laz"`` where ``index`` is the
    row's index label in ``poles_df``. A file is written for every row, even when no
    point falls inside the box.
    """
    # Read the whole point cloud once; every pole is filtered from the same data.
    with laspy.open(input_laz_file) as file:
        las = file.read()

    # The coordinate arrays are the same for every pole, so look them up once.
    xs, ys, zs = las.x, las.y, las.z

    for index, row in poles_df.iterrows():
        # Top may be smaller than ground in X or Y when the pole leans that way;
        # taking min/max of both endpoints keeps the box from shrinking or inverting.
        min_x = min(row['Grond X'], row['Top X']) - box_size
        max_x = max(row['Grond X'], row['Top X']) + box_size
        min_y = min(row['Grond Y'], row['Top Y']) - box_size
        max_y = max(row['Grond Y'], row['Top Y']) + box_size

        # NOTE(review): the "+ 2" puts the lower Z bound at (2 - box_size) above
        # 'Grond Z'; presumably this trims ground points - confirm the intent.
        min_z = row['Grond Z'] - box_size + 2
        max_z = row['Top Z'] + box_size

        mask = (
            (xs >= min_x) & (xs <= max_x) &
            (ys >= min_y) & (ys <= max_y) &
            (zs >= min_z) & (zs <= max_z)
        )

        # Build a new LasData from the source header and keep only the masked points.
        new_las = laspy.LasData(las.header)
        new_las.points = las.points[mask]

        new_las.write(f"{base_filename}_{index}.laz")

def main(directory, output_base_filename, sheet):
    """
    Process every .laz file in a directory and extract one bounding-box point cloud per pole.

    For each .laz file, the base X and Y coordinates are derived from the filename,
    the poles inside that tile are selected from the Excel sheet, and a new .laz file
    is written for each selected pole.

    Files ending in '.laz' whose name does not follow 'final_<number>_<number>.laz'
    are skipped with a message instead of aborting the whole run.

    Parameters:
    directory (str): Directory containing the original .laz files to process.
    output_base_filename (str): Base path and filename for the output .laz files.
    sheet (str): Path to the Excel file holding the pole coordinates.
    """
    for filename in os.listdir(directory):
        if not filename.endswith(".laz"):
            continue

        tile_origin = extract_numbers_from_string(filename)
        if tile_origin is None:
            # Unpacking None would raise TypeError and stop the remaining files
            # from being processed, so report the file and move on.
            print(f"Skipping {filename}: name does not match 'final_<number>_<number>.laz'")
            continue
        base_x, base_y = tile_origin

        input_laz_file = os.path.join(directory, filename)
        filtered_data = filter_pole_coordinates(sheet, base_x, base_y)

        # Write one output file per pole found in this tile.
        create_bounding_box_laz(input_laz_file, output_base_filename, filtered_data)

if __name__ == "__main__":
    # Default input/output locations, relative to the current working directory.
    directory = 'data/laz_pc_data'
    sheet = 'data/sheets/fused_coordinates_poles.xlsx'
    output_base_filename = 'data/bb_extracted_data_unlabelled/bb'

    main(
        directory=directory,
        output_base_filename=output_base_filename,
        sheet=sheet,
    )


Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def add_label_streetlight_to_ply(input_directory, output_directory):

if 'label' in ply_data['vertex'].data.dtype.names:
labels = ply_data['vertex']['label']
label_streetlight = np.array([2 if label == 2 else 1 for label in labels], dtype=np.int32)
label_streetlight = np.array([1 if label == 2 else 0 for label in labels], dtype=np.int32)
new_dtype = ply_data['vertex'].data.dtype.descr + [('label_streetlight', 'i4')]
new_data = np.empty(ply_data['vertex'].count, dtype=new_dtype)
for prop in ply_data['vertex'].data.dtype.names:
Expand Down
29 changes: 29 additions & 0 deletions src/utils/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import os
import laspy

def average_points_in_laz_files(directory):
    """
    Compute the mean number of points per .laz file in a directory.

    The point count is taken from each file's header, so the point records
    themselves are not loaded just to be counted.
    Files that cannot be opened are reported on stdout and left out of the average.

    Parameters:
    directory (str): Directory to scan (non-recursively) for files ending in '.laz'.

    Returns:
    float: Total number of points divided by the number of files read successfully.
    str: The message "No LAZ files found or readable." when no file could be read.
        (Kept as a string for backward compatibility with existing callers.)
    """
    total_points = 0
    file_count = 0

    for filename in os.listdir(directory):
        if not filename.endswith(".laz"):
            continue

        file_path = os.path.join(directory, filename)
        try:
            with laspy.open(file_path) as laz_file:
                point_count = laz_file.header.point_count
        except Exception as e:
            # Best-effort: one unreadable file must not abort the whole scan.
            print(f"Error processing file {filename}: {e}")
            continue

        total_points += point_count
        file_count += 1

    if file_count == 0:
        return "No LAZ files found or readable."
    return total_points / file_count

# Report the average point count of the labelled bounding-box point clouds.
# NOTE(review): these statements sit at module level with no `__main__` guard
# visible here, so they would also run whenever this module is imported - confirm
# that is intended.
directory = "data/bb_extracted_data_labelled"
average = average_points_in_laz_files(directory)
# `average` is either a number or the "no files" message string; both format fine.
print(f"Average number of points per point cloud: {average}")

0 comments on commit 289448f

Please sign in to comment.