-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
30 changed files
with
1,260 additions
and
396 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file added
BIN
+5.13 KB
src/data_processing/data_utils/__pycache__/split_dataset.cpython-38.pyc
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file added
BIN
+2.34 KB
src/data_processing/ml_processing/__pycache__/annotation_analysis.cpython-38.pyc
Binary file not shown.
Binary file added
BIN
+2.03 KB
src/data_processing/ml_processing/__pycache__/dataset_processing_module.cpython-38.pyc
Binary file not shown.
Binary file added
BIN
+7.24 KB
src/data_processing/ml_processing/__pycache__/image_analysis.cpython-38.pyc
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+3.08 KB
src/data_processing/ml_processing/__pycache__/recommendation_module.cpython-38.pyc
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
import pandas as pd | ||
from tqdm import tqdm | ||
from collections import defaultdict | ||
import os | ||
|
||
def calculate_iou(bbox1, bbox2):
    """
    Compute the Intersection over Union (IoU) of two bounding boxes.

    Each bbox is given as [x_min, y_min, x_max, y_max].

    Returns:
        The IoU of the two boxes. 0.0 is returned when the boxes do not
        overlap, and also when their union has no area (for example when
        both boxes are degenerate points or lines).
    """
    x_left = max(bbox1[0], bbox2[0])
    y_top = max(bbox1[1], bbox2[1])
    x_right = min(bbox1[2], bbox2[2])
    y_bottom = min(bbox1[3], bbox2[3])

    # No overlap along at least one axis.
    if x_right < x_left or y_bottom < y_top:
        return 0.0

    intersection_area = (x_right - x_left) * (y_bottom - y_top)
    bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
    bbox2_area = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
    union_area = bbox1_area + bbox2_area - intersection_area

    # Explicit guard instead of a bare ``except``: a zero-area union has no
    # meaningful ratio. Checking up front also covers NumPy scalars, where a
    # division by zero produces nan with a warning instead of raising.
    if union_area <= 0:
        return 0.0

    return intersection_area / float(union_area)
|
||
|
||
def analysis_yolo_annotations(annotation_paths):
    """
    Collect bounding-box statistics from a set of annotation text files.

    Every line that splits into exactly five whitespace-separated fields is
    read as ``class x_center y_center width height`` (all parsed as floats);
    lines with any other number of fields are skipped.

    Args:
        annotation_paths: Iterable of paths to annotation files.

    Returns:
        dict with the keys 'Average BBox Width', 'Average BBox Height',
        'Min BBox Width', 'Min BBox Height', 'Max BBox Width',
        'Max BBox Height', 'Average Aspect Ratio',
        'Average Objects Per Image' and 'Average Overlap'.
        'Average Objects Per Image' only counts files that contributed at
        least one box and falls back to 1 when no box was read at all.
        'Average Overlap' is the mean IoU over box pairs of the same file
        with IoU > 0, or 0 when there is no such pair.

    Raises:
        ValueError: if a five-field line contains a non-numeric field.
        OSError: if an annotation file cannot be opened.
    """
    bbox_sizes = []
    aspect_ratios = []
    objects_per_image = defaultdict(int)
    overlaps = []

    for annotation_path in tqdm(annotation_paths, desc="Annotation analyze"):
        # Strip only the file extension. Splitting on the first '.' would
        # truncate paths such as './labels/a.txt' to '' and merge unrelated
        # files under one image id.
        image_id = os.path.splitext(annotation_path)[0]
        bboxes = []
        with open(annotation_path, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) != 5:
                    continue
                _, x_center, y_center, width, height = map(float, parts)
                # Convert centre/size to [x_min, y_min, x_max, y_max].
                bboxes.append([x_center - width / 2, y_center - height / 2,
                               x_center + width / 2, y_center + height / 2])
                bbox_sizes.append((width, height))
                # A zero-height box has no defined ratio; the width is
                # recorded in its place, as before.
                aspect_ratios.append(width / height if height else width)
                objects_per_image[image_id] += 1

        # Overlap analysis: every unordered pair of boxes within this file.
        for i, first_box in enumerate(bboxes):
            for second_box in bboxes[i + 1:]:
                iou = calculate_iou(first_box, second_box)
                if iou > 0:
                    overlaps.append(iou)

    if objects_per_image:
        avg_objects_per_image = sum(objects_per_image.values()) / len(objects_per_image)
    else:
        # No boxes were read from any file; keep the historical default.
        avg_objects_per_image = 1

    bbox_sizes_df = pd.DataFrame(bbox_sizes, columns=['Width', 'Height'])
    aspect_ratios_df = pd.DataFrame(aspect_ratios, columns=['Aspect Ratio'])

    analysis_results = {
        'Average BBox Width': bbox_sizes_df['Width'].mean(),
        'Average BBox Height': bbox_sizes_df['Height'].mean(),
        'Min BBox Width': bbox_sizes_df['Width'].min(),
        'Min BBox Height': bbox_sizes_df['Height'].min(),
        'Max BBox Width': bbox_sizes_df['Width'].max(),
        'Max BBox Height': bbox_sizes_df['Height'].max(),
        'Average Aspect Ratio': aspect_ratios_df['Aspect Ratio'].mean(),
        'Average Objects Per Image': avg_objects_per_image,
        'Average Overlap': sum(overlaps) / len(overlaps) if overlaps else 0,
    }
    return analysis_results
53 changes: 53 additions & 0 deletions
53
src/data_processing/ml_processing/dataset_processing_module.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
from pathlib import Path | ||
import numpy as np | ||
import pandas as pd | ||
import warnings | ||
import os | ||
from loguru import logger | ||
from pathlib import Path | ||
import numpy as np | ||
import csv | ||
import yaml | ||
from collections import Counter | ||
file = Path(__file__).resolve() | ||
|
||
from src.data_processing.ml_processing.plots import plot_class_balance | ||
from src.data_processing.ml_processing.annotation_analysis import analysis_yolo_annotations | ||
from src.data_processing.ml_processing.image_analysis import analysis_stats, analysis_image_dataset | ||
|
||
|
||
def find_paths(data_path, image_mode=True):
    """
    Collect file paths from the sub-folders of ``data_path``.

    Only the immediate sub-directories of ``data_path`` are walked
    (recursively); files lying directly in ``data_path`` are not returned.

    Args:
        data_path: Root directory of the dataset.
        image_mode: When true, match ``.jpg``/``.jpeg``/``.png`` files;
            otherwise match ``.txt`` files. Matching ignores case.

    Returns:
        list of matching file paths as strings.
    """
    # NOTE(review): the original inline comment read
    # "info_processor older find_image" - presumably the former name/location
    # of this helper; confirm before relying on it.
    if image_mode:
        wanted_suffixes = {".jpg", ".jpeg", ".png"}
    else:
        wanted_suffixes = {".txt"}

    base_dir = Path(data_path)
    subfolders = [entry.name for entry in base_dir.iterdir() if entry.is_dir()]

    collected = []
    for subfolder in subfolders:
        for current_dir, _subdirs, filenames in os.walk(base_dir / subfolder):
            collected.extend(
                os.path.join(current_dir, filename)
                for filename in filenames
                if os.path.splitext(filename)[1].lower() in wanted_suffixes
            )
    return collected
|
||
|
||
def feature_extraction(dataset_path, classes_path, run_path):
    """
    Build a one-row feature table for a dataset and save it as CSV.

    Image and annotation files are located with ``find_paths`` and passed to
    ``analysis_image_dataset``, ``analysis_yolo_annotations`` and
    ``analysis_stats``. Their results are placed side by side in a single
    DataFrame, which is written to ``dataset_features.csv`` under
    ``run_path``.

    Args:
        dataset_path: Root directory of the dataset.
        classes_path: Forwarded unchanged to ``analysis_stats``.
        run_path: Output location; must support the ``/`` operator
            (presumably a ``pathlib.Path`` - confirm against callers).

    Returns:
        The combined feature DataFrame.
    """
    image_files = find_paths(dataset_path, image_mode=True)
    annotation_files = find_paths(dataset_path, image_mode=False)

    image_summary, color_stats = analysis_image_dataset(image_files)
    annotation_summary = analysis_yolo_annotations(annotation_files)
    stats_summary = analysis_stats(image_files, annotation_files, classes_path, run_path)

    # One single-row frame per analysis; the color stats are first averaged
    # column-wise so they also collapse to a single row.
    feature_frames = [
        pd.DataFrame([image_summary]),
        pd.DataFrame([pd.DataFrame(color_stats).mean().to_dict()]),
        pd.DataFrame([annotation_summary]),
        pd.DataFrame([stats_summary]),
    ]
    df_dataset_features = pd.concat(feature_frames, axis=1)

    output_file = run_path / 'dataset_features.csv'
    df_dataset_features.to_csv(output_file, index=False)

    return df_dataset_features
|
||
|
||
|
||
|
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.