# edit_dataset.py
#
# Combine every CSV file in a folder into a single CSV file whose rows are
# written in random order.
import csv
import random
import os
def combine_and_shuffle_csv(input_folder, output_file, *, encoding=None):
    """Merge every CSV file in a folder into one file with shuffled rows.

    Each row of each ``.csv`` file located directly in ``input_folder`` is
    read, the combined rows are shuffled with ``random.shuffle``, and the
    result is written to ``output_file``. All rows are treated alike, so a
    header row in an input file is shuffled like any other row.

    Args:
        input_folder: Directory whose ``.csv`` files are combined.
            Sub-directories are not searched.
        output_file: Path of the CSV file to create or overwrite. If it lies
            inside ``input_folder`` it is not read as an input.
        encoding: Text encoding used for reading and writing. ``None`` (the
            default) keeps the platform default encoding.

    Raises:
        OSError: If the folder cannot be listed or a file cannot be opened.
    """
    output_path = os.path.normcase(os.path.abspath(output_file))

    # os.listdir returns names in arbitrary order; sorting makes the result
    # reproducible when the caller seeds the random module.
    csv_paths = []
    for name in sorted(os.listdir(input_folder)):
        if not name.endswith(".csv"):
            continue
        path = os.path.join(input_folder, name)
        # Skip a previous output stored in the input folder; otherwise every
        # re-run would fold the old result back in and duplicate the rows.
        if os.path.normcase(os.path.abspath(path)) == output_path:
            continue
        # Ignore directories that merely carry a ".csv" suffix.
        if os.path.isfile(path):
            csv_paths.append(path)

    # Read and combine the rows of all input files.
    combined_rows = []
    for path in csv_paths:
        # newline='' lets the csv module handle line endings itself, so line
        # breaks embedded in quoted fields are preserved unchanged.
        with open(path, 'r', newline='', encoding=encoding) as file:
            combined_rows.extend(csv.reader(file))

    # Shuffle the combined rows in place.
    random.shuffle(combined_rows)

    # Write the shuffled rows to the output CSV file.
    with open(output_file, 'w', newline='', encoding=encoding) as file:
        csv.writer(file).writerows(combined_rows)

    print("File successfully saved: ", output_file)
# Example usage: combine the CSV files found in 'csv_files_folder' and write
# the shuffled rows to 'combined_and_shuffled.csv'.
input_folder, output_file = 'csv_files_folder', 'combined_and_shuffled.csv'
combine_and_shuffle_csv(input_folder=input_folder, output_file=output_file)