-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
115 lines (92 loc) · 3.85 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# Fictitious data to demonstrate data analysis methods using pandas and matplotlib
# Data was generated by a ChatGPT prompt
# Functions and code written by Peter Pisarz
from automobile_data import create_run
import pandas as pd
import matplotlib.pyplot as plt
import re
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
# Function to plot data with respect to time
def plot(df, param):
    """Plot one column of ``df`` against its 'Time (s)' column and show it.

    Args:
        df: DataFrame holding a 'Time (s)' column and a column named
            ``param``.
        param: Name of the column to plot. The text inside the first pair
            of parentheses (e.g. ``psi`` in ``'Oil Pressure (psi)'``) is
            used as the y-axis label; when the name has no parentheses the
            column name itself is used.

    When ``param`` contains 'RPM', a dashed red reference line is drawn at
    y=6000 and labelled as the redline.
    """
    plt.plot(df['Time (s)'], df[param], label=f'Engine Data: Time v {param}')
    plt.xlabel('Time (s)')

    # Pull the units out of names such as 'Oil Pressure (psi)'.
    units = re.search(r'\((.*?)\)', param)
    plt.ylabel(units.group(1) if units else param)

    if 'RPM' in param:
        plt.axhline(y=6000, color='red', linestyle='--', label='Redline at 6000rpm')

    # Build the legend only after every labelled artist exists; calling it
    # before axhline() would leave the redline entry out.
    plt.legend()
    plt.show()
# Function to find corresponding data values to a specific rpm
def getValueAtRPM(rpm, value):
    """Return the entries of column ``value`` for rows whose 'RPM' equals ``rpm``.

    Reads the module-level DataFrame ``df``.

    Args:
        rpm: RPM reading matched exactly against the 'RPM' column.
        value: Name of the column to read.

    Returns:
        An array with the matching values (empty when no row matches), or
        ``None`` when the lookup fails, e.g. because a column is missing.
    """
    try:
        return df.loc[df['RPM'] == rpm, value].values
    except (KeyError, ValueError) as e:
        # A missing 'RPM' or ``value`` column surfaces as KeyError; report
        # the cause instead of letting the script die on a bad column name.
        print(f'No result for {rpm} RPM: {e}')
        return None
def getRPMInRange(value, low, high):
    """List the 'RPM' readings of rows where ``low <= df[value] < high``.

    Reads the module-level DataFrame ``df``.

    Args:
        value: Name of the column to filter on.
        low: Inclusive lower bound.
        high: Exclusive upper bound.

    Returns:
        A list of the matching 'RPM' values, in row order.
    """
    column = df[value]
    in_range = (column >= low) & (column < high)
    return df.loc[in_range, 'RPM'].tolist()
if __name__ == '__main__':
    # Demo script: load the generated run, print a few lookups and summary
    # statistics, plot RPM over time, then cluster RPM vs fuel consumption.
    #
    # ``df`` is intentionally a module-level name: getValueAtRPM and
    # getRPMInRange read it as a global.
    df = create_run()

    # Display options to view df
    pd.set_option('display.width', 0)
    pd.set_option('display.max_columns', None)
    print('Here is the original dataframe:')
    print(df, '\n')

    # Fuel consumption at 2000 RPM
    loc_rpm = 2000
    param1 = 'Fuel Consumption (GPM)'
    result = getValueAtRPM(loc_rpm, param1)
    # ``result`` is an array (or None). Check its length explicitly: the
    # truth value of an array is ambiguous and raises for multi-element
    # (and, on recent NumPy, empty) arrays.
    if result is not None and len(result) > 0:
        print(f'The {param1} at {loc_rpm}rpm is {result[0]}.\n')

    # Get the RPMs at which the oil pressure is within a certain range
    low = 30.00
    high = 45.00
    param2 = 'Oil Pressure (psi)'
    result2 = getRPMInRange(param2, low, high)
    print(f'{param2} reads between {low} and {high} for the following RPM:\n', result2, '\n')

    # Get the average Engine Temperature
    avg_engine_temp = df['Engine Temperature (°F)'].mean()
    print(f"The average Engine Temperature is {avg_engine_temp} °F\n")

    # Get min and max values of Battery Voltage and their corresponding RPMs
    voltage = df['Battery Voltage (V)']
    min_v, max_v = voltage.min(), voltage.max()
    min_v_rpm = df.loc[voltage == min_v, 'RPM'].values
    max_v_rpm = df.loc[voltage == max_v, 'RPM'].values
    # Trailing space after 'and' keeps the two concatenated literals apart.
    print(f'The minimum voltage is {min_v}(V) at {min_v_rpm[0]}RPM, and '
          f'the maximum voltage is {max_v}(V) at {max_v_rpm[0]}RPM.\n')

    # Create a new df with rows containing Throttle position greater than 85%.
    # A boolean mask keeps the column dtypes, unlike rebuilding from iterrows().
    df_throttle = df[df['Throttle Position (%)'] > 85.00]
    print('Here is the dataframe displaying rows with Throttle > 85%:')
    print(df_throttle, '\n')

    # Plot RPMs over Time
    plot(df, 'RPM')

    # Raw scatter of the two features that are clustered below
    plt.scatter(df['RPM'], df['Fuel Consumption (GPM)'])
    plt.xlabel('RPM')
    plt.ylabel('Fuel Consumption (GPM)')
    plt.title('Fuel Consumption vs RPM')
    plt.show()

    # Preprocessing data for KMeans: standardise both features so neither
    # dominates the distance computation.
    X = df[['RPM', 'Fuel Consumption (GPM)']].copy()
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    kmeans = KMeans(n_clusters=2, random_state=42)
    df['Clusters'] = kmeans.fit_predict(X_scaled)

    # Centers come back in scaled space; map them to original units for plotting.
    centers = kmeans.cluster_centers_
    centers_original = scaler.inverse_transform(centers)

    plt.scatter(df['RPM'], df['Fuel Consumption (GPM)'], c=df['Clusters'], cmap='viridis')
    plt.scatter(centers_original[:, 0], centers_original[:, 1], c='red', s=200, alpha=0.75)
    plt.xlabel('RPM')
    plt.ylabel('Fuel Consumption (GPM)')
    plt.title('Clusters')
    plt.show()

    print("Cluster centers (scaled data):")
    print(centers)
    # print("\nCluster assignment for each input:")
    # print(df[['Time (s)', 'Clusters']])