Skip to content

Commit

Permalink
Trying to get the map working
Browse files Browse the repository at this point in the history
  • Loading branch information
kobinabrandon committed Aug 5, 2024
1 parent 627d5d1 commit 54ae426
Show file tree
Hide file tree
Showing 3 changed files with 227 additions and 21 deletions.
202 changes: 202 additions & 0 deletions .ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd \n",
"\n",
"data = pd.read_parquet(\"/home/kobina/ML/Projects/Hourly-Divvy-Trip-Predictor/data/geographical/indexer_two/start_replaced_missing_names_and_ids.parquet\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>start_lat</th>\n",
" <th>start_lng</th>\n",
" <th>start_station_id</th>\n",
" <th>start_station_name</th>\n",
" <th>start_hour</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>41.903267</td>\n",
" <td>-87.634737</td>\n",
" <td>KA1504000135</td>\n",
" <td>Wells St &amp; Elm St</td>\n",
" <td>2024-01-12 15:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>41.902937</td>\n",
" <td>-87.634440</td>\n",
" <td>KA1504000135</td>\n",
" <td>Wells St &amp; Elm St</td>\n",
" <td>2024-01-08 15:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>41.902951</td>\n",
" <td>-87.634470</td>\n",
" <td>KA1504000135</td>\n",
" <td>Wells St &amp; Elm St</td>\n",
" <td>2024-01-27 12:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>41.884295</td>\n",
" <td>-87.633963</td>\n",
" <td>TA1305000030</td>\n",
" <td>Wells St &amp; Randolph St</td>\n",
" <td>2024-01-29 16:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>41.948797</td>\n",
" <td>-87.675278</td>\n",
" <td>13253</td>\n",
" <td>Lincoln Ave &amp; Waveland Ave</td>\n",
" <td>2024-01-31 05:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>710716</th>\n",
" <td>41.968466</td>\n",
" <td>-87.674225</td>\n",
" <td>TA1309000066</td>\n",
" <td>Ravenswood Ave &amp; Lawrence Ave</td>\n",
" <td>2024-06-11 08:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>710717</th>\n",
" <td>41.967121</td>\n",
" <td>-87.679127</td>\n",
" <td>TA1307000158</td>\n",
" <td>Damen Ave &amp; Leland Ave</td>\n",
" <td>2024-06-24 11:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>710718</th>\n",
" <td>41.967154</td>\n",
" <td>-87.679091</td>\n",
" <td>TA1307000158</td>\n",
" <td>Damen Ave &amp; Leland Ave</td>\n",
" <td>2024-06-30 10:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>710719</th>\n",
" <td>41.954404</td>\n",
" <td>-87.647983</td>\n",
" <td>TA1308000022</td>\n",
" <td>Pine Grove Ave &amp; Irving Park Rd</td>\n",
" <td>2024-06-11 18:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>710720</th>\n",
" <td>41.968486</td>\n",
" <td>-87.674196</td>\n",
" <td>TA1309000066</td>\n",
" <td>Ravenswood Ave &amp; Lawrence Ave</td>\n",
" <td>2024-06-15 15:00:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2392578 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" start_lat start_lng start_station_id \\\n",
"0 41.903267 -87.634737 KA1504000135 \n",
"1 41.902937 -87.634440 KA1504000135 \n",
"2 41.902951 -87.634470 KA1504000135 \n",
"3 41.884295 -87.633963 TA1305000030 \n",
"4 41.948797 -87.675278 13253 \n",
"... ... ... ... \n",
"710716 41.968466 -87.674225 TA1309000066 \n",
"710717 41.967121 -87.679127 TA1307000158 \n",
"710718 41.967154 -87.679091 TA1307000158 \n",
"710719 41.954404 -87.647983 TA1308000022 \n",
"710720 41.968486 -87.674196 TA1309000066 \n",
"\n",
" start_station_name start_hour \n",
"0 Wells St & Elm St 2024-01-12 15:00:00 \n",
"1 Wells St & Elm St 2024-01-08 15:00:00 \n",
"2 Wells St & Elm St 2024-01-27 12:00:00 \n",
"3 Wells St & Randolph St 2024-01-29 16:00:00 \n",
"4 Lincoln Ave & Waveland Ave 2024-01-31 05:00:00 \n",
"... ... ... \n",
"710716 Ravenswood Ave & Lawrence Ave 2024-06-11 08:00:00 \n",
"710717 Damen Ave & Leland Ave 2024-06-24 11:00:00 \n",
"710718 Damen Ave & Leland Ave 2024-06-30 10:00:00 \n",
"710719 Pine Grove Ave & Irving Park Rd 2024-06-11 18:00:00 \n",
"710720 Ravenswood Ave & Lawrence Ave 2024-06-15 15:00:00 \n",
"\n",
"[2392578 rows x 5 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "src-ptrGecWl-py3.11",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
28 changes: 17 additions & 11 deletions src/feature_pipeline/station_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def match_names_and_ids_by_station_proximity(self, save: bool = True) -> dict[in
dict[int, tuple[str|int, str]]: key, value pairs of row indices and their newly discovered station IDs
and names
"""
matched_coordinates_path = INDEXER_TWO / f"matched_{self.scenario}_coordinates_with_new_ids_and_names.json"
matched_coordinates_path = INDEXER_TWO / f"{self.scenario}_coordinates_with_new_ids_and_names.json"

if Path(matched_coordinates_path).exists():
logger.success("The matching operation has already been done. Fetching local file...")
Expand Down Expand Up @@ -339,22 +339,22 @@ def replace_missing_station_names_and_ids(self, save: bool = True) -> pd.DataFra

return self.data

def save_geodata(self) -> None:
def save_geodata(
self,
station_names: pd.Series,
station_ids: pd.Series,
latitudes: pd.Series,
longitudes: pd.Series
) -> None:
"""
Saves the station ID, name, and coordinates for use in the frontend
"""

latitudes = self.data.iloc[:, self.latitudes_index]
longitudes = self.data.iloc[:, self.longitudes_index]
station_ids = self.data.iloc[:, self.station_id_index]
station_names = self.data.iloc[:, self.station_name_index]

geodata = {
station_name: [(latitude, longitude), station_id] for (latitude, longitude, station_id, station_name) \
str(station_name): [(latitude, longitude), station_id] for (latitude, longitude, station_id, station_name) \
in zip(latitudes, longitudes, station_ids, station_names)
}

with open(INDEXER_TWO / f"{self.scenario}_geodata_indexer_two.json", mode="w") as file:
with open(INDEXER_TWO / f"{self.scenario}_geodata.json", mode="w") as file:
json.dump(geodata, file)

def execute(self, delete_leftover_rows: bool = True, save: bool = True) -> pd.DataFrame:
Expand Down Expand Up @@ -408,7 +408,13 @@ def execute(self, delete_leftover_rows: bool = True, save: bool = True) -> pd.Da
self.data.iloc[:, self.station_id_index] = station_ids.map(old_and_new_ids)

self.data = self.data.reset_index(drop=True)
self.save_geodata()

self.save_geodata(
latitudes=self.data.iloc[:, self.latitudes_index],
longitudes=self.data.iloc[:, self.longitudes_index],
station_ids=self.data.iloc[:, self.station_id_index],
station_names=self.data.iloc[:, self.station_name_index],
)

self.data = self.data.drop(
columns=[f"{self.scenario}_lat", f"{self.scenario}_lat", f"{self.scenario}_station_name"]
Expand Down
18 changes: 8 additions & 10 deletions src/inference_pipeline/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,22 +76,22 @@ def load_geodata(scenario: str, indexer: str = "two") -> pd.DataFrame | gpd.GeoD
return updated_geodata

elif indexer == "two":
with open(INDEXER_TWO/f"{scenario}_geodata_indexer_two.json") as file:
with open(INDEXER_TWO/f"{scenario}_geodata.json") as file:
geodata_dict = json.load(file)

coordinates = [value[0] for value in geodata_dict.values()]
station_ids = [value[1] for value in geodata_dict.values()]

geodata = gpd.GeoDataFrame(
geodata_df = gpd.GeoDataFrame(
{
f"{scenario}_station_names": geodata_dict.keys(),
f"{scenario}_station_ids": station_ids,
"coordinates": coordinates
}
)

st.sidebar.write("✅ Station IDs & Coordinates Obtained...")
return geodata
st.sidebar.write("✅ Retrieved Station Names, IDs & Coordinates")
return geodata_df


def get_hourly_predictions(
Expand Down Expand Up @@ -128,9 +128,6 @@ def get_hourly_predictions(
to_hour=to_hour
)

print(predictions_df.head())
breakpoint()

next_hour_ready = False if predictions_df[predictions_df[f"{scenario}_hour"] == to_hour].empty else True
previous_hour_ready = False if predictions_df[predictions_df[f"{scenario}_hour"] == to_hour].empty else True

Expand Down Expand Up @@ -249,7 +246,7 @@ def prep_data_for_plotting(scenario: str, predictions: pd.DataFrame, geodata: pd
Returns:
None.
"""
with st.spinner(text="Preparing data..."):
with st.spinner(text="Preparing data for plotting..."):
data = pd.merge(
left=geodata,
right=predictions,
Expand Down Expand Up @@ -280,6 +277,8 @@ def plot_time_series(scenario: str, features: pd.DataFrame, predictions: pd.Data

for row in row_indices[:n_to_plot]:
station_id = predictions[f"{scenario}_station_id"].iloc[row]


prediction = predictions[f"predicted_{scenario}s"].iloc[row]

st.metric(
Expand Down Expand Up @@ -320,9 +319,8 @@ def construct_page(model_name: str):

features = provide_features(scenario=scenario, target_date=current_hour)
geo_df = load_geodata(scenario=scenario)

predictions_df: pd.DataFrame = get_hourly_predictions(scenario=scenario, model_name=model_name)

station_map = make_map(geodata=geo_df)


if __name__ == "__main__":
Expand Down

0 comments on commit 54ae426

Please sign in to comment.