Skip to content

Commit

Permalink
Clean up files. Add more validation on FE and BE
Browse files Browse the repository at this point in the history
  • Loading branch information
phyeony committed Dec 8, 2022
1 parent c5d16af commit 714c884
Show file tree
Hide file tree
Showing 46 changed files with 29 additions and 23,913 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
.venv
.vscode
.python-version
.python-version
Binary file removed backend/__pycache__/main.cpython-310.pyc
Binary file not shown.
6 changes: 4 additions & 2 deletions backend/src/airbnb_model.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
from pydantic import BaseModel, root_validator
from pydantic import BaseModel, root_validator, conlist
from typing import List, Literal

class Options(BaseModel):
min_price: int = None
max_price: int = None
airbnb_room_type: List[Literal['Entire home/apt', 'Private room', 'Shared room', 'Hotel room']]
airbnb_room_type: conlist(Literal['Entire home/apt', 'Private room', 'Shared room', 'Hotel room'], min_items=1)
activity_preference: List [Literal["entertainment", "food", "leisure", "transportation", "shop", "tourism"]]

@root_validator
def check_prices(cls, values):
    """Validate the optional price bounds against each other and the cap.

    Both ``min_price`` and ``max_price`` default to ``None``; a bound that
    was not supplied is skipped instead of being compared, which would
    otherwise raise ``TypeError`` (``None > int``).

    Raises:
        ValueError: if ``min_price`` exceeds ``max_price``, or if
            ``max_price`` is above 20000.
    """
    min_price, max_price = values.get('min_price'), values.get('max_price')
    # Only compare the bounds when both were actually provided.
    if min_price is not None and max_price is not None and min_price > max_price:
        raise ValueError('max_price should be bigger than min_price.')
    # 20000 is the cap named in the error message below.
    if max_price is not None and max_price > 20000:
        raise ValueError('Too expensive! No airbnbs above $20000 CAD. Lower max_price.')
    return values

# Doesn't seem to do validation. TODO: might wanna look into it more.
Expand Down
3 changes: 1 addition & 2 deletions backend/src/airbnb_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ def compute_airbnb(user_preference: Options) -> pd.DataFrame:

# Filter with room type
# ['Entire home/apt', 'Private room', 'Shared room', 'Hotel room']
if len(airbnb_room_type)>0:
df = df[df['room_type'].isin(airbnb_room_type)]
df = df[df['room_type'].isin(airbnb_room_type)]

# Filter with price range
if min_price is not None and max_price is not None:
Expand Down
Empty file added data-analysis/.gitignore
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
4 changes: 1 addition & 3 deletions data-analysis/airbnb/airbnb_cleaning.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#%%
import pandas as pd
from shapely.geometry import Point, Polygon

airbnb = pd.read_csv('airbnb_listings.csv.gz')

Expand All @@ -14,10 +13,9 @@ def d_to_price(d_price):
# Data cleaning - only using the data we need
airbnb = airbnb[['name', 'listing_url','neighbourhood', 'latitude', 'longitude', 'price', 'room_type', 'review_scores_rating']]
airbnb['price'] = airbnb['price'].apply(lambda x: d_to_price(x))
# airbnb = airbnb.sort_values(by=['price']) #min = 0, max = 20000
airbnb = airbnb.sort_values(by=['price']) #min = 0, max = 20000

# Export to a cleaned csv file
airbnb.to_csv("cleaned_airbnb_data.csv", index=False)


# %%
5 changes: 2 additions & 3 deletions data-analysis/airbnb/airbnb_intersection.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import pandas as pd
import sys
import geopandas as gpd
from shapely.geometry import Point, Polygon
from shapely.geometry import Point
import shapely

def count_overlapping_features(gdf: gpd.GeoDataFrame):
Expand All @@ -21,7 +20,7 @@ def count_overlapping_features(gdf: gpd.GeoDataFrame):

def amentiy_intersections(amenity: str):
# 1) get all amenity intersection points(coordinates) -> return only with points with intersection counts bigger than average count
amen = pd.read_csv(amenity)
amen = pd.read_csv(f'cleaned_data/{amenity}')
gdamen = gpd.GeoDataFrame(amen, geometry=gpd.points_from_xy(amen.lon, amen.lat), crs='epsg:4326')
gdamen = gdamen.to_crs("EPSG:32634")
gdamen['geometry'] = gdamen.buffer(300, 3)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,68 +1,44 @@
#%%
import pandas as pd
import sys
import geopandas as gpd
from shapely.geometry import Point, Polygon
import shapely
import airbnb_intersection as inters

def filtering(min_price, max_price, room_type):
    """Load ``airbnb_score.csv`` and return the listings matching the filters.

    The previous version computed the price-filtered frame but returned the
    unfiltered one and never used ``room_type``; this version returns the
    filtered rows.

    Args:
        min_price: Lower price bound (inclusive); converted with ``float``.
        max_price: Upper price bound (inclusive); converted with ``float``.
        room_type: A room type or a collection of room types to keep, e.g.
            ``["Entire home/apt", "Hotel room"]``. Known values are
            "Entire home/apt", "Hotel room", "Private room" and
            "Shared room". A falsy value (``None``, ``[]``, ``""``) skips
            the room-type filter.

    Returns:
        DataFrame with the rows of ``airbnb_score.csv`` whose ``price`` lies
        in ``[min_price, max_price]`` and whose ``room_type`` is one of the
        requested types.
    """
    # NOTE(review): assumes the CSV has 'price' and 'room_type' columns, as
    # the cleaned Airbnb data does - confirm for airbnb_score.csv.
    airbnb = pd.read_csv('airbnb_score.csv')

    # filter price range (inclusive on both ends)
    low, high = float(min_price), float(max_price)
    selected = airbnb[(airbnb['price'] >= low) & (airbnb['price'] <= high)]

    # filter room type
    if room_type:
        # Accept a single room type as well as a list of them.
        wanted = [room_type] if isinstance(room_type, str) else list(room_type)
        selected = selected[selected['room_type'].isin(wanted)]

    return selected

# # cleaned_entertainment.csv
# airbnb = pd.read_csv('cleaned_airbnb_data.csv')
# airbnb = pd.read_csv('cleaned_data/cleaned_airbnb_data.csv')
# amen_intersection, name = inters.amentiy_intersections('cleaned_entertainment.csv')
# intersection_airbnb = inters.intersection_score(airbnb, amen_intersection, name)
# intersection_airbnb.to_csv('airbnb_' + name[0] +'.csv', index=False) # -> airbnb_e.csv with entertainment

# # cleaned_food.csv
# # airbnb = intersection_airbnb
# airbnb = pd.read_csv('airbnb_e.csv')
# airbnb = pd.read_csv('intermediate_scores/airbnb_e.csv')
# amen_intersection, name = inters.amentiy_intersections('cleaned_food.csv')
# intersection_airbnb = inters.intersection_score(airbnb, amen_intersection, name)
# intersection_airbnb.to_csv('airbnb_e' + name[0] +'.csv', index=False) # -> airbnb_ef.csv with entertainment,food

# # cleaned_leisure.csv
# # airbnb = intersection_airbnb
# airbnb = pd.read_csv('airbnb_ef.csv')
# airbnb = pd.read_csv('intermediate_scores/airbnb_ef.csv')
# amen_intersection, name = inters.amentiy_intersections('cleaned_leisure.csv')
# intersection_airbnb = inters.intersection_score(airbnb, amen_intersection, name)
# intersection_airbnb.to_csv('airbnb_ef' + name[0] +'.csv', index=False) # -> airbnb_efl.csv with entertainment,food,leisure

# # cleaned_transportation.csv
# # airbnb = intersection_airbnb
# airbnb = pd.read_csv('airbnb_efl.csv')
# airbnb = pd.read_csv('intermediate_scores/airbnb_efl.csv')
# amen_intersection, name = inters.amentiy_intersections('cleaned_transportation.csv')
# intersection_airbnb = inters.intersection_score(airbnb, amen_intersection, name)
# intersection_airbnb.to_csv('airbnb_efl' + name[0] +'.csv', index=False) # -> airbnb_eflt.csv with entertainment,food,leisure,transportation

# # cleaned_shop.csv
# # airbnb = intersection_airbnb
# airbnb = pd.read_csv('airbnb_eflt.csv')
# airbnb = pd.read_csv('intermediate_scores/airbnb_eflt.csv')
# amen_intersection, name = inters.amentiy_intersections('cleaned_shop.csv')
# intersection_airbnb = inters.intersection_score(airbnb, amen_intersection, name)
# intersection_airbnb.to_csv('airbnb_eflt' + name[0] +'.csv', index=False) # -> airbnb_eflts.csv with entertainment,food,leisure,transportation,shop

# cleaned_tourism.csv
# airbnb = intersection_airbnb
# airbnb = pd.read_csv('airbnb_eflts.csv')
# airbnb = pd.read_csv('intermediate_scores/airbnb_eflts.csv')
# amen_intersection, name = inters.amentiy_intersections('cleaned_tourism.csv')
# intersection_airbnb = inters.intersection_score(airbnb, amen_intersection, name)
# intersection_airbnb.to_csv('airbnb_score.csv', index=False) # -> airbnb_score.csv with entertainment,food,leisure,transportation,shop,tourism
Expand Down
Loading

0 comments on commit 714c884

Please sign in to comment.