Skip to content

Commit

Permalink
Bella/classes (#141)
Browse files Browse the repository at this point in the history
* Created activity model and many to many relationship with gym model; added activity and gym resolvers to the schema

* added activity model to import statements on init_db()

* deleted testing objects in init_db()

* deleted duplicate code

* Implemented PR edits in activity.py

* Implemented PR edits in gym.py

* Implemented PR changes in schema.py and added Price and Amenity class and price resolver in Activity class

* Populated models to test

* Cleaning up some merge conflicts

* almost implementing Facilities

* Created activity model and many to many relationship with gym model; added activity and gym resolvers to the schema

* added activity model to import statements on init_db()

* deleted testing objects in init_db()

* deleted duplicate code

* Implemented PR edits in activity.py

* Implemented PR edits in gym.py

* Implemented PR changes in schema.py and added Price and Amenity class and price resolver in Activity class

* Populated models to test

* Cleaning up some merge conflicts

* almost implementing Facilities

* Activity model and some changes to gym

* Created some of gym scraper

* fixed facility.py

* wrote some of gym hour scraper

* Implement scraper for fitness center hours

* changed some of activities

* class changes

* bug fixes on the scraper

* fixing scraping hours

* fixing activities

* completed fixed activities

* modified gym_scraper to accommodate special_hours

* Scraped equipment and added it to the Schema

* Upgrade python version for the test workflow

* added back classes and class scraper

* fixed scheduling of classes scraper and cleaned up code

---------

Co-authored-by: Isabella Hoie <[email protected]>
Co-authored-by: Kidus Zegeye <[email protected]>
  • Loading branch information
3 people authored Mar 27, 2024
1 parent 681a875 commit ff15b38
Show file tree
Hide file tree
Showing 7 changed files with 247 additions and 26 deletions.
9 changes: 9 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from src.scrapers.scraper_helpers import clean_past_hours
from src.scrapers.sp_hours_scraper import fetch_sp_facility
from src.scrapers.equipment_scraper import scrape_equipment
from src.scrapers.class_scraper import fetch_classes
from src.utils.utils import create_gym_table


Expand Down Expand Up @@ -58,10 +59,18 @@ def scrape_capacities():

fetch_capacities()

# Scrape classes every hour
@scheduler.task("interval", id="scrape_classes", seconds=3600)
def scrape_classes():
    """
    Scheduled job: log that a class scrape is starting, then fetch three
    pages of the group fitness class listing via fetch_classes.
    """
    pages_to_scrape = 3
    logging.info("Scraping classes from group-fitness-classes...")

    fetch_classes(pages_to_scrape)


# Create database and fill it with data
init_db()
create_gym_table()
scrape_classes()
scrape_hours()
scrape_capacities()
scrape_equipment()
Expand Down
39 changes: 21 additions & 18 deletions schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -27,29 +27,31 @@ type Capacity {
updated: Int!
}

enum CourtType {
BASKETBALL
VOLLEYBALL
BADMINTON
type Class {
id: ID!
name: String!
description: String!
gyms: [ClassInstance]
}

type Equipment {
type ClassInstance {
id: ID!
name: String!
equipmentType: EquipmentType!
facilityId: Int!
quantity: Int
accessibility: AccessibilityType
gymId: Int
classId: Int!
location: String!
instructor: String!
isCanceled: Boolean!
isVirtual: Boolean!
startTime: DateTime
endTime: DateTime
class_: Class
gym: Gym
}

enum EquipmentType {
CARDIO
RACKS_AND_BENCHES
SELECTORIZED
MULTI_CABLE
FREE_WEIGHTS
MISCELLANEOUS
PLATE_LOADED
enum CourtType {
BASKETBALL
VOLLEYBALL
BADMINTON
}

type Facility {
Expand Down Expand Up @@ -79,6 +81,7 @@ type Gym {
amenities: [Amenity]
facilities: [Facility]
hours: [OpenHours]
classes: [ClassInstance]
}

type OpenHours {
Expand Down
90 changes: 90 additions & 0 deletions src/models/classes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import datetime
from src.database import Base
from sqlalchemy import (
Table,
Column,
DateTime,
ForeignKey,
Integer,
Float,
String,
Boolean,
func,
)
from sqlalchemy.orm import backref, relationship

# Table linking a gym to a class, with per-occurrence details (location,
# instructor, cancellation flag, start/end time) stored on each row.
# NOTE(review): no visible code references this table; the ClassInstance model
# in this module maps a separate "class_instance" table with nearly the same
# columns. This table also spells the flag "isCancelled" while ClassInstance
# uses "isCanceled" -- confirm whether this table is still needed.
classes_to_gyms = Table(
    "classes_to_gyms",
    Base.metadata,
    Column("id", Integer(), primary_key=True),
    Column("gym_id", ForeignKey("gym.id")),
    Column("class_id", ForeignKey("class.id")),
    Column("location", String()),
    Column("instructor", String()),
    Column("isCancelled", Boolean()),
    Column("start_time", DateTime()),
    Column("end_time", DateTime()),
)


class Class(Base):
    """
    A kind of group fitness class (name and description), stored in the
    "class" table.

    Attributes:
        id: integer primary key
        name: class name (required)
        description: class description (required)
        gyms: ClassInstance rows for this class (paired with ClassInstance.class_)
    """

    __tablename__ = "class"

    id = Column(Integer, primary_key=True)
    name = Column(String(), nullable=False)
    description = Column(String(), nullable=False)
    gyms = relationship("ClassInstance", back_populates="class_")

    def __init__(self, **kwargs):
        # Copy the recognised keyword arguments onto the instance; anything
        # not supplied is stored as None.
        for field in ("id", "name", "description"):
            setattr(self, field, kwargs.get(field))

    def serialize(self):
        """Return the column values of this class as a plain dict."""
        return {
            field: getattr(self, field)
            for field in ("id", "name", "description")
        }


class ClassInstance(Base):
    """
    One scheduled occurrence of a Class, stored in the "class_instance" table.

    Attributes:
        id: integer primary key
        gym_id: id of the gym hosting the class (nullable)
        class_id: id of the Class this is an occurrence of (required)
        location: location text (required)
        instructor: instructor name (required)
        isCanceled: whether this occurrence is canceled (defaults to False)
        isVirtual: whether this occurrence is virtual (defaults to False)
        start_time: start datetime (nullable)
        end_time: end datetime (nullable)
        class_: the related Class (paired with Class.gyms)
        gym: the related Gym (paired with Gym.classes)
    """

    __tablename__ = "class_instance"

    id = Column(Integer, primary_key=True)
    gym_id = Column(Integer, ForeignKey("gym.id"), nullable=True)
    class_id = Column(Integer, ForeignKey("class.id"), nullable=False)
    location = Column(String(), nullable=False)
    instructor = Column(String(), nullable=False)
    isCanceled = Column(Boolean(), nullable=False, default=False)
    isVirtual = Column(Boolean(), nullable=False, default=False)
    start_time = Column(DateTime(), nullable=True)
    end_time = Column(DateTime(), nullable=True)
    class_ = relationship("Class", back_populates="gyms")
    gym = relationship("Gym", back_populates="classes")

    def __init__(self, **kwargs):
        self.id = kwargs.get("id")
        self.gym_id = kwargs.get("gym_id")
        self.class_id = kwargs.get("class_id")
        self.location = kwargs.get("location")
        self.instructor = kwargs.get("instructor")
        # The boolean flags are non-nullable. Fall back to False here so an
        # instance that has not been flushed yet (and its serialize() output)
        # never exposes None for them.
        self.isCanceled = kwargs.get("isCanceled", False)
        self.isVirtual = kwargs.get("isVirtual", False)
        self.start_time = kwargs.get("start_time")
        self.end_time = kwargs.get("end_time")

    def serialize(self):
        """Return the column values of this class instance as a plain dict."""
        return {
            "id": self.id,
            "gym_id": self.gym_id,
            "class_id": self.class_id,
            "location": self.location,
            "instructor": self.instructor,
            "isCanceled": self.isCanceled,
            "isVirtual": self.isVirtual,
            "start_time": self.start_time,
            "end_time": self.end_time,
        }


2 changes: 2 additions & 0 deletions src/models/gym.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from sqlalchemy.orm import relationship
from src.database import Base
from src.models.openhours import OpenHours
from src.models.classes import ClassInstance, Class


class Gym(Base):
Expand All @@ -27,6 +28,7 @@ class Gym(Base):
amenities = relationship("Amenity")
facilities = relationship("Facility")
hours = relationship("OpenHours")
classes = relationship("ClassInstance", back_populates="gym")
image_url = Column(String, nullable=True)
latitude = Column(Float, nullable=False)
longitude = Column(Float, nullable=False)
Expand Down
10 changes: 2 additions & 8 deletions src/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from src.models.openhours import OpenHours as OpenHoursModel
from src.models.amenity import Amenity as AmenityModel
from src.models.equipment import Equipment as EquipmentModel
from src.models.classes import Class as ClassModel
from src.models.classes import ClassInstance as ClassInstanceModel


# MARK: - Gym
Expand Down Expand Up @@ -92,14 +94,6 @@ class Capacity(SQLAlchemyObjectType):
class Meta:
model = CapacityModel

# MARK: - Activity
# class Activity(SQLAlchemyObjectType):
# class Meta:
# model = ActivityModel

# facilities = graphene.List(lambda: Facility)



# MARK: - Query

Expand Down
114 changes: 114 additions & 0 deletions src/scrapers/class_scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
from datetime import datetime
from src.database import db_session
import time as t
import datetime as dt
import random
from bs4 import BeautifulSoup
import re
import requests
from src.utils.utils import get_gym_id
from src.utils.constants import GYMS, BASE_URL, CLASSES_PATH
from src.models.classes import Class, ClassInstance
from src.models.openhours import OpenHours

from src.models.facility import Facility

"""
Create a group class from a class page
Params:
class_href: href of class page from group-fitness-classes page
Returns:
Class Object created
"""
def create_group_class(class_href):
    """
    Create a group class from a class page and persist it
    Params:
        class_href: href of class page from group-fitness-classes page
    Returns:
        Class object created (already added to the session and committed)
    Raises:
        AttributeError: if the page has no "#main-article" element or that
            element has no <h1>
    """
    page = requests.get(BASE_URL + class_href).text
    soup = BeautifulSoup(page, "lxml")
    container = soup.select_one("#main-article")
    name = container.select_one("h1").text
    # select() returns a (possibly empty) list, so a page without paragraphs
    # simply yields an empty description.
    description = "".join(paragraph.text for paragraph in container.select("p"))
    model = Class(name=name, description=description)
    db_session.add(model)
    db_session.commit()
    return model


"""
Scrape classes from the group-fitness-classes page
Params:
num_pages: number of pages to scrape - this determines how far in advance we scrape classes
Returns:
dict of ClassInstance objects
"""
def _link_text(cells, index):
    """
    Return the text of the link inside one table cell
    Params:
        cells: list of <td> elements of a schedule row
        index: position of the cell to read
    Returns:
        text of the cell's first <a>, or "" when the cell, the link or the
        link's text is missing
    """
    try:
        text = cells[index].a.string
    except (IndexError, AttributeError):
        return ""
    return "" if text is None else str(text)


def fetch_classes(num_pages):
    """
    Scrape classes from the group-fitness-classes page

    All existing ClassInstance rows are deleted first, then one row is stored
    per scraped class occurrence. Occurrences whose end time has already
    passed are skipped. A Class row is created on demand (via
    create_group_class) the first time a class name is seen.

    Params:
        num_pages: number of pages to scrape - this determines how far in
            advance we scrape classes
    Returns:
        dict mapping ClassInstance id to the ClassInstance object stored
    """
    classes = {}
    # Full refresh: drop every stored instance before re-scraping.
    db_session.query(ClassInstance).delete()
    db_session.commit()
    for i in range(num_pages):
        page = requests.get(BASE_URL + CLASSES_PATH + str(i)).text
        soup = BeautifulSoup(page, "lxml")
        tables = soup.find_all("table")
        if len(tables) < 2:
            # No schedule table on this page.
            continue
        schedule = tables[1]  # first table is irrelevant
        for row in schedule.find_all("tr")[1:]:  # first row is header
            row_elems = row.find_all("td")
            class_name = row_elems[0].a.text
            class_href = row_elems[0].a["href"]

            gym_class = db_session.query(Class).filter(Class.name == class_name).first()
            if gym_class is None:
                gym_class = create_group_class(class_href)

            class_instance = ClassInstance()
            class_instance.class_id = gym_class.id

            now = datetime.now()
            date_string = row_elems[1].text.strip()
            if "Today" in date_string:
                date_string = now.strftime("%m/%d/%Y")

            # An empty or "Canceled" time cell marks a canceled class, which
            # is stored without start/end times.
            time_str = row_elems[3].string.replace("\n", "").strip()
            if time_str not in ("", "Canceled"):
                class_instance.isCanceled = False
                time_strs = time_str.split(" - ")
                start_time_string = time_strs[0].strip()
                end_time_string = time_strs[1].strip()

                class_instance.start_time = datetime.strptime(
                    f"{date_string} {start_time_string}", "%m/%d/%Y %I:%M%p"
                )
                class_instance.end_time = datetime.strptime(
                    f"{date_string} {end_time_string}", "%m/%d/%Y %I:%M%p"
                )
                if class_instance.end_time < now:
                    # Class is already over; do not store it.
                    continue
            else:
                class_instance.isCanceled = True

            class_instance.instructor = _link_text(row_elems, 4)

            location = _link_text(row_elems, 5)
            class_instance.location = location
            if "Virtual" in location:
                # "Virtual" is checked directly against the location text
                # rather than relying on it being listed in GYMS.
                class_instance.isVirtual = True
            else:
                for gym in GYMS:
                    if gym in location:
                        class_instance.gym_id = get_gym_id(gym)
                        break

            db_session.add(class_instance)
            # Commit per row so the generated id is available as the dict key.
            db_session.commit()
            classes[class_instance.id] = class_instance
    return classes
9 changes: 9 additions & 0 deletions src/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
# URL for Uplift image assets
ASSET_BASE_URL = "https://raw.githubusercontent.com/cuappdev/assets/master/uplift/"

# Base URL for Cornell Recreation Website (note the trailing slash; the class
# scraper builds request URLs by plain string concatenation onto this value)
BASE_URL = "https://scl.cornell.edu/recreation/"

# The path for capacities
C2C_URL = "https://connect2concepts.com/connect2/?type=bar&key=355de24d-d0e4-4262-ae97-bc0c78b92839&loc_status=false"

Expand Down Expand Up @@ -30,12 +33,18 @@
# The marker for last updated in the HTML
CAPACITY_MARKER_UPDATED = "Updated: "

# The path for group classes; the class scraper appends a zero-based page
# number to it.
# NOTE(review): this starts with "/" while BASE_URL ends with "/", so
# BASE_URL + CLASSES_PATH contains "recreation//fitness-centers" -- confirm the
# site tolerates the doubled slash.
CLASSES_PATH = "/fitness-centers/group-fitness-classes?&page="

# Days of the week used in the spreadsheet
DAYS_OF_WEEK = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# Eastern Timezone
EASTERN_TIMEZONE = "America/New_York"

# The list of gyms; the class scraper matches these names as substrings of a
# class's location text to find the hosting gym.
# NOTE(review): contains physical gyms only -- there is no "Virtual" entry.
GYMS = ["Helen Newman", "Toni Morrison", "Noyes", "Teagle"]

# The path for general gym hours
GYM_HOUR_BASE_URL = "https://scl.cornell.edu/recreation/cornell-fitness-centers"

Expand Down

0 comments on commit ff15b38

Please sign in to comment.