Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed capacity scraper to scrape from new URL #182

Merged
merged 2 commits into from
Jan 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 41 additions & 2 deletions src/scrapers/capacities_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from src.models.capacity import Capacity
from src.utils.constants import (
C2C_URL,
CRC_URL_NEW,
CAPACITY_MARKER_COUNTS,
CAPACITY_MARKER_NAMES,
CAPACITY_MARKER_UPDATED,
Expand All @@ -14,8 +15,8 @@
)
from src.utils.utils import get_facility_id, unix_time


def fetch_capacities():
# Legacy scraper for the old Connect2Concepts webpage, using C2C_URL
def fetch_capacities_old():
"""
Fetch capacities for all facilities from Connect2Concepts.
"""
Expand Down Expand Up @@ -49,6 +50,44 @@ def fetch_capacities():
# Add to sheets
add_single_capacity(count, facility_id, percent, updated)

# New scraper from new API using CRC_URL_NEW
def fetch_capacities(timeout=10):
    """
    Fetch capacities for all facilities from the new JSON API endpoint.

    Requests CRC_URL_NEW and, for every facility record in the response,
    maps the API location name to the database name via
    CAPACITY_MARKER_NAMES, computes the fill ratio, and stores the reading
    with add_single_capacity. Records without a name mapping are skipped
    with a warning; a failure while processing one record is printed and
    does not stop the remaining records.

    Args:
        timeout: Seconds to wait for the API before giving up. Defaults to 10.

    Raises:
        Exception: Any failure of the fetch itself (connection error,
            timeout, HTTP error status, invalid JSON, or a payload that is
            not a list) is printed and then re-raised.
    """
    try:
        headers = {
            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:32.0) Gecko/20100101 Firefox/32.0"
        }

        # requests has no default timeout; bound the wait so a stalled
        # endpoint cannot hang the scraper indefinitely.
        response = requests.get(CRC_URL_NEW, headers=headers, timeout=timeout)
        # Surface HTTP errors directly instead of trying to parse an error page.
        response.raise_for_status()
        facilities = response.json()

        # Iterating a dict (e.g. an error object) would yield its keys and
        # produce one confusing failure per key, so reject it up front.
        if not isinstance(facilities, list):
            raise ValueError(
                f"Expected a list of facilities, got {type(facilities).__name__}"
            )

        for facility in facilities:
            try:
                facility_name = facility["LocationName"]

                # Map API name to database name
                if facility_name not in CAPACITY_MARKER_NAMES:
                    print(f"Warning: No name mapping for facility: {facility_name}")
                    continue

                db_name = CAPACITY_MARKER_NAMES[facility_name]
                facility_id = get_facility_id(db_name)

                count = int(facility["LastCount"])
                updated_str = facility["LastUpdatedDateAndTime"]
                total_capacity = int(facility["TotalCapacity"])

                # Guard against division by zero for facilities that report
                # no total capacity.
                percent = count / total_capacity if total_capacity > 0 else 0.0
                # Drop fractional seconds before parsing the timestamp.
                updated = datetime.strptime(updated_str.split(".")[0], "%Y-%m-%dT%H:%M:%S")

                add_single_capacity(count, facility_id, percent, updated)

            except Exception as e:
                # Best-effort per record: report and move on. The record may
                # not even be a dict, so look the name up defensively rather
                # than raising again from inside the handler.
                if isinstance(facility, dict):
                    label = facility.get("LocationName", "unknown")
                else:
                    label = "unknown"
                print(f"Error processing facility {label}: {e}")

    except Exception as e:
        print(f"Error fetching capacities: {e}")
        raise

def add_single_capacity(count, facility_id, percent, updated):
"""
Expand Down
4 changes: 3 additions & 1 deletion src/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
# Base URL for Cornell Recreation Website
BASE_URL = "https://scl.cornell.edu/recreation/"

# The path for capacities
# The old path for capacities
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it necessary to leave the old path?

C2C_URL = "https://connect2concepts.com/connect2/?type=bar&key=355de24d-d0e4-4262-ae97-bc0c78b92839&loc_status=false"
# The new path for capacities: the JSON API endpoint queried by
# fetch_capacities in src/scrapers/capacities_scraper.py.
# NOTE(review): the AccountAPIKey is embedded in the URL — confirm it is meant
# to be public before keeping it in source control.
CRC_URL_NEW = "https://goboardapi.azurewebsites.net/api/FacilityCount/GetCountsByAccount?AccountAPIKey=355de24d-d0e4-4262-ae97-bc0c78b92839"

# The marker for counts in the HTML
CAPACITY_MARKER_COUNTS = "Last Count: "
Expand Down
Loading