-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Add GBFS feeds to the database (#674)
- Loading branch information
Showing
21 changed files
with
689 additions
and
274 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
import pandas as pd | ||
from sqlalchemy.orm import joinedload | ||
from database_gen.sqlacodegen_models import Gbfsfeed | ||
|
||
|
||
def generate_system_csv_from_db(df, db_session):
    """Generate a DataFrame from the database with the same columns as the CSV file.

    Args:
        df: DataFrame read from the CSV; only its "System ID" column is used,
            to build the ``gbfs-<System ID>`` stable ids that are looked up.
        db_session: SQLAlchemy session used to query ``Gbfsfeed`` rows.

    Returns:
        A DataFrame with one row per matching feed and the columns
        "System ID", "Name", "URL", "Country Code", "Location",
        "Auto-Discovery URL" and "Supported Versions". The frame has no
        columns at all when no feed matches.
    """
    stable_id_prefix = "gbfs-"
    stable_ids = stable_id_prefix + df["System ID"]
    query = (
        db_session.query(Gbfsfeed)
        .filter(Gbfsfeed.stable_id.in_(stable_ids.to_list()))
        .options(
            joinedload(Gbfsfeed.locations),
            joinedload(Gbfsfeed.gbfsversions),
            joinedload(Gbfsfeed.externalids),
        )
    )

    data = []
    for feed in query.all():
        if feed.externalids:
            system_id = feed.externalids[0].associated_id
        else:
            # A feed without an external id must not abort the whole export:
            # recover the system id from the stable id, which is built above
            # as "gbfs-" + System ID.
            system_id = feed.stable_id
            if system_id and system_id.startswith(stable_id_prefix):
                system_id = system_id[len(stable_id_prefix):]

        # A feed without a location yields empty location cells instead of an
        # IndexError.
        location = feed.locations[0] if feed.locations else None

        # sorted() leaves the ORM-managed relationship list untouched.
        supported_versions = sorted(version.version for version in feed.gbfsversions)

        data.append(
            {
                "System ID": system_id,
                "Name": feed.operator,
                "URL": feed.operator_url,
                "Country Code": location.country_code if location else None,
                "Location": location.municipality if location else None,
                "Auto-Discovery URL": feed.auto_discovery_url,
                "Supported Versions": " ; ".join(supported_versions),
            }
        )
    return pd.DataFrame(data)
|
||
|
||
def compare_db_to_csv(df_from_db, df_from_csv, logger):
    """Report how the database contents differ from the CSV contents.

    Both frames are restricted to the database frame's columns, have missing
    values replaced by empty strings, and are keyed by "System ID" before
    being compared. Findings are written to ``logger`` at info level.

    Args:
        df_from_db: DataFrame built from the database.
        df_from_csv: DataFrame read from the CSV; must contain every column
            present in ``df_from_db``.
        logger: Logger receiving the comparison report.

    Returns:
        ``(None, None)`` when the database frame is empty. Otherwise a tuple
        ``(new_or_changed, missing_in_csv)`` where ``new_or_changed`` holds the
        database version of rows that differ plus the CSV rows absent from the
        database (with "System ID" restored as a column), and
        ``missing_in_csv`` holds the database rows absent from the CSV
        (indexed by "System ID").
    """
    csv_frame = df_from_csv[df_from_db.columns].fillna("")
    db_frame = df_from_db.fillna("")

    if db_frame.empty:
        logger.info("No data found in the database.")
        return None, None

    # Key both frames by "System ID" so rows can be matched up.
    db_frame = db_frame.set_index("System ID")
    csv_frame = csv_frame.set_index("System ID")

    # Present in the CSV only: new feeds.
    missing_in_db = csv_frame.loc[~csv_frame.index.isin(db_frame.index)]
    if not missing_in_db.empty:
        logger.info("New feeds found in CSV:")
        logger.info(missing_in_db)

    # Present in the DB only: deprecated feeds.
    missing_in_csv = db_frame.loc[~db_frame.index.isin(csv_frame.index)]
    if not missing_in_csv.empty:
        logger.info("Deprecated feeds found in DB:")
        logger.info(missing_in_csv)

    # Present in both: keep the rows where at least one cell differs.
    shared_ids = db_frame.index.intersection(csv_frame.index)
    db_shared = db_frame.loc[shared_ids]
    csv_shared = csv_frame.loc[shared_ids]
    changed_mask = (db_shared != csv_shared).any(axis=1)
    differing_rows = db_shared[changed_mask]

    if not differing_rows.empty:
        logger.info("Rows with differences:")
        for system_id in differing_rows.index:
            logger.info(f"Differences for System ID {system_id}:")
            db_row = db_shared.loc[system_id]
            csv_row = csv_shared.loc[system_id]
            changed_cells = db_row != csv_row
            logger.info(f"DB Row: {db_row[changed_cells].to_dict()}")
            logger.info(f"CSV Row: {csv_row[changed_cells].to_dict()}")
            logger.info(80 * "-")

    # Changed rows and brand-new rows are returned together.
    all_differing_or_new_rows = pd.concat([differing_rows, missing_in_db]).reset_index()

    return all_differing_or_new_rows, missing_in_csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
import requests | ||
|
||
|
||
def fetch_data(auto_discovery_url, logger, urls=None, fields=None):
    """Fetch data from the auto-discovery URL and return the specified fields.

    Args:
        auto_discovery_url: URL of the auto-discovery JSON document.
        logger: Logger used for progress and error messages.
        urls: Feed names whose URL should be looked up in the discovered feed
            list; each name becomes a key of the result. Defaults to none.
        fields: Top-level keys of the JSON document to copy into the result.
            Defaults to none.

    Returns:
        ``None`` when ``auto_discovery_url`` is empty. Otherwise a dict holding
        the requested ``fields`` and, when a feed list was found, one entry per
        requested name in ``urls``. The dict may be partial or empty when the
        request or the JSON decoding fails.
    """
    fetched_data = {}
    if not auto_discovery_url:
        return None
    # None replaces the former mutable list defaults.
    if urls is None:
        urls = []
    if fields is None:
        fields = []
    try:
        response = requests.get(auto_discovery_url)
        response.raise_for_status()
        data = response.json()
        for field in fields:
            fetched_data[field] = data.get(field)

        feeds = None
        # "data" may be absent or null; treat both as having no entries.
        for lang_code, lang_data in (data.get("data") or {}).items():
            if isinstance(lang_data, list):
                lang_feeds = lang_data
            elif isinstance(lang_data, dict):
                lang_feeds = lang_data.get("feeds", [])
            else:
                # Neither a feed list nor a per-language object: skip it.
                continue
            if lang_code == "en":
                # The "en" entry takes precedence over any other entry.
                feeds = lang_feeds
                break
            elif not feeds:
                # Otherwise keep the first non-empty feed list encountered.
                feeds = lang_feeds
        logger.info(f"Feeds found from auto-discovery URL {auto_discovery_url}: {feeds}")
        if feeds:
            for url in urls:
                fetched_data[url] = get_field_url(feeds, url)
        return fetched_data
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException.
        logger.error(f"Error fetching data for autodiscovery url {auto_discovery_url}: {e}")
        return fetched_data
|
||
|
||
def get_data_content(url, logger):
    """Download a JSON document and return its "data" member.

    Args:
        url: Address of the JSON document; a falsy value skips the request.
        logger: Logger that receives the error message when the request fails.

    Returns:
        The value stored under "data" (an empty dict when the key is absent),
        or ``None`` when ``url`` is falsy or the request fails.
    """
    if not url:
        return None
    try:
        response = requests.get(url)
        response.raise_for_status()
        return response.json().get("data", {})
    except requests.RequestException as e:
        logger.error(f"Error fetching data content for url {url}: {e}")
    return None
|
||
|
||
def get_field_url(fields, field_name):
    """Return the "url" of the first entry whose "name" equals ``field_name``.

    Args:
        fields: Iterable of dict-like feed entries.
        field_name: Name of the feed to look for.

    Returns:
        The matching entry's "url" value (``None`` if that entry has no "url"),
        or ``None`` when no entry matches.
    """
    matching_urls = (entry.get("url") for entry in fields if entry.get("name") == field_name)
    return next(matching_urls, None)
|
||
|
||
def get_gbfs_versions(gbfs_versions_url, auto_discovery_url, auto_discovery_version, logger):
    """Get the GBFS versions from the gbfs_versions_url.

    Args:
        gbfs_versions_url: URL of the versions document; may be empty.
        auto_discovery_url: Auto-discovery URL, used as the URL of the
            fallback version entry.
        auto_discovery_version: Version reported by the auto-discovery
            document; "1.0" is assumed when it is empty.
        logger: Logger used for progress, warning and error messages.

    Returns:
        A list of ``{"version": ..., "url": ...}`` dicts. It always contains an
        entry for the auto-discovery version. It consists of only that entry
        when no versions URL is given, the document has no data, or an error
        occurs.
    """
    # Default version info extracted from auto-discovery url
    version_info = {
        "version": auto_discovery_version if auto_discovery_version else "1.0",
        "url": auto_discovery_url,
    }
    try:
        if not gbfs_versions_url:
            return [version_info]
        logger.info(f"Fetching GBFS versions from: {gbfs_versions_url}")
        data = get_data_content(gbfs_versions_url, logger)
        if not data:
            logger.warning(f"No data found in the GBFS versions URL -> {gbfs_versions_url}.")
            return [version_info]
        gbfs_versions = data.get("versions", [])

        # Append the version info from auto-discovery if it doesn't exist.
        # Compare against the effective version (including the "1.0" fallback)
        # so a missing auto-discovery version cannot add a duplicate "1.0".
        effective_version = version_info["version"]
        if not any(gv.get("version") == effective_version for gv in gbfs_versions):
            gbfs_versions.append(version_info)

        return gbfs_versions
    except Exception as e:
        logger.error(f"Error fetching version data: {e}")
        return [version_info]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Version strings treated as official releases (presumably of the GBFS
# specification, given the surrounding code; confirm against the spec).
# One line per release series, with release candidates listed before the
# release they precede.
OFFICIAL_VERSIONS = [
    "1.0",
    "1.1-RC", "1.1",
    "2.0-RC", "2.0",
    "2.1-RC", "2.1-RC2", "2.1",
    "2.2-RC", "2.2",
    "2.3-RC", "2.3-RC2", "2.3",
    "3.0-RC", "3.0-RC2", "3.0",
    "3.1-RC",
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Maps a license identifier (the "license_id" value of a system information
# payload) to the URL of the corresponding license text.
LICENSE_URL_MAP = {
    "CC0-1.0": "https://creativecommons.org/publicdomain/zero/1.0/",
    "CC-BY-4.0": "https://creativecommons.org/licenses/by/4.0/",
    "CDLA-Permissive-1.0": "https://cdla.io/permissive-1-0/",
    "ODC-By-1.0": "https://www.opendatacommons.org/licenses/by/1.0/",
}

# Fallback license URL: the CC-BY-4.0 entry of the map above.
DEFAULT_LICENSE_URL = LICENSE_URL_MAP["CC-BY-4.0"]
|
||
|
||
def get_license_url(system_info, logger):
    """Resolve the license URL described by a system information payload.

    Resolution order: an explicit "license_url" wins; otherwise "license_id"
    is looked up in ``LICENSE_URL_MAP``; an unknown or missing id falls back
    to ``DEFAULT_LICENSE_URL``.

    Args:
        system_info: Dict-like system information, or ``None``.
        logger: Logger that receives the error message if resolution fails.

    Returns:
        The resolved license URL, or ``None`` when ``system_info`` is ``None``
        or an unexpected error occurs while reading it.
    """
    try:
        if system_info is None:
            return None

        explicit_url = system_info.get("license_url")
        if explicit_url:
            return explicit_url

        license_key = system_info.get("license_id")
        if not license_key:
            return DEFAULT_LICENSE_URL
        return LICENSE_URL_MAP.get(license_key, DEFAULT_LICENSE_URL)
    except Exception as e:
        logger.error(f"Error fetching license url data from system info {system_info}: \n{e}")
        return None
Oops, something went wrong.