From a1e981570a28d32b51fccc9b1c05caa179f6e3ac Mon Sep 17 00:00:00 2001 From: Pablo Rosado Date: Thu, 12 Dec 2024 16:59:49 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=89=20Create=20wizard=20page=20on=20da?= =?UTF-8?q?ta=20producer=20analytics=20(#3711)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🎉 Create wizard page on data producer analytics * Add pandas-gbq * Create wizard page for producer analytics (WIP) * Improve layout * Improve tables layout * Make URLs clickable * Simplify code * Let user enter minimum and maximum dates * Improve format * Add summary to copy paste * Generalize function to get grapher views * Add chart displaying daily views * Refactor and improvements * Minor improvements * Add 'exclude_steps' argument to VersionTracker * Add option to exclude auxiliary steps like population * Minor improvements * Improve formatting and remove unnecessary function * change title emoji, reduce verbosity * abstract function, typos, improve spinner message * re-structure * feature: align vertically * clarify instructions, add minor docstrings * clarify spinner msg --------- Co-authored-by: lucasrodes --- apps/wizard/app_pages/producer_analytics.py | 564 ++++++++++++++++++++ apps/wizard/config/config.yml | 7 + apps/wizard/utils/components.py | 27 +- etl/version_tracker.py | 46 +- pyproject.toml | 9 +- uv.lock | 197 +++++-- 6 files changed, 776 insertions(+), 74 deletions(-) create mode 100644 apps/wizard/app_pages/producer_analytics.py diff --git a/apps/wizard/app_pages/producer_analytics.py b/apps/wizard/app_pages/producer_analytics.py new file mode 100644 index 00000000000..3be62be9165 --- /dev/null +++ b/apps/wizard/app_pages/producer_analytics.py @@ -0,0 +1,564 @@ +from datetime import datetime, timedelta +from typing import Optional, cast + +import owid.catalog.processing as pr +import pandas as pd +import plotly.express as px +import streamlit as st +from pandas_gbq import read_gbq +from st_aggrid import 
AgGrid, GridUpdateMode, JsCode +from st_aggrid.grid_options_builder import GridOptionsBuilder +from structlog import get_logger + +from apps.wizard.utils.components import st_horizontal +from etl.snapshot import Snapshot +from etl.version_tracker import VersionTracker + +# Initialize log. +log = get_logger() + +# Define constants. +TODAY = datetime.today() +# Date when the new views metric started to be recorded. +MIN_DATE = datetime.strptime("2024-11-01", "%Y-%m-%d") +GRAPHERS_BASE_URL = "https://ourworldindata.org/grapher/" +# List of auxiliary steps to be (optionally) excluded from the DAG. +# It may be convenient to ignore these steps because the analytics are heavily affected by a few producers (e.g. those that are involved in the population and income groups datasets). +AUXILIARY_STEPS = [ + "data://garden/demography/.*/population", + # Primary energy consumption is loaded by GCB. + "data://garden/energy/.*/primary_energy_consumption", + "data://garden/ggdc/.*/maddison_project_database", + "data://garden/wb/.*/income_groups", +] + +# PAGE CONFIG +st.set_page_config( + page_title="Wizard: Producer analytics", + layout="wide", + page_icon="🪄", +) + + +######################################################################################################################## +# FUNCTIONS & GLOBAL VARS +######################################################################################################################## +def columns_producer(min_date, max_date): + # Define columns to be shown. + cols_prod = { + "producer": { + "headerName": "Producer", + "headerTooltip": "Name of the producer. 
This is NOT the name of the dataset.", + }, + "n_charts": { + "headerName": "Charts", + "headerTooltip": "Number of charts using data from a producer.", + }, + "renders_custom": { + "headerName": "Views in custom range", + "headerTooltip": f"Number of renders between {min_date} and {max_date}.", + }, + "renders_365d": { + "headerName": "Views 365 days", + "headerTooltip": "Number of renders in the last 365 days.", + }, + "renders_30d": { + "headerName": "Views 30 days", + "headerTooltip": "Number of renders in the last 30 days.", + }, + } + return cols_prod + + +@st.cache_data(show_spinner=False) +def get_grapher_views( + date_start: str = MIN_DATE.strftime("%Y-%m-%d"), + date_end: str = TODAY.strftime("%Y-%m-%d"), + groupby: Optional[list[str]] = None, + grapher_urls: Optional[list[str]] = None, +) -> pd.DataFrame: + grapher_filter = "" + if grapher_urls: + # If a list of grapher URLs is given, consider only those. + grapher_urls_formatted = ", ".join(f"'{url}'" for url in grapher_urls) + grapher_filter = f"AND grapher IN ({grapher_urls_formatted})" + else: + # If no list is given, consider all grapher URLs. + grapher_filter = f"AND grapher LIKE '{GRAPHERS_BASE_URL}%'" + + if not groupby: + # If a groupby list is not given, assume the simplest case, which gives total views for each grapher. + groupby = ["grapher"] + + # Prepare the query. + groupby_clause = ", ".join(groupby) + select_clause = f"{groupby_clause}, SUM(events) AS renders" + query = f""" + SELECT + {select_clause} + FROM prod_google_analytics4.grapher_views_by_day_page_grapher_device_country_iframe + WHERE + day >= '{date_start}' + AND day <= '{date_end}' + {grapher_filter} + GROUP BY {groupby_clause} + ORDER BY {groupby_clause} + """ + + # Execute the query. 
+ df_views = read_gbq(query, project_id="owid-analytics") + + return cast(pd.DataFrame, df_views) + + +@st.cache_data(show_spinner=False) +def get_chart_renders(min_date: str, max_date: str) -> pd.DataFrame: + # List ranges of dates to fetch views. + date_ranges = { + "renders_365d": ((TODAY - timedelta(days=365)).strftime("%Y-%m-%d"), TODAY.strftime("%Y-%m-%d")), + "renders_30d": ((TODAY - timedelta(days=30)).strftime("%Y-%m-%d"), TODAY.strftime("%Y-%m-%d")), + "renders_custom": (min_date, max_date), # Use user-defined date range. + } + + # Get analytics for those ranges, for all grapher URLs. + list_renders = [ + get_grapher_views(date_start=date_start, date_end=date_end, grapher_urls=None, groupby=["grapher"]).rename( + columns={"renders": column_name} + ) + for column_name, (date_start, date_end) in date_ranges.items() + ] + + # Merge all dataframes. + df_renders = pr.multi_merge(list_renders, on="grapher", how="outer") # type: ignore + + return df_renders + + +@st.cache_data(show_spinner=False) +def load_steps_df(excluded_steps) -> pd.DataFrame: + # Load steps dataframe. + steps_df = VersionTracker(exclude_steps=excluded_steps).steps_df + + return steps_df + + +@st.cache_data(show_spinner=False) +def load_steps_df_with_producer_data(excluded_steps) -> pd.DataFrame: + # Load steps dataframe. + # st.toast("⌛ Loading data from VersionTracker...") + steps_df = load_steps_df(excluded_steps=excluded_steps) + + # st.toast("⌛ Processing VersionTracker data...") + # Select only active snapshots. + df = steps_df[(steps_df["channel"] == "snapshot") & (steps_df["state"] == "active")].reset_index(drop=True) + + # Select only relevant columns. + df = df[["step", "all_chart_slugs"]] + + # Add a column of producer to steps df (where possible). 
+ for i, row in df.iterrows(): + snap_uri = row["step"].split("snapshot://" if "snapshot://" in row["step"] else "snapshot-private://")[1] + snap = Snapshot(snap_uri) + origin = snap.metadata.origin + if (origin is not None) and (snap.metadata.namespace not in ["dummy"]): + producer = snap.metadata.origin.producer # type: ignore + df.loc[i, "producer"] = producer + + # Select only relevant columns. + df = df[["producer", "all_chart_slugs"]] + + # Remove rows with no producer. + df = df.dropna(subset=["producer"]).reset_index(drop=True) + + # Ignore the chart id, and keep only the slug. + df["all_chart_slugs"] = [sorted(set([slug for _, slug in id_slug])) for id_slug in df["all_chart_slugs"]] + + # Create a row for each producer-slug pair. Snapshots with no charts yield a row with NaN (not "") in "all_chart_slugs". + df_expanded = df.explode("all_chart_slugs") + + # Remove duplicates. + # NOTE: This happens because df contains one row per snapshot. Some grapher datasets come from a combination of multiple snapshots (often from the same producer). We want to count producer-chart pairs only once. + df_expanded = df_expanded.drop_duplicates(subset=["producer", "all_chart_slugs"]).reset_index(drop=True) + + # Add a column for grapher URL. + df_expanded["grapher"] = GRAPHERS_BASE_URL + df_expanded["all_chart_slugs"] + + return df_expanded + + +@st.cache_data(show_spinner=False) +def get_producer_charts_analytics(min_date, max_date, excluded_steps): + # Get chart renders using user-defined date range for "renders_custom". + # st.toast("⌛ Getting analytics on chart renders...") + df_renders = get_chart_renders(min_date=min_date, max_date=max_date) + + # Load the steps dataframe with producer data. + df_expanded = load_steps_df_with_producer_data(excluded_steps=excluded_steps) + + # Add columns with the numbers of chart renders.
+ df_expanded = df_expanded.merge(df_renders, on="grapher", how="left").drop(columns=["all_chart_slugs"]) + + return df_expanded + + +@st.cache_data(show_spinner=False) +def get_producer_analytics_per_chart(min_date, max_date, excluded_steps): + # Load the steps dataframe with producer data and analytics. + df_expanded = get_producer_charts_analytics(min_date=min_date, max_date=max_date, excluded_steps=excluded_steps) + + # Create an expanded table with number of views per chart. + df_renders_per_chart = df_expanded.dropna(subset=["grapher"]).fillna(0).reset_index(drop=True) + df_renders_per_chart = df_renders_per_chart.sort_values("renders_custom", ascending=False).reset_index(drop=True) + + return df_renders_per_chart + + +@st.cache_data(show_spinner=False) +def get_producer_analytics_per_producer(min_date, max_date, excluded_steps): + # Load the steps dataframe with producer data and analytics. + df_expanded = get_producer_charts_analytics(min_date=min_date, max_date=max_date, excluded_steps=excluded_steps) + + # st.toast("⌛ Adapting the data for presentation...") + # Group by producer and get the full list of chart slugs for each producer. + df_grouped = df_expanded.groupby("producer", observed=True, as_index=False).agg( + { + "grapher": lambda x: [item for item in x if pd.notna(item)], # Filter out NaN values + "renders_365d": "sum", + "renders_30d": "sum", + "renders_custom": "sum", + } + ) + df_grouped["n_charts"] = df_grouped["grapher"].apply(len) + + # Check if lists are unique. If not, make them unique in the previous line. + error = "Duplicated chart slugs found for a given producer." + assert df_grouped["grapher"].apply(lambda x: len(x) == len(set(x))).all(), error + + # Drop unnecessary columns. + df_grouped = df_grouped.drop(columns=["grapher"]) + + # Sort conveniently. 
+ df_grouped = df_grouped.sort_values(["renders_custom"], ascending=False).reset_index(drop=True) + + return df_grouped + + +def show_producers_grid(df_producers, min_date, max_date): + """Show table with producers analytics.""" + gb = GridOptionsBuilder.from_dataframe(df_producers) + gb.configure_grid_options(domLayout="autoHeight", enableCellTextSelection=True) + gb.configure_selection( + selection_mode="multiple", + use_checkbox=True, + rowMultiSelectWithClick=True, + suppressRowDeselection=False, + groupSelectsChildren=True, + groupSelectsFiltered=True, + ) + gb.configure_default_column(editable=False, groupable=True, sortable=True, filterable=True, resizable=True) + + # Enable column auto-sizing for the grid. + gb.configure_grid_options(suppressSizeToFit=False) # Allows dynamic resizing to fit. + gb.configure_default_column(autoSizeColumns=True) # Ensures all columns can auto-size. + + # Configure individual columns with specific settings. + COLUMNS_PRODUCERS = columns_producer(min_date, max_date) + for column in COLUMNS_PRODUCERS: + gb.configure_column(column, **COLUMNS_PRODUCERS[column]) + # Configure pagination with a fixed page size of 20 rows (automatic page sizing is disabled). + gb.configure_pagination(paginationAutoPageSize=False, paginationPageSize=20) + # Build the grid options. + grid_options = gb.build() + # Custom CSS to ensure the table stretches across the page. + custom_css = { + ".ag-theme-streamlit": { + "max-width": "100% !important", + "width": "100% !important", + "margin": "0 auto !important", # Centers the grid horizontally. + }, + } + # Display the grid table with the updated grid options. + grid_response = AgGrid( + data=df_producers, + gridOptions=grid_options, + height=1000, + width="100%", + update_mode=GridUpdateMode.MODEL_CHANGED, + fit_columns_on_grid_load=True, # Automatically adjust columns when the grid loads. + allow_unsafe_jscode=True, + theme="streamlit", + custom_css=custom_css, + # excel_export_mode=ExcelExportMode.MANUAL, # Doesn't work?
+ ) + + # Get the selected producers from the first table. + producers_selected = [row["producer"] for row in grid_response["selected_rows"]] + + return producers_selected + + +def plot_chart_analytics(df): + """Show chart with analytics on producer's charts.""" + # Get total daily views of selected producers. + grapher_urls_selected = df["grapher"].unique().tolist() # type: ignore + df_total_daily_views = get_grapher_views( + date_start=min_date, date_end=max_date, groupby=["day"], grapher_urls=grapher_urls_selected + ) + + # Get daily views of the top 10 charts. + grapher_urls_top_10 = ( + df.sort_values("renders_custom", ascending=False)["grapher"].unique().tolist()[0:10] # type: ignore + ) + df_top_10_daily_views = get_grapher_views( + date_start=min_date, date_end=max_date, groupby=["day", "grapher"], grapher_urls=grapher_urls_top_10 + ) + + # Get total number of views and average daily views. + total_views = df_total_daily_views["renders"].sum() + average_daily_views = df_total_daily_views["renders"].mean() + # Get total views of the top 10 charts in the selected date range. + df_top_10_total_views = df_top_10_daily_views.groupby("grapher", as_index=False).agg({"renders": "sum"}) + + # Create a line chart. + df_plot = pd.concat([df_total_daily_views.assign(**{"grapher": "Total"}), df_top_10_daily_views]).rename( + columns={"grapher": "Chart slug"} + ) + df_plot["Chart slug"] = df_plot["Chart slug"].apply(lambda x: x.split("/")[-1]) + df_plot["day"] = pd.to_datetime(df_plot["day"]).dt.strftime("%a. %Y-%m-%d") + fig = px.line( + df_plot, + x="day", + y="renders", + color="Chart slug", + title="Total daily views and views of top 10 charts", + ).update_layout(xaxis_title=None, yaxis_title=None) + + # Display the chart. 
+ st.plotly_chart(fig, use_container_width=True) + + return total_views, average_daily_views, df_top_10_total_views + + +def show_producer_charts_grid(df): + """Show table with analytics on producer's charts.""" + # Configure and display the second table. + gb2 = GridOptionsBuilder.from_dataframe(df) + gb2.configure_grid_options(domLayout="autoHeight", enableCellTextSelection=True) + gb2.configure_default_column(editable=False, groupable=True, sortable=True, filterable=True, resizable=True) + + # Create a JavaScript renderer for clickable slugs. + grapher_slug_jscode = JsCode( + r""" + class UrlCellRenderer { + init(params) { + this.eGui = document.createElement('a'); + if (params.value) { + // Extract the slug from the full URL. + const url = new URL(params.value); + const slug = url.pathname.split('/').pop(); // Get the last part of the path as the slug. + this.eGui.innerText = slug; + this.eGui.setAttribute('href', params.value); + } else { + this.eGui.innerText = ''; + } + this.eGui.setAttribute('style', "text-decoration:none; color:blue"); + this.eGui.setAttribute('target', "_blank"); + } + getGui() { + return this.eGui; + } + } + """ + ) + + # Define columns to be shown, including the cell renderer for "grapher". + COLUMNS_PRODUCERS = columns_producer(min_date, max_date) + COLUMNS_PRODUCER_CHARTS = { + column: ( + { + "headerName": "Chart URL", + "headerTooltip": "URL of the chart in the grapher.", + "cellRenderer": grapher_slug_jscode, + } + if column == "grapher" + else COLUMNS_PRODUCERS[column] + ) + for column in ["producer", "renders_custom", "renders_365d", "renders_30d", "grapher"] + } + # Configure and display the second table. + gb2 = GridOptionsBuilder.from_dataframe(df) + gb2.configure_grid_options(domLayout="autoHeight", enableCellTextSelection=True) + gb2.configure_default_column(editable=False, groupable=True, sortable=True, filterable=True, resizable=True) + + # Apply column configurations directly from the dictionary. 
+ for column, config in COLUMNS_PRODUCER_CHARTS.items(): + gb2.configure_column(column, **config) + + # Configure pagination with a fixed page size of 20 rows (automatic page sizing is disabled). + gb2.configure_pagination(paginationAutoPageSize=False, paginationPageSize=20) + grid_options2 = gb2.build() + + # Display the grid. + AgGrid( + data=df, + gridOptions=grid_options2, + height=500, + width="100%", + fit_columns_on_grid_load=True, + allow_unsafe_jscode=True, + theme="streamlit", + # excel_export_mode=ExcelExportMode.MANUAL, # Doesn't work? + ) + + +def prepare_summary( + df_top_10_total_views, producers_selected, total_views, average_daily_views, min_date, max_date +) -> str: + """Prepare summary at the end of the app.""" + # Prepare the total number of views. + total_views_str = f"{total_views:9,}" + # Prepare the average daily views. + average_views_str = f"{round(average_daily_views):9,}" + # Prepare a readable list of the selected producers. + if len(producers_selected) == 0: + producers_selected_str = "all producers" + elif len(producers_selected) == 1: + producers_selected_str = producers_selected[0] + else: + producers_selected_str = ", ".join(producers_selected[:-1]) + " and " + producers_selected[-1] + # NOTE: I tried .to_string() and .to_markdown() and couldn't find a way to keep a meaningful format. + df_summary_str = "" + for _, row in df_top_10_total_views.sort_values("renders", ascending=False).iterrows(): + df_summary_str += f"{row['renders']:9,}" + " - " + row["grapher"] + "\n" + + # Define the content to copy.
+ summary = f"""\ +Analytics of charts using data by {producers_selected_str} between {min_date} and {max_date}: +- Total number of chart views: {total_views_str} +- Average daily chart views: {average_views_str} +- Views of top performing charts: +{df_summary_str} + + """ + return summary + + +######################################################################################################################## +# RENDER +######################################################################################################################## + +# Streamlit app layout. +st.title(":material/bar_chart: Producer analytics") +st.markdown("Explore analytics of data producers.") + +# SEARCH BOX +with st.container(border=True): + st.markdown( + f"Select a custom date range (note that this metric started to be recorded on {MIN_DATE.strftime('%Y-%m-%d')})." + ) + + with st_horizontal(vertical_alignment="center"): + # Create input fields for minimum and maximum dates. + min_date = st.date_input( + "Select minimum date", + value=MIN_DATE, + key="min_date", + format="YYYY-MM-DD", + ).strftime( # type: ignore + "%Y-%m-%d" + ) + max_date = st.date_input( + "Select maximum date", + value=TODAY, + key="max_date", + format="YYYY-MM-DD", + ).strftime( # type: ignore + "%Y-%m-%d" + ) + exclude_auxiliary_steps = st.checkbox( + "Exclude auxiliary steps (e.g. population)", + False, + help="Exclude steps that are commonly used as auxiliary data, so they do not skew the analytics in favor of a few producers. But note that this will exclude all uses of these steps, even when they are the main datasets (not auxiliary). Auxiliary steps are:\n- " + + "\n- ".join(sorted(f"`{s}`" for s in AUXILIARY_STEPS)), + ) + +if exclude_auxiliary_steps: + # If the user wants to exclude auxiliary steps, take the default list of excluded steps. + excluded_steps = AUXILIARY_STEPS +else: + # Otherwise, do not exclude any steps. 
+ excluded_steps = [] + +######################################################################################################################## +# 1/ PRODUCER ANALYTICS: Display main table, with analytics per producer. +# Allow the user to select a subset of producers. +######################################################################################################################## +st.header("Analytics by producer") +st.markdown( + "Total number of charts and chart views for each producer. Producers selected in this table will be used to filter the producer-charts table below." +) + +# Load table content and select only columns to be shown. +with st.spinner("Loading producer data. We are accessing various databases. This can take a few seconds..."): + df_producers = get_producer_analytics_per_producer( + min_date=min_date, max_date=max_date, excluded_steps=excluded_steps + ) + +# Prepare and display the grid table with producer analytics. +producers_selected = show_producers_grid( + df_producers=df_producers, + min_date=min_date, + max_date=max_date, +) + +######################################################################################################################## +# 2/ CHART ANALYTICS: Display a chart with the total number of daily views, and the daily views of the top performing charts. +######################################################################################################################## +st.header("Analytics by chart") +st.markdown("Number of views for each chart that uses data by the selected producers.") + +# Load detailed analytics per producer-chart. +with st.spinner("Loading chart data. This can take a few seconds..."): + df_producer_charts = get_producer_analytics_per_chart( + min_date=min_date, max_date=max_date, excluded_steps=excluded_steps + ) + +# Get the selected producers from the first table. +if len(producers_selected) == 0: + # If no producers are selected, show all producer-charts.
+ df_producer_charts_filtered = df_producer_charts +else: + # Filter producer-charts by selected producers. + df_producer_charts_filtered = df_producer_charts[df_producer_charts["producer"].isin(producers_selected)] + +# Show chart with chart analytics, and get some summary data. +total_views, average_daily_views, df_top_10_total_views = plot_chart_analytics(df_producer_charts_filtered) + +# Show table +show_producer_charts_grid(df_producer_charts_filtered) + +######################################################################################################################## +# 3/ SUMMARY: Display a summary to be shared with the data producer. +######################################################################################################################## + +# Prepare the summary to be copy-pasted. +summary = prepare_summary( + df_top_10_total_views=df_top_10_total_views, + producers_selected=producers_selected, + total_views=total_views, + average_daily_views=average_daily_views, + min_date=min_date, + max_date=max_date, +) + +# Display the content. +st.markdown( + """## Summary for data producers + +For now, to share analytics with a data producer you can do any of the following: +- **Table export**: Right-click on a cell in the table above and export as a CSV or Excel file. +- **Chart export**: Click on the camera icon on the top right of the chart to download the chart as a PNG. +- **Copy summary**: Click on the upper right corner of the box below to copy the summary to the clipboard.
+""" +) +st.code(summary, language="text") diff --git a/apps/wizard/config/config.yml b/apps/wizard/config/config.yml index f00f6090fc0..ca80284a32b 100644 --- a/apps/wizard/config/config.yml +++ b/apps/wizard/config/config.yml @@ -151,6 +151,13 @@ sections: entrypoint: app_pages/dataset_explorer.py icon: ":material/search:" image_url: "https://upload.wikimedia.org/wikipedia/commons/c/c3/NGC_4414_%28NASA-med%29.jpg" + - title: "Producer analytics" + alias: producer-analytics + description: "Extract analytics to share with data producers" + maintainer: "@pablo" + entrypoint: app_pages/producer_analytics.py + icon: ":material/developer_board:" + image_url: "https://upload.wikimedia.org/wikipedia/commons/thumb/c/ce/Wikimedia_Product_Analytics_team_logo.svg/512px-Wikimedia_Product_Analytics_team_logo.svg.png" - title: "Explorers" description: |- diff --git a/apps/wizard/utils/components.py b/apps/wizard/utils/components.py index c2934548512..c82b4d84cc4 100644 --- a/apps/wizard/utils/components.py +++ b/apps/wizard/utils/components.py @@ -13,42 +13,43 @@ HORIZONTAL_STYLE = """ """ @contextmanager -def st_horizontal(): - st.markdown(HORIZONTAL_STYLE, unsafe_allow_html=True) +def st_horizontal(vertical_alignment="baseline"): + h_style = HORIZONTAL_STYLE.format(vertical_alignment=vertical_alignment) + st.markdown(h_style, unsafe_allow_html=True) with st.container(): st.markdown('', unsafe_allow_html=True) yield diff --git a/etl/version_tracker.py b/etl/version_tracker.py index f75f9f8bbc0..20a78df7c75 100644 --- a/etl/version_tracker.py +++ b/etl/version_tracker.py @@ -1,3 +1,4 @@ +import re from datetime import datetime from enum import Enum from pathlib import Path @@ -259,6 +260,31 @@ def _recursive_get_all_step_dependencies_ndim( return dependencies, memo +def remove_steps_from_dag(dag: dict, exclude: list[str]) -> dict: + """ + Remove specific steps (either active steps or dependencies) from the DAG. + + This can be useful to ignore auxiliary datasets. 
The excluded steps can have wildcards: + exclude = [ + "data://garden/demography/.*/population", + "data://garden/wb/.*/income_groups", + ] + """ + + # Check if a step matches any exclude pattern. + def is_excluded(step: str) -> bool: + return any(pattern.match(step) for pattern in [re.compile(pattern) for pattern in exclude]) + + # Filter out steps and dependencies that match any element from the excluded list. + dag_filtered = { + step: {dep for dep in dependencies if not is_excluded(dep)} + for step, dependencies in dag.items() + if not is_excluded(step) + } + + return dag_filtered + + class VersionTracker: """Helper object that loads the dag, provides useful functions to check for versions and dataset dependencies, and checks for inconsistencies. @@ -299,6 +325,7 @@ def __init__( warn_on_archivable: bool = True, warn_on_unused: bool = True, ignore_archive: bool = False, + exclude_steps: Optional[list[str]] = None, ): # Load dag of active steps (a dictionary step: set of dependencies). self.dag_active = load_dag(paths.DAG_FILE) @@ -308,6 +335,13 @@ def __init__( else: # Load dag of active and archive steps. self.dag_all = load_dag(paths.DAG_ARCHIVE_FILE) + + # Optionally exclude certain steps and dependencies. + self.exclude_steps = exclude_steps + if self.exclude_steps: + self.dag_active = remove_steps_from_dag(self.dag_active, self.exclude_steps) + self.dag_all = remove_steps_from_dag(self.dag_all, self.exclude_steps) + # Create a reverse dag (a dictionary where each item is step: set of usages). self.dag_all_reverse = reverse_graph(graph=self.dag_all) # Create a reverse dag (a dictionary where each item is step: set of usages) of active steps. @@ -621,14 +655,14 @@ def _days_since_step_creation(version): ] = UpdateState.ARCHIVABLE.value # There are special steps that, even though they are archivable or unused, we want to keep in the active dag. 
- steps_active_df.loc[steps_active_df["step"].isin(self.ARCHIVABLE_STEPS_TO_KEEP), "update_state"] = ( - UpdateState.UP_TO_DATE.value - ) + steps_active_df.loc[ + steps_active_df["step"].isin(self.ARCHIVABLE_STEPS_TO_KEEP), "update_state" + ] = UpdateState.UP_TO_DATE.value # All explorers and external steps should be considered up to date. - steps_active_df.loc[steps_active_df["channel"].isin(["explorers", "external"]), "update_state"] = ( - UpdateState.UP_TO_DATE.value - ) + steps_active_df.loc[ + steps_active_df["channel"].isin(["explorers", "external"]), "update_state" + ] = UpdateState.UP_TO_DATE.value # Add update state to archived steps. steps_inactive_df["update_state"] = UpdateState.ARCHIVED.value diff --git a/pyproject.toml b/pyproject.toml index 3e524a1b5ca..ca6c8ecde91 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,6 @@ dependencies = [ "scikit-learn>=1.5.2", "geopy>=2.4.1", "py7zr>=0.22.0", - "pyreadr>=0.5.2", ] [tool.uv.sources] @@ -107,7 +106,7 @@ dev-dependencies = [ # unpinning those would introduce tons of type errors "pyright==1.1.373", "pandas-stubs==1.2.0.62", - "ruff>=0.8.2", + "ruff==0.1.6", "ipdb>=0.13.13", ] @@ -135,6 +134,7 @@ wizard = [ "torch<2.3.0", "sentence-transformers>=2.2.2", "moviepy>=2.1.1", + "pandas-gbq>=0.25.0", ] [project.scripts] @@ -147,17 +147,16 @@ compare = 'etl.compare:cli' backport = 'apps.backport.backport:backport_cli' [tool.ruff] -lint.extend-select = [ +extend-select = [ # isort "I" ] -lint.ignore = ["E501"] +ignore = ["E501"] line-length = 120 target-version = "py310" extend-exclude = [ ".ipynb_checkpoints", "*cookiecutter", - "*.ipynb", ] [build-system] diff --git a/uv.lock b/uv.lock index ced25ca0fd1..2f4ecd7dbac 100644 --- a/uv.lock +++ b/uv.lock @@ -796,6 +796,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = 
"sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686 }, ] +[[package]] +name = "db-dtypes" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "pyarrow" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/80/00d501391fc5450c37b5fe0c25596e629a9fa2868fe4f194d69c5622210e/db_dtypes-1.3.1.tar.gz", hash = "sha256:a058f05dab100891f3e76a7a3db9ad0f107f18dd3d1bdd13680749a2f07eae77", size = 32492 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/e0/2eeacbe14b5a3a3b4ad5fd2c1e7959bd2201e4daeba851579fa78e5eccb1/db_dtypes-1.3.1-py2.py3-none-any.whl", hash = "sha256:fbc9d1740d94aaf2b5ae24601cfc875a69b4635bb9d049e3c3036e9f10203af8", size = 18742 }, +] + [[package]] name = "debugpy" version = "1.8.5" @@ -938,7 +953,6 @@ dependencies = [ { name = "pyhumps" }, { name = "pymysql" }, { name = "pypdf2" }, - { name = "pyreadr" }, { name = "python-docx" }, { name = "python-dotenv" }, { name = "pyyaml" }, @@ -979,6 +993,7 @@ api = [ wizard = [ { name = "geographiclib" }, { name = "moviepy" }, + { name = "pandas-gbq" }, { name = "plotly" }, { name = "pyproj" }, { name = "sentence-transformers" }, @@ -1053,6 +1068,7 @@ requires-dist = [ { name = "owid-datautils", editable = "lib/datautils" }, { name = "owid-repack", editable = "lib/repack" }, { name = "pandas", specifier = "==2.2.3" }, + { name = "pandas-gbq", marker = "extra == 'wizard'", specifier = ">=0.25.0" }, { name = "papermill", specifier = ">=2.3.3" }, { name = "pdfplumber", specifier = ">=0.9.0" }, { name = "plotly", marker = "extra == 'wizard'", specifier = ">=5.23.0" }, @@ -1063,7 +1079,6 @@ requires-dist = [ { name = "pymysql", specifier = ">=1.1.1" }, { name = "pypdf2", specifier = ">=2.11.1" }, { name = "pyproj", marker = "extra == 'wizard'", specifier = ">=3.6.1" }, - { name = "pyreadr", specifier = ">=0.5.2" }, { name = "python-docx", 
specifier = ">=1.1.2" }, { name = "python-dotenv", specifier = ">=0.19.0" }, { name = "pyyaml", specifier = ">=6.0.1" }, @@ -1132,7 +1147,7 @@ dev = [ { name = "pyright", specifier = "==1.1.373" }, { name = "pytest", specifier = ">=8.3.2" }, { name = "pyyaml", specifier = ">=6.0.2" }, - { name = "ruff", specifier = ">=0.8.2" }, + { name = "ruff", specifier = "==0.1.6" }, { name = "sqlacodegen", git = "https://github.com/agronholm/sqlacodegen.git" }, { name = "types-pyyaml", specifier = ">=6.0.12.20240808" }, { name = "watchdog", specifier = ">=4.0.1" }, @@ -1496,6 +1511,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/53/2e340a6ed897fa2bdd6c1bf166b98c047fbb648463dfd2b209ca7d501984/google_api_core-2.19.2-py3-none-any.whl", hash = "sha256:53ec0258f2837dd53bbd3d3df50f5359281b3cc13f800c941dd15a9b5a415af4", size = 139427 }, ] +[package.optional-dependencies] +grpc = [ + { name = "grpcio" }, + { name = "grpcio-status" }, +] + [[package]] name = "google-api-python-client" version = "2.145.0" @@ -1552,6 +1573,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1a/8e/22a28dfbd218033e4eeaf3a0533b2b54852b6530da0c0fe934f0cc494b29/google_auth_oauthlib-1.2.1-py2.py3-none-any.whl", hash = "sha256:2d58a27262d55aa1b87678c3ba7142a080098cbc2024f903c62355deb235d91f", size = 24930 }, ] +[[package]] +name = "google-cloud-bigquery" +version = "3.27.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core", extra = ["grpc"] }, + { name = "google-auth" }, + { name = "google-cloud-core" }, + { name = "google-resumable-media" }, + { name = "packaging" }, + { name = "python-dateutil" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/05/633ce6686b1fed2cd364fa4698bfa6d586263cd4795d012584f8097061e1/google_cloud_bigquery-3.27.0.tar.gz", hash = "sha256:379c524054d7b090fa56d0c22662cc6e6458a6229b6754c0e7177e3a73421d2c", size = 456964 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/f5/40/4b11a4a8839de8ce802a3ccd60b34e70ce10d13d434a560534ba98f0ea3f/google_cloud_bigquery-3.27.0-py2.py3-none-any.whl", hash = "sha256:b53b0431e5ba362976a4cd8acce72194b4116cdf8115030c7b339b884603fcc3", size = 240100 }, +] + [[package]] name = "google-cloud-core" version = "2.4.1" @@ -1667,6 +1706,55 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0d/20/89674b7d62a19138b3352f6080f2ff3e1ee4a298b29bb793746423d0b908/greenlet-3.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:28fe80a3eb673b2d5cc3b12eea468a5e5f4603c26aa34d88bf61bba82ceb2f9b", size = 294647 }, ] +[[package]] +name = "grpcio" +version = "1.68.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/91/ec/b76ff6d86bdfd1737a5ec889394b54c18b1ec3832d91041e25023fbcb67d/grpcio-1.68.1.tar.gz", hash = "sha256:44a8502dd5de653ae6a73e2de50a401d84184f0331d0ac3daeb044e66d5c5054", size = 12694654 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f5/88/d1ac9676a0809e3efec154d45246474ec12a4941686da71ffb3d34190294/grpcio-1.68.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:d35740e3f45f60f3c37b1e6f2f4702c23867b9ce21c6410254c9c682237da68d", size = 5171054 }, + { url = "https://files.pythonhosted.org/packages/ec/cb/94ca41e100201fee8876a4b44d64e43ac7405929909afe1fa943d65b25ef/grpcio-1.68.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:d99abcd61760ebb34bdff37e5a3ba333c5cc09feda8c1ad42547bea0416ada78", size = 11078566 }, + { url = "https://files.pythonhosted.org/packages/d5/b0/ad4c66f2e3181b4eab99885686c960c403ae2300bacfe427526282facc07/grpcio-1.68.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:f8261fa2a5f679abeb2a0a93ad056d765cdca1c47745eda3f2d87f874ff4b8c9", size = 5690039 }, + { url = "https://files.pythonhosted.org/packages/67/1e/f5d3410674d021831c9fef2d1d7ca2357b08d09c840ad4e054ea8ffc302e/grpcio-1.68.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:0feb02205a27caca128627bd1df4ee7212db051019a9afa76f4bb6a1a80ca95e", size = 6317470 }, + { url = "https://files.pythonhosted.org/packages/91/93/701d5f33b163a621c8f2d4453f9e22f6c14e996baed54118d0dea93fc8c7/grpcio-1.68.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:919d7f18f63bcad3a0f81146188e90274fde800a94e35d42ffe9eadf6a9a6330", size = 5941884 }, + { url = "https://files.pythonhosted.org/packages/67/44/06917ffaa35ca463b93dde60f324015fe4192312b0f4dd0faec061e7ca7f/grpcio-1.68.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:963cc8d7d79b12c56008aabd8b457f400952dbea8997dd185f155e2f228db079", size = 6646332 }, + { url = "https://files.pythonhosted.org/packages/d4/94/074db039532687ec8ef07ebbcc747c46547c94329016e22b97d97b9e5f3b/grpcio-1.68.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ccf2ebd2de2d6661e2520dae293298a3803a98ebfc099275f113ce1f6c2a80f1", size = 6212515 }, + { url = "https://files.pythonhosted.org/packages/c5/f2/0c939264c36c6038fae1732a2a3e01a7075ba171a2154d86842ee0ac9b0a/grpcio-1.68.1-cp310-cp310-win32.whl", hash = "sha256:2cc1fd04af8399971bcd4f43bd98c22d01029ea2e56e69c34daf2bf8470e47f5", size = 3650459 }, + { url = "https://files.pythonhosted.org/packages/b6/90/b0e9278e88f747879d13b79fb893c9acb381fb90541ad9e416c7816c5eaf/grpcio-1.68.1-cp310-cp310-win_amd64.whl", hash = "sha256:ee2e743e51cb964b4975de572aa8fb95b633f496f9fcb5e257893df3be854746", size = 4399144 }, + { url = "https://files.pythonhosted.org/packages/fe/0d/fde5a5777d65696c39bb3e622fe1239dd0a878589bf6c5066980e7d19154/grpcio-1.68.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:55857c71641064f01ff0541a1776bfe04a59db5558e82897d35a7793e525774c", size = 5180919 }, + { url = "https://files.pythonhosted.org/packages/07/fd/e5fa75b5ddf5d9f16606196973f9c2b4b1adf5a1735117eb7129fc33d2ec/grpcio-1.68.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4b177f5547f1b995826ef529d2eef89cca2f830dd8b2c99ffd5fde4da734ba73", size = 11150922 }, + { url = 
"https://files.pythonhosted.org/packages/86/1e/aaf5a1dae87fe47f277c5a1be72b31d2c209d095bebb0ce1d2df5cb8779c/grpcio-1.68.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:3522c77d7e6606d6665ec8d50e867f13f946a4e00c7df46768f1c85089eae515", size = 5685685 }, + { url = "https://files.pythonhosted.org/packages/a9/69/c4fdf87d5c5696207e2ed232e4bdde656d8c99ba91f361927f3f06aa41ca/grpcio-1.68.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9d1fae6bbf0816415b81db1e82fb3bf56f7857273c84dcbe68cbe046e58e1ccd", size = 6316535 }, + { url = "https://files.pythonhosted.org/packages/6f/c6/539660516ea7db7bc3d39e07154512ae807961b14ec6b5b0c58d15657ff1/grpcio-1.68.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:298ee7f80e26f9483f0b6f94cc0a046caf54400a11b644713bb5b3d8eb387600", size = 5939920 }, + { url = "https://files.pythonhosted.org/packages/38/f3/97a74dc4dd95bf195168d6da2ca4731ab7d3d0b03078f2833b4ff9c4f48f/grpcio-1.68.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cbb5780e2e740b6b4f2d208e90453591036ff80c02cc605fea1af8e6fc6b1bbe", size = 6644770 }, + { url = "https://files.pythonhosted.org/packages/cb/36/79a5e04073e58106aff442509a0c459151fa4f43202395db3eb8f77b78e9/grpcio-1.68.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ddda1aa22495d8acd9dfbafff2866438d12faec4d024ebc2e656784d96328ad0", size = 6211743 }, + { url = "https://files.pythonhosted.org/packages/73/0f/2250f4a0de1a0bec0726c47a021cbf71af6105f512ecaf67703e2eb1ad2f/grpcio-1.68.1-cp311-cp311-win32.whl", hash = "sha256:b33bd114fa5a83f03ec6b7b262ef9f5cac549d4126f1dc702078767b10c46ed9", size = 3650734 }, + { url = "https://files.pythonhosted.org/packages/4b/29/061c93a35f498238dc35eb8fb039ce168aa99cac2f0f1ce0c8a0a4bdb274/grpcio-1.68.1-cp311-cp311-win_amd64.whl", hash = "sha256:7f20ebec257af55694d8f993e162ddf0d36bd82d4e57f74b31c67b3c6d63d8b2", size = 4400816 }, + { url = 
"https://files.pythonhosted.org/packages/f5/15/674a1468fef234fa996989509bbdfc0d695878cbb385b9271f5d690d5cd3/grpcio-1.68.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:8829924fffb25386995a31998ccbbeaa7367223e647e0122043dfc485a87c666", size = 5148351 }, + { url = "https://files.pythonhosted.org/packages/62/f5/edce368682d6d0b3573b883b134df022a44b1c888ea416dd7d78d480ab24/grpcio-1.68.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:3aed6544e4d523cd6b3119b0916cef3d15ef2da51e088211e4d1eb91a6c7f4f1", size = 11127559 }, + { url = "https://files.pythonhosted.org/packages/ce/14/a6fde3114eafd9e4e345d1ebd0291c544d83b22f0554b1678a2968ae39e1/grpcio-1.68.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:4efac5481c696d5cb124ff1c119a78bddbfdd13fc499e3bc0ca81e95fc573684", size = 5645221 }, + { url = "https://files.pythonhosted.org/packages/21/21/d1865bd6a22f9a26217e4e1b35f9105f7a0cdfb7a5fffe8be48e1a1afafc/grpcio-1.68.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ab2d912ca39c51f46baf2a0d92aa265aa96b2443266fc50d234fa88bf877d8e", size = 6292270 }, + { url = "https://files.pythonhosted.org/packages/3a/f6/19798be6c3515a7b1fb9570198c91710472e2eb21f1900109a76834829e3/grpcio-1.68.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95c87ce2a97434dffe7327a4071839ab8e8bffd0054cc74cbe971fba98aedd60", size = 5905978 }, + { url = "https://files.pythonhosted.org/packages/9b/43/c3670a657445cd55be1246f64dbc3a6a33cab0f0141c5836df2e04f794c8/grpcio-1.68.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:e4842e4872ae4ae0f5497bf60a0498fa778c192cc7a9e87877abd2814aca9475", size = 6630444 }, + { url = "https://files.pythonhosted.org/packages/80/69/fbbebccffd266bea4268b685f3e8e03613405caba69e93125dc783036465/grpcio-1.68.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:255b1635b0ed81e9f91da4fcc8d43b7ea5520090b9a9ad9340d147066d1d3613", size = 6200324 }, + { url = 
"https://files.pythonhosted.org/packages/65/5c/27a26c21916f94f0c1585111974a5d5a41d8420dcb42c2717ee514c97a97/grpcio-1.68.1-cp312-cp312-win32.whl", hash = "sha256:7dfc914cc31c906297b30463dde0b9be48e36939575eaf2a0a22a8096e69afe5", size = 3638381 }, + { url = "https://files.pythonhosted.org/packages/a3/ba/ba6b65ccc93c7df1031c6b41e45b79a5a37e46b81d816bb3ea68ba476d77/grpcio-1.68.1-cp312-cp312-win_amd64.whl", hash = "sha256:a0c8ddabef9c8f41617f213e527254c41e8b96ea9d387c632af878d05db9229c", size = 4389959 }, +] + +[[package]] +name = "grpcio-status" +version = "1.68.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "googleapis-common-protos" }, + { name = "grpcio" }, + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/57/db/db3911a9009f03b55e60cf13e3e29dfce423c0e501ec976794c7cbbbcd1b/grpcio_status-1.68.1.tar.gz", hash = "sha256:e1378d036c81a1610d7b4c7a146cd663dd13fcc915cf4d7d053929dba5bbb6e1", size = 13667 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/86/1c/59dfc81f27f252bef2cd52c57157bf381cb3738185d3087ac4c9ff3376b0/grpcio_status-1.68.1-py3-none-any.whl", hash = "sha256:66f3d8847f665acfd56221333d66f7ad8927903d87242a482996bdb45e8d28fd", size = 14427 }, +] + [[package]] name = "gsheets" version = "0.6.1" @@ -3620,6 +3708,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248 }, ] +[[package]] +name = "pandas-gbq" +version = "0.25.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "db-dtypes" }, + { name = "google-api-core" }, + { name = "google-auth" }, + { name = "google-auth-oauthlib" }, + { name = "google-cloud-bigquery" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "pyarrow" }, + { name = 
"pydata-google-auth" }, + { name = "setuptools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1c/45/11cc72ad49587112a95bb053a271660816c205b8e55159763991776138a1/pandas_gbq-0.25.0.tar.gz", hash = "sha256:e16362701788c2b528e0202ac744195b1e1c9153e1fe5cfa130401756077e2d7", size = 60705 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/7e/3efc07e054ebc950fe7370937b2a073aa10240baf0559357d08193359962/pandas_gbq-0.25.0-py2.py3-none-any.whl", hash = "sha256:fbb89146c959cf54ed9003db0a5775af908c41de8cf5f24739fa8959893c67e6", size = 35799 }, +] + [[package]] name = "pandas-stubs" version = "1.2.0.62" @@ -4180,6 +4290,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/af/93/06d44e08277b3b818b75bd5f25e879d7693e4b7dd3505fde89916fcc9ca2/pydantic_core-2.20.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:254ec27fdb5b1ee60684f91683be95e5133c994cc54e86a0b0963afa25c8f8a6", size = 1914966 }, ] +[[package]] +name = "pydata-google-auth" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "google-auth-oauthlib" }, + { name = "setuptools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/92/51140850fa44d33f41a393df4767d3f7bd63169e1edc2358397f2a5b57bb/pydata-google-auth-1.9.0.tar.gz", hash = "sha256:2f546e88f007dfdb050087556eb46d6008e351386a7b368096797fae5df374f2", size = 29791 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/34/91cf24101f7c456c0d9be43b975396772481c26e4b218c78f71769ee0555/pydata_google_auth-1.9.0-py2.py3-none-any.whl", hash = "sha256:e17a44ce8de5b48883667357c03595b85d80938bf1fb714d65bfac9a9f9c8add", size = 15552 }, +] + [[package]] name = "pydeck" version = "0.9.1" @@ -4404,32 +4528,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/f3/2f32fe143cd7ba1d4d68f1b6dce9ca402d909cbd5a5830e3a8fa3d1acbbf/pyproj-3.6.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:7a27151ddad8e1439ba70c9b4b2b617b290c39395fa9ddb7411ebb0eb86d6fb0", size = 6079779 }, ] -[[package]] -name = "pyreadr" -version = "0.5.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pandas" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/ab/9b/732136124fec08851ce846544b3d48e9f109638abd250f6c0311671db636/pyreadr-0.5.2.tar.gz", hash = "sha256:33d5747fe210d41e4a329afef2232c9d1258847094b87a2a96efffbef223fae8", size = 1175696 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f1/f5/087d0ea21bf31f43f2ca377f5f03b15e43fc406cd7e1fcfc73a9f4fc055e/pyreadr-0.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ab20da5c1b1a547316bd45f0a8984f3c7c26bb548a41c8c606da939766ae3bc9", size = 304689 }, - { url = "https://files.pythonhosted.org/packages/37/36/d394c0bd09c37e24e9a0435989ee1c5fe91470a0faf43475eb7a41493f68/pyreadr-0.5.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0b445a6ffe09922ade3dc003585a589163a5dbab2d006aebcd2e00c7738e17c0", size = 300820 }, - { url = "https://files.pythonhosted.org/packages/fa/3b/af53ad8bebc3aa9252ccc99d1394fa85c185daa34aac0295a286f58d6f29/pyreadr-0.5.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5552b17a2e32b885b789a50bfdf895663fb6fc2e0db85351b620a74ece474c6", size = 415788 }, - { url = "https://files.pythonhosted.org/packages/ca/69/0a6abc458561f7f84ee87ac2b872ea4cd355c376822e73ebc7d44ff94536/pyreadr-0.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35967860dace4a736108740b6127abb59c2e25c1b081f426181907666c7f9dac", size = 416973 }, - { url = "https://files.pythonhosted.org/packages/2a/00/9141eb7747cd6a04425f071adfb32226d77094766d9901b856345d832743/pyreadr-0.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:fae6a270842bd5d0d2729ab2ad6bb0125786b0b1023fd3cdc51deedc1d66cb61", size = 1251181 }, - { url = 
"https://files.pythonhosted.org/packages/cf/47/1f666551444156b18524632fb93da10bbcf9a9cf0cb20faebd1f85512558/pyreadr-0.5.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5b8fa9c92609e2b925900ba1e5f236a9d38bfd49ad37038e41ac1c7d853fddda", size = 301435 }, - { url = "https://files.pythonhosted.org/packages/9e/7a/f9b6786876cbc016019d7de7fef5ed7a5397fbb986cfa9f10fdbef52ec03/pyreadr-0.5.2-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:49b94459a1ed0003e59861f0ad088dc17f97632e451797b79cbf12fe312e6b6d", size = 305194 }, - { url = "https://files.pythonhosted.org/packages/8b/72/73b2979ee06d58844a4f7206dd96c4927a4406c15645945d69c90e00c968/pyreadr-0.5.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5698418c4fedcbc0b7600c967a2943a79803d39cf7f13b73999c58815f7a5ebc", size = 414793 }, - { url = "https://files.pythonhosted.org/packages/79/9f/a0e84861e0d0939b71830d30960c9215487d5278d2d89890988035f02b9c/pyreadr-0.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2af40aa8de034c24cdefd2dfdbf8c21277cf1c27cce063729604e48aa908aba", size = 416025 }, - { url = "https://files.pythonhosted.org/packages/61/c3/a5eb51a2c8d65c78da160618c6d892178aadf3b977928994efe6a27a5f8c/pyreadr-0.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:82e1a5388d8910eac75e3cd37696851c52175a0b263afaedd570e000d385781c", size = 1251639 }, - { url = "https://files.pythonhosted.org/packages/58/5f/e6c0726bb90c2bb7371d7b68b825c03aaf6bfc89d35b88844a4979de0767/pyreadr-0.5.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7750624a6158ba0393c0cbc7f44a169735799862089891408f9a83ece04662bf", size = 300822 }, - { url = "https://files.pythonhosted.org/packages/c0/b0/7e955064875988eb000286e405952f1b0931a366f15303e78abcb8a2256a/pyreadr-0.5.2-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:f0c8feaa26f5f45f91bfc07dbea7334c7c22cfaf3fa923e203f30a5c976eda71", size = 303775 }, - { url = 
"https://files.pythonhosted.org/packages/8a/74/b9c6b39170e7272ebdb44091dce6ad07181bce465becd22977e85d530b9a/pyreadr-0.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3f200d4bd7dcfa37b9d244f05b3708b7183bb7978c6865a1364f39727021fb0", size = 412713 }, - { url = "https://files.pythonhosted.org/packages/10/54/1cd3de1f08d8f86bf273de23d55a1b25e4dccaae45be116512a7b2cfff0b/pyreadr-0.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aa4919beb4cfa9c915e704b41d32d8c40a4f505a7c9bfdfc4930b3b4da5d2b8", size = 413567 }, - { url = "https://files.pythonhosted.org/packages/bb/2b/e16e4acc1bacde128add8ae3596f2cd0d899cb8d304332700827fd18c5f8/pyreadr-0.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:6bae17a8970b62c2af257ec8c5aad9d759a1bdc2a763e299ff82826d7140afe4", size = 1249250 }, -] - [[package]] name = "pyright" version = "1.1.373" @@ -5195,27 +5293,26 @@ wheels = [ [[package]] name = "ruff" -version = "0.8.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5e/2b/01245f4f3a727d60bebeacd7ee6d22586c7f62380a2597ddb22c2f45d018/ruff-0.8.2.tar.gz", hash = "sha256:b84f4f414dda8ac7f75075c1fa0b905ac0ff25361f42e6d5da681a465e0f78e5", size = 3349020 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/91/29/366be70216dba1731a00a41f2f030822b0c96c7c4f3b2c0cdce15cbace74/ruff-0.8.2-py3-none-linux_armv6l.whl", hash = "sha256:c49ab4da37e7c457105aadfd2725e24305ff9bc908487a9bf8d548c6dad8bb3d", size = 10530649 }, - { url = "https://files.pythonhosted.org/packages/63/82/a733956540bb388f00df5a3e6a02467b16c0e529132625fe44ce4c5fb9c7/ruff-0.8.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ec016beb69ac16be416c435828be702ee694c0d722505f9c1f35e1b9c0cc1bf5", size = 10274069 }, - { url = "https://files.pythonhosted.org/packages/3d/12/0b3aa14d1d71546c988a28e1b412981c1b80c8a1072e977a2f30c595cc4a/ruff-0.8.2-py3-none-macosx_11_0_arm64.whl", hash = 
"sha256:f05cdf8d050b30e2ba55c9b09330b51f9f97d36d4673213679b965d25a785f3c", size = 9909400 }, - { url = "https://files.pythonhosted.org/packages/23/08/f9f08cefb7921784c891c4151cce6ed357ff49e84b84978440cffbc87408/ruff-0.8.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60f578c11feb1d3d257b2fb043ddb47501ab4816e7e221fbb0077f0d5d4e7b6f", size = 10766782 }, - { url = "https://files.pythonhosted.org/packages/e4/71/bf50c321ec179aa420c8ec40adac5ae9cc408d4d37283a485b19a2331ceb/ruff-0.8.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cbd5cf9b0ae8f30eebc7b360171bd50f59ab29d39f06a670b3e4501a36ba5897", size = 10286316 }, - { url = "https://files.pythonhosted.org/packages/f2/83/c82688a2a6117539aea0ce63fdf6c08e60fe0202779361223bcd7f40bd74/ruff-0.8.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b402ddee3d777683de60ff76da801fa7e5e8a71038f57ee53e903afbcefdaa58", size = 11338270 }, - { url = "https://files.pythonhosted.org/packages/7f/d7/bc6a45e5a22e627640388e703160afb1d77c572b1d0fda8b4349f334fc66/ruff-0.8.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:705832cd7d85605cb7858d8a13d75993c8f3ef1397b0831289109e953d833d29", size = 12058579 }, - { url = "https://files.pythonhosted.org/packages/da/3b/64150c93946ec851e6f1707ff586bb460ca671581380c919698d6a9267dc/ruff-0.8.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:32096b41aaf7a5cc095fa45b4167b890e4c8d3fd217603f3634c92a541de7248", size = 11615172 }, - { url = "https://files.pythonhosted.org/packages/e4/9e/cf12b697ea83cfe92ec4509ae414dc4c9b38179cc681a497031f0d0d9a8e/ruff-0.8.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e769083da9439508833cfc7c23e351e1809e67f47c50248250ce1ac52c21fb93", size = 12882398 }, - { url = 
"https://files.pythonhosted.org/packages/a9/27/96d10863accf76a9c97baceac30b0a52d917eb985a8ac058bd4636aeede0/ruff-0.8.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fe716592ae8a376c2673fdfc1f5c0c193a6d0411f90a496863c99cd9e2ae25d", size = 11176094 }, - { url = "https://files.pythonhosted.org/packages/eb/10/cd2fd77d4a4e7f03c29351be0f53278a393186b540b99df68beb5304fddd/ruff-0.8.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:81c148825277e737493242b44c5388a300584d73d5774defa9245aaef55448b0", size = 10771884 }, - { url = "https://files.pythonhosted.org/packages/71/5d/beabb2ff18870fc4add05fa3a69a4cb1b1d2d6f83f3cf3ae5ab0d52f455d/ruff-0.8.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d261d7850c8367704874847d95febc698a950bf061c9475d4a8b7689adc4f7fa", size = 10382535 }, - { url = "https://files.pythonhosted.org/packages/ae/29/6b3fdf3ad3e35b28d87c25a9ff4c8222ad72485ab783936b2b267250d7a7/ruff-0.8.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1ca4e3a87496dc07d2427b7dd7ffa88a1e597c28dad65ae6433ecb9f2e4f022f", size = 10886995 }, - { url = "https://files.pythonhosted.org/packages/e9/dc/859d889b4d9356a1a2cdbc1e4a0dda94052bc5b5300098647e51a58c430b/ruff-0.8.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:729850feed82ef2440aa27946ab39c18cb4a8889c1128a6d589ffa028ddcfc22", size = 11220750 }, - { url = "https://files.pythonhosted.org/packages/0b/08/e8f519f61f1d624264bfd6b8829e4c5f31c3c61193bc3cff1f19dbe7626a/ruff-0.8.2-py3-none-win32.whl", hash = "sha256:ac42caaa0411d6a7d9594363294416e0e48fc1279e1b0e948391695db2b3d5b1", size = 8729396 }, - { url = "https://files.pythonhosted.org/packages/f8/d4/ba1c7ab72aba37a2b71fe48ab95b80546dbad7a7f35ea28cf66fc5cea5f6/ruff-0.8.2-py3-none-win_amd64.whl", hash = "sha256:2aae99ec70abf43372612a838d97bfe77d45146254568d94926e8ed5bbb409ea", size = 9594729 }, - { url = "https://files.pythonhosted.org/packages/23/34/db20e12d3db11b8a2a8874258f0f6d96a9a4d631659d54575840557164c8/ruff-0.8.2-py3-none-win_arm64.whl", 
hash = "sha256:fb88e2a506b70cfbc2de6fae6681c4f944f7dd5f2fe87233a7233d888bad73e8", size = 9035131 }, +version = "0.1.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/25/4c/2f786388acd82c295eedc4afeede7ef4b29cf27277151d8d13be906bac70/ruff-0.1.6.tar.gz", hash = "sha256:1b09f29b16c6ead5ea6b097ef2764b42372aebe363722f1605ecbcd2b9207184", size = 1719627 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b6/75/5054ec93ec0d5db26e218cb2814ddaa085ba1f29fad0ec56dd8107a97688/ruff-0.1.6-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:88b8cdf6abf98130991cbc9f6438f35f6e8d41a02622cc5ee130a02a0ed28703", size = 11628468 }, + { url = "https://files.pythonhosted.org/packages/a2/91/8b2920f6026c069ae0802fc3c44f7337e04bf2a198ce94bfab360073477a/ruff-0.1.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:5c549ed437680b6105a1299d2cd30e4964211606eeb48a0ff7a93ef70b902248", size = 5990066 }, + { url = "https://files.pythonhosted.org/packages/df/1e/03ef0cc5c7d03e50d4f954218551d6001f1f70e6f391cdb678efb5c6e6ab/ruff-0.1.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cf5f701062e294f2167e66d11b092bba7af6a057668ed618a9253e1e90cfd76", size = 5929313 }, + { url = "https://files.pythonhosted.org/packages/92/7c/38fd1b9cb624f5725a6a08c81bf7e823c64b28622ffcb4369c56dc0a16d0/ruff-0.1.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:05991ee20d4ac4bb78385360c684e4b417edd971030ab12a4fbd075ff535050e", size = 5627943 }, + { url = "https://files.pythonhosted.org/packages/c7/c3/98e3d0eb92e5a2ec10f76c71067640b6f21def23c3b1ff8f08ab6348255e/ruff-0.1.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87455a0c1f739b3c069e2f4c43b66479a54dea0276dd5d4d67b091265f6fd1dc", size = 6074206 }, + { url = 
"https://files.pythonhosted.org/packages/e8/33/62fb966eb70d9bb45ddf5023d40e26946a5e5127d99956b84c8a9a76b153/ruff-0.1.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:683aa5bdda5a48cb8266fcde8eea2a6af4e5700a392c56ea5fb5f0d4bfdc0240", size = 6743205 }, + { url = "https://files.pythonhosted.org/packages/c7/f1/60d43182f98113156a1b21a17f30541dda9f5ffcfeedc2b54dc030a2c413/ruff-0.1.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:137852105586dcbf80c1717facb6781555c4e99f520c9c827bd414fac67ddfb6", size = 6600581 }, + { url = "https://files.pythonhosted.org/packages/09/92/36850598e84f75cfe8edd252dbf40442b4cc226ed2c76206a9b3cbfb9986/ruff-0.1.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd98138a98d48a1c36c394fd6b84cd943ac92a08278aa8ac8c0fdefcf7138f35", size = 7708098 }, + { url = "https://files.pythonhosted.org/packages/3b/2f/8ef67614631622aa3ea79b27e01ac86d7f90a988520454e3a84cb2fd890f/ruff-0.1.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a0cd909d25f227ac5c36d4e7e681577275fb74ba3b11d288aff7ec47e3ae745", size = 6295145 }, + { url = "https://files.pythonhosted.org/packages/3c/4b/af366db98d15efe83fd3e3aae7319d3897e3475fc53a2f1b0287c8255422/ruff-0.1.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8fd1c62a47aa88a02707b5dd20c5ff20d035d634aa74826b42a1da77861b5ff", size = 5878111 }, + { url = "https://files.pythonhosted.org/packages/bf/af/25b794e750f1d74a83ce6b16625e3306beeb2161c517b9d883958de05526/ruff-0.1.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:fd89b45d374935829134a082617954120d7a1470a9f0ec0e7f3ead983edc48cc", size = 5629339 }, + { url = "https://files.pythonhosted.org/packages/81/b0/92c4cb6bceb19ebd27cedd1f45b337f7fd5397e6b760094831266be59661/ruff-0.1.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:491262006e92f825b145cd1e52948073c56560243b55fb3b4ecb142f6f0e9543", size = 5959772 }, + { url = 
"https://files.pythonhosted.org/packages/11/02/3a7e3101d88b113f326e0fdf3f566fba2600fc4b1fd828d56027d293e22d/ruff-0.1.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:ea284789861b8b5ca9d5443591a92a397ac183d4351882ab52f6296b4fdd5462", size = 6356759 }, + { url = "https://files.pythonhosted.org/packages/fc/93/8a4b3523c4564168aab720d8361b4bf12173b099509caaac93168b72927a/ruff-0.1.6-py3-none-win32.whl", hash = "sha256:1610e14750826dfc207ccbcdd7331b6bd285607d4181df9c1c6ae26646d6848a", size = 5835632 }, + { url = "https://files.pythonhosted.org/packages/40/e3/55ae013087bd892dd61c25895a119ffec2f9b9f7c3d1fd0cea5f5f7bd74a/ruff-0.1.6-py3-none-win_amd64.whl", hash = "sha256:4558b3e178145491e9bc3b2ee3c4b42f19d19384eaa5c59d10acf6e8f8b57e33", size = 6226075 }, + { url = "https://files.pythonhosted.org/packages/21/f7/fdce733e594e1b274fc9232256be6a4e03e8d2cb9e354c783801191e2f4d/ruff-0.1.6-py3-none-win_arm64.whl", hash = "sha256:03910e81df0d8db0e30050725a5802441c2022ea3ae4fe0609b76081731accbc", size = 5965823 }, ] [[package]]