diff --git a/apps/wizard/app_pages/producer_analytics.py b/apps/wizard/app_pages/producer_analytics.py
new file mode 100644
index 00000000000..3be62be9165
--- /dev/null
+++ b/apps/wizard/app_pages/producer_analytics.py
@@ -0,0 +1,564 @@
+from datetime import datetime, timedelta
+from typing import Optional, cast
+
+import owid.catalog.processing as pr
+import pandas as pd
+import plotly.express as px
+import streamlit as st
+from pandas_gbq import read_gbq
+from st_aggrid import AgGrid, GridUpdateMode, JsCode
+from st_aggrid.grid_options_builder import GridOptionsBuilder
+from structlog import get_logger
+
+from apps.wizard.utils.components import st_horizontal
+from etl.snapshot import Snapshot
+from etl.version_tracker import VersionTracker
+
+# Initialize log.
+log = get_logger()
+
+# Define constants.
+TODAY = datetime.today()  # Evaluated when this script is executed.
+# Date when the new views metric started to be recorded.
+MIN_DATE = datetime.strptime("2024-11-01", "%Y-%m-%d")
+GRAPHERS_BASE_URL = "https://ourworldindata.org/grapher/"
+# List of auxiliary steps (given as regular expressions) to be (optionally) excluded from the DAG.
+# It may be convenient to ignore these steps because the analytics are heavily affected by a few producers (e.g. those that are involved in the population and income groups datasets).
+AUXILIARY_STEPS = [
+ "data://garden/demography/.*/population",
+ # Primary energy consumption is loaded by GCB.
+ "data://garden/energy/.*/primary_energy_consumption",
+ "data://garden/ggdc/.*/maddison_project_database",
+ "data://garden/wb/.*/income_groups",
+]
+
+# PAGE CONFIG
+st.set_page_config(
+ page_title="Wizard: Producer analytics",
+ layout="wide",
+ page_icon="🪄",
+)
+
+
+########################################################################################################################
+# FUNCTIONS & GLOBAL VARS
+########################################################################################################################
+def columns_producer(min_date, max_date):
+    """Return the AgGrid column settings (header name and tooltip) of the producers table.
+
+    The tooltip of the custom-range column mentions the given `min_date` and `max_date`.
+    """
+    # Each entry is (column name, header name, header tooltip), in display order.
+    column_specs = [
+        ("producer", "Producer", "Name of the producer. This is NOT the name of the dataset."),
+        ("n_charts", "Charts", "Number of charts using data from a producer."),
+        (
+            "renders_custom",
+            "Views in custom range",
+            f"Number of renders between {min_date} and {max_date}.",
+        ),
+        ("renders_365d", "Views 365 days", "Number of renders in the last 365 days."),
+        ("renders_30d", "Views 30 days", "Number of renders in the last 30 days."),
+    ]
+    # Build the dictionary in the same key order as the specs above.
+    cols_prod = {}
+    for name, header, tooltip in column_specs:
+        cols_prod[name] = {
+            "headerName": header,
+            "headerTooltip": tooltip,
+        }
+    return cols_prod
+
+
+@st.cache_data(show_spinner=False)
+def get_grapher_views(
+    date_start: str = MIN_DATE.strftime("%Y-%m-%d"),
+    date_end: str = TODAY.strftime("%Y-%m-%d"),
+    groupby: Optional[list[str]] = None,
+    grapher_urls: Optional[list[str]] = None,
+) -> pd.DataFrame:
+    """Fetch the number of grapher renders from BigQuery, summed over the `groupby` columns.
+
+    Rows are restricted to days between `date_start` and `date_end` (both inclusive, formatted
+    as "YYYY-MM-DD"). If `grapher_urls` is given, only those URLs are considered; otherwise, all
+    URLs starting with GRAPHERS_BASE_URL. If `groupby` is not given, renders are summed per grapher.
+    The returned dataframe has the `groupby` columns and a "renders" column.
+    """
+    # Restrict the query to the given grapher URLs (interpolated verbatim), or to all grapher URLs.
+    if grapher_urls:
+        quoted_urls = ", ".join(f"'{url}'" for url in grapher_urls)
+        grapher_filter = f"AND grapher IN ({quoted_urls})"
+    else:
+        grapher_filter = f"AND grapher LIKE '{GRAPHERS_BASE_URL}%'"
+
+    # Prepare the query. The simplest case (no groupby given) gives the total views of each grapher.
+    groupby_clause = ", ".join(groupby if groupby else ["grapher"])
+    select_clause = f"{groupby_clause}, SUM(events) AS renders"
+    query = f"""
+ SELECT
+ {select_clause}
+ FROM prod_google_analytics4.grapher_views_by_day_page_grapher_device_country_iframe
+ WHERE
+ day >= '{date_start}'
+ AND day <= '{date_end}'
+ {grapher_filter}
+ GROUP BY {groupby_clause}
+ ORDER BY {groupby_clause}
+ """
+
+    # Execute the query.
+    df_views = read_gbq(query, project_id="owid-analytics")
+    return cast(pd.DataFrame, df_views)
+
+
+@st.cache_data(show_spinner=False)
+def get_chart_renders(min_date: str, max_date: str) -> pd.DataFrame:
+    """Return one row per grapher URL, with its renders in the last 365 days, the last 30 days,
+    and the custom range between `min_date` and `max_date`.
+    """
+    date_format = "%Y-%m-%d"
+    today_str = TODAY.strftime(date_format)
+    # Map each output column to the (start, end) dates of the range it covers.
+    date_ranges = {
+        "renders_365d": ((TODAY - timedelta(days=365)).strftime(date_format), today_str),
+        "renders_30d": ((TODAY - timedelta(days=30)).strftime(date_format), today_str),
+        "renders_custom": (min_date, max_date),  # Use user-defined date range.
+    }
+
+    # Fetch the views of all grapher URLs for each range, naming the renders column after the range.
+    list_renders = []
+    for column_name, (date_start, date_end) in date_ranges.items():
+        df_range = get_grapher_views(date_start=date_start, date_end=date_end, grapher_urls=None, groupby=["grapher"])
+        list_renders.append(df_range.rename(columns={"renders": column_name}))
+
+    return pr.multi_merge(list_renders, on="grapher", how="outer")  # type: ignore
+
+
+@st.cache_data(show_spinner=False)
+def load_steps_df(excluded_steps) -> pd.DataFrame:
+    """Return the steps dataframe of a VersionTracker that ignores `excluded_steps`."""
+    tracker = VersionTracker(exclude_steps=excluded_steps)
+
+    return tracker.steps_df
+
+
+@st.cache_data(show_spinner=False)
+def load_steps_df_with_producer_data(excluded_steps) -> pd.DataFrame:
+    """Return a dataframe with one row per producer-chart pair.
+
+    Producers are read from the origin metadata of each active snapshot in the steps dataframe
+    (ignoring `excluded_steps`). The result has columns "producer", "all_chart_slugs" (a single
+    slug per row, or NaN for producers without charts) and "grapher" (the chart URL).
+    """
+    # Load steps dataframe.
+    steps_df = load_steps_df(excluded_steps=excluded_steps)
+
+    # Select only active snapshots, and only the relevant columns.
+    is_active_snapshot = (steps_df["channel"] == "snapshot") & (steps_df["state"] == "active")
+    df = steps_df.loc[is_active_snapshot, ["step", "all_chart_slugs"]].reset_index(drop=True)
+
+    # Find the producer of each snapshot (where possible).
+    # NOTE: The column is assigned in one go, so that it exists even if no snapshot has a producer
+    # (otherwise the column selection below would raise a KeyError).
+    producers = []
+    for step in df["step"]:
+        # Remove the "snapshot://" or "snapshot-private://" prefix.
+        snap = Snapshot(step.split("://", 1)[1])
+        origin = snap.metadata.origin
+        has_producer = (origin is not None) and (snap.metadata.namespace not in ["dummy"])
+        producers.append(origin.producer if has_producer else None)  # type: ignore
+    df["producer"] = producers
+
+    # Keep only rows with a producer, and only the relevant columns.
+    df = df[["producer", "all_chart_slugs"]].dropna(subset=["producer"]).reset_index(drop=True)
+
+    # Ignore the chart id, and keep only the (unique, sorted) slugs.
+    df["all_chart_slugs"] = [sorted({slug for _, slug in id_slug}) for id_slug in df["all_chart_slugs"]]
+
+    # Create a row for each producer-slug pair (producers with no charts get a row with NaN slug).
+    # Then remove duplicates, which happen because df contains one row per snapshot: some grapher
+    # datasets combine multiple snapshots (often from the same producer), and each producer-chart
+    # pair should be counted only once.
+    df_expanded = df.explode("all_chart_slugs")
+    df_expanded = df_expanded.drop_duplicates(subset=["producer", "all_chart_slugs"]).reset_index(drop=True)
+    df_expanded["grapher"] = GRAPHERS_BASE_URL + df_expanded["all_chart_slugs"]
+
+    return df_expanded
+
+
+@st.cache_data(show_spinner=False)
+def get_producer_charts_analytics(min_date, max_date, excluded_steps):
+    """Return the producer-chart pairs with the numbers of renders of each chart.
+
+    The "renders_custom" column covers the range between `min_date` and `max_date`.
+    """
+    # Chart renders, using the user-defined date range for "renders_custom".
+    df_renders = get_chart_renders(min_date=min_date, max_date=max_date)
+    # Producer-chart pairs (left side of the merge, so that charts without renders are kept).
+    df_producers = load_steps_df_with_producer_data(excluded_steps=excluded_steps)
+
+    df_merged = df_producers.merge(df_renders, on="grapher", how="left")
+    return df_merged.drop(columns=["all_chart_slugs"])
+
+
+@st.cache_data(show_spinner=False)
+def get_producer_analytics_per_chart(min_date, max_date, excluded_steps):
+    """Return the renders of each producer-chart pair, sorted by renders in the custom range."""
+    df_all = get_producer_charts_analytics(min_date=min_date, max_date=max_date, excluded_steps=excluded_steps)
+
+    # Drop rows without a chart URL, and fill the remaining missing values (e.g. renders) with zero.
+    df_charts = df_all.dropna(subset=["grapher"]).fillna(0)
+
+    # Show the most viewed charts (in the custom range) first.
+    return df_charts.sort_values("renders_custom", ascending=False).reset_index(drop=True)
+
+
+@st.cache_data(show_spinner=False)
+def get_producer_analytics_per_producer(min_date, max_date, excluded_steps):
+    """Return the number of charts and the total renders of each producer.
+
+    The result is sorted by renders in the custom range (between `min_date` and `max_date`).
+    """
+
+    def _list_charts(graphers):
+        # Rows without a chart URL (NaN) should not be counted as charts.
+        return [grapher for grapher in graphers if pd.notna(grapher)]
+
+    df_expanded = get_producer_charts_analytics(min_date=min_date, max_date=max_date, excluded_steps=excluded_steps)
+
+    # For each producer, gather the list of chart URLs and add up the renders of all charts.
+    aggregations = {"grapher": _list_charts, "renders_365d": "sum", "renders_30d": "sum", "renders_custom": "sum"}
+    df_grouped = df_expanded.groupby("producer", observed=True, as_index=False).agg(aggregations)
+
+    # Count the charts of each producer, and then drop the lists of charts.
+    df_grouped["n_charts"] = df_grouped["grapher"].apply(len)
+
+    # Each chart should appear only once per producer (duplicates would be counted twice).
+    error = "Duplicated chart slugs found for a given producer."
+    assert all(len(charts) == len(set(charts)) for charts in df_grouped["grapher"]), error
+    df_grouped = df_grouped.drop(columns=["grapher"])
+
+    # Sort conveniently.
+    df_grouped = df_grouped.sort_values(["renders_custom"], ascending=False).reset_index(drop=True)
+    return df_grouped
+
+
+def show_producers_grid(df_producers, min_date, max_date):
+    """Show the table with analytics per producer, and return the producers selected by the user.
+
+    `min_date` and `max_date` are only used in the tooltip of the custom-range column.
+    """
+    builder = GridOptionsBuilder.from_dataframe(df_producers)
+    builder.configure_grid_options(domLayout="autoHeight", enableCellTextSelection=True)
+    # Let the user select multiple producers using checkboxes.
+    builder.configure_selection(
+        selection_mode="multiple",
+        use_checkbox=True,
+        rowMultiSelectWithClick=True,
+        suppressRowDeselection=False,
+        groupSelectsChildren=True,
+        groupSelectsFiltered=True,
+    )
+    builder.configure_default_column(editable=False, groupable=True, sortable=True, filterable=True, resizable=True)
+
+    # Enable column auto-sizing for the grid.
+    builder.configure_grid_options(suppressSizeToFit=False)  # Allows dynamic resizing to fit.
+    builder.configure_default_column(autoSizeColumns=True)  # Ensures all columns can auto-size.
+
+    # Configure individual columns with specific settings.
+    for column, settings in columns_producer(min_date, max_date).items():
+        builder.configure_column(column, **settings)
+    # Configure pagination with a fixed page size.
+    builder.configure_pagination(paginationAutoPageSize=False, paginationPageSize=20)
+
+    # Custom CSS to ensure the table stretches across the page.
+    stretch_css = {
+        "max-width": "100% !important",
+        "width": "100% !important",
+        "margin": "0 auto !important",  # Centers the grid horizontally.
+    }
+    # Display the grid table with the updated grid options.
+    grid_response = AgGrid(
+        data=df_producers,
+        gridOptions=builder.build(),
+        height=1000,
+        width="100%",
+        update_mode=GridUpdateMode.MODEL_CHANGED,
+        fit_columns_on_grid_load=True,  # Automatically adjust columns when the grid loads.
+        allow_unsafe_jscode=True,
+        theme="streamlit",
+        custom_css={".ag-theme-streamlit": stretch_css},
+        # excel_export_mode=ExcelExportMode.MANUAL,  # Doesn't work?
+    )
+
+    # Get the producers selected in the table.
+    selected_rows = grid_response["selected_rows"]
+
+    return [row["producer"] for row in selected_rows]
+
+
+def plot_chart_analytics(df):
+    """Plot the daily views of the charts in `df`, and return some summary data.
+
+    The plot shows the total daily views of all charts, and the daily views of the top 10 charts.
+    NOTE: The date range is taken from the module-level variables `min_date` and `max_date`.
+
+    Returns the total views, the average daily views, and a dataframe with the total views of
+    each of the top 10 charts.
+    """
+    # List all chart URLs, and the 10 with the most views in the custom date range.
+    urls_all = df["grapher"].unique().tolist()  # type: ignore
+    urls_by_views = df.sort_values("renders_custom", ascending=False)["grapher"].unique().tolist()  # type: ignore
+    urls_top_10 = urls_by_views[0:10]
+
+    # Get total daily views of all charts, and daily views of each of the top 10 charts.
+    df_total = get_grapher_views(date_start=min_date, date_end=max_date, groupby=["day"], grapher_urls=urls_all)
+    df_top_10 = get_grapher_views(
+        date_start=min_date, date_end=max_date, groupby=["day", "grapher"], grapher_urls=urls_top_10
+    )
+
+    # Get total number of views and average daily views.
+    total_views = df_total["renders"].sum()
+    average_daily_views = df_total["renders"].mean()
+    # Get total views of the top 10 charts in the selected date range.
+    df_top_10_total_views = df_top_10.groupby("grapher", as_index=False).agg({"renders": "sum"})
+
+    # Prepare the data for a line chart, where the total is shown as one more "chart".
+    df_plot = pd.concat([df_total.assign(grapher="Total"), df_top_10])
+    df_plot = df_plot.rename(columns={"grapher": "Chart slug"})
+    # Keep only the slug (the last part of the URL), and show the day of the week next to the date.
+    df_plot["Chart slug"] = df_plot["Chart slug"].apply(lambda url: url.split("/")[-1])
+    df_plot["day"] = pd.to_datetime(df_plot["day"]).dt.strftime("%a. %Y-%m-%d")
+
+    # Create and display the line chart.
+    line_title = "Total daily views and views of top 10 charts"
+    fig = px.line(df_plot, x="day", y="renders", color="Chart slug", title=line_title)
+    fig = fig.update_layout(xaxis_title=None, yaxis_title=None)
+    st.plotly_chart(fig, use_container_width=True)
+
+    return total_views, average_daily_views, df_top_10_total_views
+
+
+def show_producer_charts_grid(df):
+    """Show the table with analytics of each producer-chart pair.
+
+    Chart URLs are rendered as clickable links that display only the chart slug (the last part
+    of the URL), and open in a new tab.
+    The table is paginated (20 rows per page).
+    NOTE: The dates in the tooltips are taken from the module-level `min_date` and `max_date`.
+    """
+    # Create a JavaScript renderer for clickable slugs.
+    grapher_slug_jscode = JsCode(
+        r"""
+ class UrlCellRenderer {
+ init(params) {
+ this.eGui = document.createElement('a');
+ if (params.value) {
+ // Extract the slug from the full URL.
+ const url = new URL(params.value);
+ const slug = url.pathname.split('/').pop(); // Get the last part of the path as the slug.
+ this.eGui.innerText = slug;
+ this.eGui.setAttribute('href', params.value);
+ } else {
+ this.eGui.innerText = '';
+ }
+ this.eGui.setAttribute('style', "text-decoration:none; color:blue");
+ this.eGui.setAttribute('target', "_blank");
+ }
+ getGui() {
+ return this.eGui;
+ }
+ }
+ """
+    )
+
+    # Configuration of the "grapher" column, which uses the renderer above.
+    grapher_column_config = {
+        "headerName": "Chart URL",
+        "headerTooltip": "URL of the chart in the grapher.",
+        "cellRenderer": grapher_slug_jscode,
+    }
+    # Define columns to be shown: some of the columns of the producers table, and the grapher URL.
+    COLUMNS_PRODUCERS = columns_producer(min_date, max_date)
+    COLUMNS_PRODUCER_CHARTS = {
+        column: COLUMNS_PRODUCERS[column] for column in ["producer", "renders_custom", "renders_365d", "renders_30d"]
+    }
+    COLUMNS_PRODUCER_CHARTS["grapher"] = grapher_column_config
+
+    # Configure the table (the builder is created only once).
+    gb2 = GridOptionsBuilder.from_dataframe(df)
+    gb2.configure_grid_options(domLayout="autoHeight", enableCellTextSelection=True)
+    gb2.configure_default_column(editable=False, groupable=True, sortable=True, filterable=True, resizable=True)
+
+    # Apply column configurations directly from the dictionary.
+    for column, config in COLUMNS_PRODUCER_CHARTS.items():
+        gb2.configure_column(column, **config)
+
+    # Configure pagination with a fixed page size.
+    gb2.configure_pagination(paginationAutoPageSize=False, paginationPageSize=20)
+    grid_options2 = gb2.build()
+
+    # Display the grid.
+    AgGrid(
+        data=df,
+        gridOptions=grid_options2,
+        height=500,
+        width="100%",
+        fit_columns_on_grid_load=True,
+        allow_unsafe_jscode=True,
+        theme="streamlit",
+        # excel_export_mode=ExcelExportMode.MANUAL,  # Doesn't work?
+    )
+
+
+def prepare_summary(
+    df_top_10_total_views, producers_selected, total_views, average_daily_views, min_date, max_date
+) -> str:
+    """Prepare the summary (shown at the end of the app) to be shared with data producers.
+
+    If no producers are selected, the summary refers to "all producers".
+    """
+    # NOTE: The average is NaN if there are no views in the date range (and NaN cannot be rounded).
+    average_views = 0 if pd.isna(average_daily_views) else round(average_daily_views)
+    total_views_str = f"{total_views:9,}"
+    average_views_str = f"{average_views:9,}"
+    if not producers_selected:
+        producers_selected_str = "all producers"
+    elif len(producers_selected) == 1:
+        producers_selected_str = producers_selected[0]
+    else:
+        producers_selected_str = ", ".join(producers_selected[:-1]) + " and " + producers_selected[-1]
+    # Prepare a summary of the top 10 charts to be copy-pasted.
+    # NOTE: I tried .to_string() and .to_markdown() and couldn't find a way to keep a meaningful format.
+    df_sorted = df_top_10_total_views.sort_values("renders", ascending=False)
+    df_summary_str = "".join(f"{row['renders']:9,} - {row['grapher']}\n" for _, row in df_sorted.iterrows())
+    summary = f"""\
+Analytics of charts using data by {producers_selected_str} between {min_date} and {max_date}:
+- Total number of chart views: {total_views_str}
+- Average daily chart views: {average_views_str}
+- Views of top performing charts:
+{df_summary_str}
+
+ """
+    return summary
+
+
+########################################################################################################################
+# RENDER
+########################################################################################################################
+
+# Streamlit app layout.
+st.title(":material/bar_chart: Producer analytics")
+st.markdown("Explore analytics of data producers.")
+
+# SEARCH BOX
+with st.container(border=True):
+ st.markdown(
+ f"Select a custom date range (note that this metric started to be recorded on {MIN_DATE.strftime('%Y-%m-%d')})."
+ )
+
+ with st_horizontal(vertical_alignment="center"):
+ # Create input fields for minimum and maximum dates (converted to "YYYY-MM-DD" strings).
+ min_date = st.date_input(
+ "Select minimum date",
+ value=MIN_DATE,
+ key="min_date",
+ format="YYYY-MM-DD",
+ ).strftime( # type: ignore
+ "%Y-%m-%d"
+ )
+ max_date = st.date_input(
+ "Select maximum date",
+ value=TODAY,
+ key="max_date",
+ format="YYYY-MM-DD",
+ ).strftime( # type: ignore
+ "%Y-%m-%d"
+ )
+ exclude_auxiliary_steps = st.checkbox(
+ "Exclude auxiliary steps (e.g. population)",
+ False,
+ help="Exclude steps that are commonly used as auxiliary data, so they do not skew the analytics in favor of a few producers. But note that this will exclude all uses of these steps, even when they are the main datasets (not auxiliary). Auxiliary steps are:\n- "
+ + "\n- ".join(sorted(f"`{s}`" for s in AUXILIARY_STEPS)),
+ )
+
+if exclude_auxiliary_steps:
+ # If the user wants to exclude auxiliary steps, take the default list of excluded steps (AUXILIARY_STEPS).
+ excluded_steps = AUXILIARY_STEPS
+else:
+ # Otherwise, do not exclude any steps.
+ excluded_steps = []
+
+########################################################################################################################
+# 1/ PRODUCER ANALYTICS: Display main table, with analytics per producer.
+# Allow the user to select a subset of producers.
+########################################################################################################################
+st.header("Analytics by producer")
+st.markdown(
+ "Total number of charts and chart views for each producer. Producers selected in this table will be used to filter the producer-charts table below."
+)
+
+# Load table content (number of charts and views of each producer, for the selected dates and excluded steps).
+with st.spinner("Loading producer data. We are accessing various databases. This can take few seconds..."):
+ df_producers = get_producer_analytics_per_producer(
+ min_date=min_date, max_date=max_date, excluded_steps=excluded_steps
+ )
+
+# Prepare and display the grid table with producer analytics, and get the producers selected by the user.
+producers_selected = show_producers_grid(
+ df_producers=df_producers,
+ min_date=min_date,
+ max_date=max_date,
+)
+
+########################################################################################################################
+# 2/ CHART ANALYTICS: Display a chart with the total number of daily views, and the daily views of the top performing charts.
+########################################################################################################################
+st.header("Analytics by chart")
+st.markdown("Number of views for each chart that uses data by the selected producers.")
+
+# Load detailed analytics per producer-chart.
+with st.spinner("Loading chart data. This can take few seconds..."):
+ df_producer_charts = get_producer_analytics_per_chart(
+ min_date=min_date, max_date=max_date, excluded_steps=excluded_steps
+ )
+
+# Restrict the producer-charts table to the producers selected in the first table (if any).
+if len(producers_selected) == 0:
+ # If no producers are selected, show all producer-charts.
+ df_producer_charts_filtered = df_producer_charts
+else:
+ # Filter producer-charts by selected producers.
+ df_producer_charts_filtered = df_producer_charts[df_producer_charts["producer"].isin(producers_selected)]
+
+# Show chart with chart analytics, and get some summary data.
+total_views, average_daily_views, df_top_10_total_views = plot_chart_analytics(df_producer_charts_filtered)
+
+# Show table with the views of each chart of the selected producers.
+show_producer_charts_grid(df_producer_charts_filtered)
+
+########################################################################################################################
+# 3/ SUMMARY: Display a summary to be shared with the data producer.
+########################################################################################################################
+
+# Prepare the summary to be copy-pasted.
+summary = prepare_summary(
+    df_top_10_total_views=df_top_10_total_views,
+    producers_selected=producers_selected,
+    total_views=total_views,
+    average_daily_views=average_daily_views,
+    min_date=min_date,
+    max_date=max_date,
+)
+
+# Display the instructions, followed by the summary in a code box (which has a copy button).
+st.markdown(
+    """## Summary for data producers
+
+For now, to share analytics with a data producer you can do any of the following:
+- **Table export**: Right-click on a cell in the table above and export as a CSV or Excel file.
+- **Chart export**: Click on the camera icon on the top right of the chart to download the chart as a PNG.
+- **Copy summary**: Click on the upper right corner of the box below to copy the summary to the clipboard.
+"""
+)
+st.code(summary, language="text")
diff --git a/apps/wizard/config/config.yml b/apps/wizard/config/config.yml
index f00f6090fc0..ca80284a32b 100644
--- a/apps/wizard/config/config.yml
+++ b/apps/wizard/config/config.yml
@@ -151,6 +151,13 @@ sections:
entrypoint: app_pages/dataset_explorer.py
icon: ":material/search:"
image_url: "https://upload.wikimedia.org/wikipedia/commons/c/c3/NGC_4414_%28NASA-med%29.jpg"
+ - title: "Producer analytics"
+ alias: producer-analytics
+ description: "Extract analytics to share with data producers"
+ maintainer: "@pablo"
+ entrypoint: app_pages/producer_analytics.py
+ icon: ":material/developer_board:"
+ image_url: "https://upload.wikimedia.org/wikipedia/commons/thumb/c/ce/Wikimedia_Product_Analytics_team_logo.svg/512px-Wikimedia_Product_Analytics_team_logo.svg.png"
- title: "Explorers"
description: |-
diff --git a/apps/wizard/utils/components.py b/apps/wizard/utils/components.py
index c2934548512..c82b4d84cc4 100644
--- a/apps/wizard/utils/components.py
+++ b/apps/wizard/utils/components.py
@@ -13,42 +13,43 @@
HORIZONTAL_STYLE = """
"""
@contextmanager
-def st_horizontal():
- st.markdown(HORIZONTAL_STYLE, unsafe_allow_html=True)
+def st_horizontal(vertical_alignment="baseline"):
+ h_style = HORIZONTAL_STYLE.format(vertical_alignment=vertical_alignment)
+ st.markdown(h_style, unsafe_allow_html=True)
with st.container():
st.markdown('', unsafe_allow_html=True)
yield
diff --git a/etl/version_tracker.py b/etl/version_tracker.py
index f75f9f8bbc0..20a78df7c75 100644
--- a/etl/version_tracker.py
+++ b/etl/version_tracker.py
@@ -1,3 +1,4 @@
+import re
from datetime import datetime
from enum import Enum
from pathlib import Path
@@ -259,6 +260,31 @@ def _recursive_get_all_step_dependencies_ndim(
return dependencies, memo
+def remove_steps_from_dag(dag: dict, exclude: list[str]) -> dict:
+    """
+    Remove specific steps (either active steps or dependencies) from the DAG.
+
+    This can be useful to ignore auxiliary datasets. The excluded steps are regular expressions, that
+    are matched from the start of each step (so they can have wildcards, but also match longer steps):
+        exclude = [
+            "data://garden/demography/.*/population",
+            "data://garden/wb/.*/income_groups",
+        ]
+    """
+    # Compile each pattern only once (instead of once per checked step).
+    patterns = [re.compile(pattern) for pattern in exclude]
+
+    def is_excluded(step: str) -> bool:
+        # Check if a step matches any exclude pattern.
+        return any(pattern.match(step) for pattern in patterns)
+
+    # Filter out steps and dependencies that match any element from the excluded list.
+    return {
+        step: {dep for dep in dependencies if not is_excluded(dep)}
+        for step, dependencies in dag.items()
+        if not is_excluded(step)
+    }
+
+
class VersionTracker:
"""Helper object that loads the dag, provides useful functions to check for versions and dataset dependencies, and
checks for inconsistencies.
@@ -299,6 +325,7 @@ def __init__(
warn_on_archivable: bool = True,
warn_on_unused: bool = True,
ignore_archive: bool = False,
+ exclude_steps: Optional[list[str]] = None,
):
# Load dag of active steps (a dictionary step: set of dependencies).
self.dag_active = load_dag(paths.DAG_FILE)
@@ -308,6 +335,13 @@ def __init__(
else:
# Load dag of active and archive steps.
self.dag_all = load_dag(paths.DAG_ARCHIVE_FILE)
+
+ # Optionally exclude certain steps and dependencies.
+ self.exclude_steps = exclude_steps
+ if self.exclude_steps:
+ self.dag_active = remove_steps_from_dag(self.dag_active, self.exclude_steps)
+ self.dag_all = remove_steps_from_dag(self.dag_all, self.exclude_steps)
+
# Create a reverse dag (a dictionary where each item is step: set of usages).
self.dag_all_reverse = reverse_graph(graph=self.dag_all)
# Create a reverse dag (a dictionary where each item is step: set of usages) of active steps.
@@ -621,14 +655,14 @@ def _days_since_step_creation(version):
] = UpdateState.ARCHIVABLE.value
# There are special steps that, even though they are archivable or unused, we want to keep in the active dag.
- steps_active_df.loc[steps_active_df["step"].isin(self.ARCHIVABLE_STEPS_TO_KEEP), "update_state"] = (
- UpdateState.UP_TO_DATE.value
- )
+ steps_active_df.loc[
+ steps_active_df["step"].isin(self.ARCHIVABLE_STEPS_TO_KEEP), "update_state"
+ ] = UpdateState.UP_TO_DATE.value
# All explorers and external steps should be considered up to date.
- steps_active_df.loc[steps_active_df["channel"].isin(["explorers", "external"]), "update_state"] = (
- UpdateState.UP_TO_DATE.value
- )
+ steps_active_df.loc[
+ steps_active_df["channel"].isin(["explorers", "external"]), "update_state"
+ ] = UpdateState.UP_TO_DATE.value
# Add update state to archived steps.
steps_inactive_df["update_state"] = UpdateState.ARCHIVED.value
diff --git a/pyproject.toml b/pyproject.toml
index 3e524a1b5ca..ca6c8ecde91 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -66,7 +66,6 @@ dependencies = [
"scikit-learn>=1.5.2",
"geopy>=2.4.1",
"py7zr>=0.22.0",
- "pyreadr>=0.5.2",
]
[tool.uv.sources]
@@ -107,7 +106,7 @@ dev-dependencies = [
# unpinning those would introduce tons of type errors
"pyright==1.1.373",
"pandas-stubs==1.2.0.62",
- "ruff>=0.8.2",
+ "ruff==0.1.6",
"ipdb>=0.13.13",
]
@@ -135,6 +134,7 @@ wizard = [
"torch<2.3.0",
"sentence-transformers>=2.2.2",
"moviepy>=2.1.1",
+ "pandas-gbq>=0.25.0",
]
[project.scripts]
@@ -147,17 +147,16 @@ compare = 'etl.compare:cli'
backport = 'apps.backport.backport:backport_cli'
[tool.ruff]
-lint.extend-select = [
+extend-select = [
# isort
"I"
]
-lint.ignore = ["E501"]
+ignore = ["E501"]
line-length = 120
target-version = "py310"
extend-exclude = [
".ipynb_checkpoints",
"*cookiecutter",
- "*.ipynb",
]
[build-system]
diff --git a/uv.lock b/uv.lock
index ced25ca0fd1..2f4ecd7dbac 100644
--- a/uv.lock
+++ b/uv.lock
@@ -796,6 +796,21 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686 },
]
+[[package]]
+name = "db-dtypes"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "numpy" },
+ { name = "packaging" },
+ { name = "pandas" },
+ { name = "pyarrow" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e9/80/00d501391fc5450c37b5fe0c25596e629a9fa2868fe4f194d69c5622210e/db_dtypes-1.3.1.tar.gz", hash = "sha256:a058f05dab100891f3e76a7a3db9ad0f107f18dd3d1bdd13680749a2f07eae77", size = 32492 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a2/e0/2eeacbe14b5a3a3b4ad5fd2c1e7959bd2201e4daeba851579fa78e5eccb1/db_dtypes-1.3.1-py2.py3-none-any.whl", hash = "sha256:fbc9d1740d94aaf2b5ae24601cfc875a69b4635bb9d049e3c3036e9f10203af8", size = 18742 },
+]
+
[[package]]
name = "debugpy"
version = "1.8.5"
@@ -938,7 +953,6 @@ dependencies = [
{ name = "pyhumps" },
{ name = "pymysql" },
{ name = "pypdf2" },
- { name = "pyreadr" },
{ name = "python-docx" },
{ name = "python-dotenv" },
{ name = "pyyaml" },
@@ -979,6 +993,7 @@ api = [
wizard = [
{ name = "geographiclib" },
{ name = "moviepy" },
+ { name = "pandas-gbq" },
{ name = "plotly" },
{ name = "pyproj" },
{ name = "sentence-transformers" },
@@ -1053,6 +1068,7 @@ requires-dist = [
{ name = "owid-datautils", editable = "lib/datautils" },
{ name = "owid-repack", editable = "lib/repack" },
{ name = "pandas", specifier = "==2.2.3" },
+ { name = "pandas-gbq", marker = "extra == 'wizard'", specifier = ">=0.25.0" },
{ name = "papermill", specifier = ">=2.3.3" },
{ name = "pdfplumber", specifier = ">=0.9.0" },
{ name = "plotly", marker = "extra == 'wizard'", specifier = ">=5.23.0" },
@@ -1063,7 +1079,6 @@ requires-dist = [
{ name = "pymysql", specifier = ">=1.1.1" },
{ name = "pypdf2", specifier = ">=2.11.1" },
{ name = "pyproj", marker = "extra == 'wizard'", specifier = ">=3.6.1" },
- { name = "pyreadr", specifier = ">=0.5.2" },
{ name = "python-docx", specifier = ">=1.1.2" },
{ name = "python-dotenv", specifier = ">=0.19.0" },
{ name = "pyyaml", specifier = ">=6.0.1" },
@@ -1132,7 +1147,7 @@ dev = [
{ name = "pyright", specifier = "==1.1.373" },
{ name = "pytest", specifier = ">=8.3.2" },
{ name = "pyyaml", specifier = ">=6.0.2" },
- { name = "ruff", specifier = ">=0.8.2" },
+ { name = "ruff", specifier = "==0.1.6" },
{ name = "sqlacodegen", git = "https://github.com/agronholm/sqlacodegen.git" },
{ name = "types-pyyaml", specifier = ">=6.0.12.20240808" },
{ name = "watchdog", specifier = ">=4.0.1" },
@@ -1496,6 +1511,12 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/79/53/2e340a6ed897fa2bdd6c1bf166b98c047fbb648463dfd2b209ca7d501984/google_api_core-2.19.2-py3-none-any.whl", hash = "sha256:53ec0258f2837dd53bbd3d3df50f5359281b3cc13f800c941dd15a9b5a415af4", size = 139427 },
]
+[package.optional-dependencies]
+grpc = [
+ { name = "grpcio" },
+ { name = "grpcio-status" },
+]
+
[[package]]
name = "google-api-python-client"
version = "2.145.0"
@@ -1552,6 +1573,24 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/1a/8e/22a28dfbd218033e4eeaf3a0533b2b54852b6530da0c0fe934f0cc494b29/google_auth_oauthlib-1.2.1-py2.py3-none-any.whl", hash = "sha256:2d58a27262d55aa1b87678c3ba7142a080098cbc2024f903c62355deb235d91f", size = 24930 },
]
+[[package]]
+name = "google-cloud-bigquery"
+version = "3.27.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "google-api-core", extra = ["grpc"] },
+ { name = "google-auth" },
+ { name = "google-cloud-core" },
+ { name = "google-resumable-media" },
+ { name = "packaging" },
+ { name = "python-dateutil" },
+ { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c0/05/633ce6686b1fed2cd364fa4698bfa6d586263cd4795d012584f8097061e1/google_cloud_bigquery-3.27.0.tar.gz", hash = "sha256:379c524054d7b090fa56d0c22662cc6e6458a6229b6754c0e7177e3a73421d2c", size = 456964 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/f5/40/4b11a4a8839de8ce802a3ccd60b34e70ce10d13d434a560534ba98f0ea3f/google_cloud_bigquery-3.27.0-py2.py3-none-any.whl", hash = "sha256:b53b0431e5ba362976a4cd8acce72194b4116cdf8115030c7b339b884603fcc3", size = 240100 },
+]
+
[[package]]
name = "google-cloud-core"
version = "2.4.1"
@@ -1667,6 +1706,55 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/0d/20/89674b7d62a19138b3352f6080f2ff3e1ee4a298b29bb793746423d0b908/greenlet-3.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:28fe80a3eb673b2d5cc3b12eea468a5e5f4603c26aa34d88bf61bba82ceb2f9b", size = 294647 },
]
+[[package]]
+name = "grpcio"
+version = "1.68.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/91/ec/b76ff6d86bdfd1737a5ec889394b54c18b1ec3832d91041e25023fbcb67d/grpcio-1.68.1.tar.gz", hash = "sha256:44a8502dd5de653ae6a73e2de50a401d84184f0331d0ac3daeb044e66d5c5054", size = 12694654 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/f5/88/d1ac9676a0809e3efec154d45246474ec12a4941686da71ffb3d34190294/grpcio-1.68.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:d35740e3f45f60f3c37b1e6f2f4702c23867b9ce21c6410254c9c682237da68d", size = 5171054 },
+ { url = "https://files.pythonhosted.org/packages/ec/cb/94ca41e100201fee8876a4b44d64e43ac7405929909afe1fa943d65b25ef/grpcio-1.68.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:d99abcd61760ebb34bdff37e5a3ba333c5cc09feda8c1ad42547bea0416ada78", size = 11078566 },
+ { url = "https://files.pythonhosted.org/packages/d5/b0/ad4c66f2e3181b4eab99885686c960c403ae2300bacfe427526282facc07/grpcio-1.68.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:f8261fa2a5f679abeb2a0a93ad056d765cdca1c47745eda3f2d87f874ff4b8c9", size = 5690039 },
+ { url = "https://files.pythonhosted.org/packages/67/1e/f5d3410674d021831c9fef2d1d7ca2357b08d09c840ad4e054ea8ffc302e/grpcio-1.68.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0feb02205a27caca128627bd1df4ee7212db051019a9afa76f4bb6a1a80ca95e", size = 6317470 },
+ { url = "https://files.pythonhosted.org/packages/91/93/701d5f33b163a621c8f2d4453f9e22f6c14e996baed54118d0dea93fc8c7/grpcio-1.68.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:919d7f18f63bcad3a0f81146188e90274fde800a94e35d42ffe9eadf6a9a6330", size = 5941884 },
+ { url = "https://files.pythonhosted.org/packages/67/44/06917ffaa35ca463b93dde60f324015fe4192312b0f4dd0faec061e7ca7f/grpcio-1.68.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:963cc8d7d79b12c56008aabd8b457f400952dbea8997dd185f155e2f228db079", size = 6646332 },
+ { url = "https://files.pythonhosted.org/packages/d4/94/074db039532687ec8ef07ebbcc747c46547c94329016e22b97d97b9e5f3b/grpcio-1.68.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ccf2ebd2de2d6661e2520dae293298a3803a98ebfc099275f113ce1f6c2a80f1", size = 6212515 },
+ { url = "https://files.pythonhosted.org/packages/c5/f2/0c939264c36c6038fae1732a2a3e01a7075ba171a2154d86842ee0ac9b0a/grpcio-1.68.1-cp310-cp310-win32.whl", hash = "sha256:2cc1fd04af8399971bcd4f43bd98c22d01029ea2e56e69c34daf2bf8470e47f5", size = 3650459 },
+ { url = "https://files.pythonhosted.org/packages/b6/90/b0e9278e88f747879d13b79fb893c9acb381fb90541ad9e416c7816c5eaf/grpcio-1.68.1-cp310-cp310-win_amd64.whl", hash = "sha256:ee2e743e51cb964b4975de572aa8fb95b633f496f9fcb5e257893df3be854746", size = 4399144 },
+ { url = "https://files.pythonhosted.org/packages/fe/0d/fde5a5777d65696c39bb3e622fe1239dd0a878589bf6c5066980e7d19154/grpcio-1.68.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:55857c71641064f01ff0541a1776bfe04a59db5558e82897d35a7793e525774c", size = 5180919 },
+ { url = "https://files.pythonhosted.org/packages/07/fd/e5fa75b5ddf5d9f16606196973f9c2b4b1adf5a1735117eb7129fc33d2ec/grpcio-1.68.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4b177f5547f1b995826ef529d2eef89cca2f830dd8b2c99ffd5fde4da734ba73", size = 11150922 },
+ { url = "https://files.pythonhosted.org/packages/86/1e/aaf5a1dae87fe47f277c5a1be72b31d2c209d095bebb0ce1d2df5cb8779c/grpcio-1.68.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:3522c77d7e6606d6665ec8d50e867f13f946a4e00c7df46768f1c85089eae515", size = 5685685 },
+ { url = "https://files.pythonhosted.org/packages/a9/69/c4fdf87d5c5696207e2ed232e4bdde656d8c99ba91f361927f3f06aa41ca/grpcio-1.68.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9d1fae6bbf0816415b81db1e82fb3bf56f7857273c84dcbe68cbe046e58e1ccd", size = 6316535 },
+ { url = "https://files.pythonhosted.org/packages/6f/c6/539660516ea7db7bc3d39e07154512ae807961b14ec6b5b0c58d15657ff1/grpcio-1.68.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:298ee7f80e26f9483f0b6f94cc0a046caf54400a11b644713bb5b3d8eb387600", size = 5939920 },
+ { url = "https://files.pythonhosted.org/packages/38/f3/97a74dc4dd95bf195168d6da2ca4731ab7d3d0b03078f2833b4ff9c4f48f/grpcio-1.68.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cbb5780e2e740b6b4f2d208e90453591036ff80c02cc605fea1af8e6fc6b1bbe", size = 6644770 },
+ { url = "https://files.pythonhosted.org/packages/cb/36/79a5e04073e58106aff442509a0c459151fa4f43202395db3eb8f77b78e9/grpcio-1.68.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ddda1aa22495d8acd9dfbafff2866438d12faec4d024ebc2e656784d96328ad0", size = 6211743 },
+ { url = "https://files.pythonhosted.org/packages/73/0f/2250f4a0de1a0bec0726c47a021cbf71af6105f512ecaf67703e2eb1ad2f/grpcio-1.68.1-cp311-cp311-win32.whl", hash = "sha256:b33bd114fa5a83f03ec6b7b262ef9f5cac549d4126f1dc702078767b10c46ed9", size = 3650734 },
+ { url = "https://files.pythonhosted.org/packages/4b/29/061c93a35f498238dc35eb8fb039ce168aa99cac2f0f1ce0c8a0a4bdb274/grpcio-1.68.1-cp311-cp311-win_amd64.whl", hash = "sha256:7f20ebec257af55694d8f993e162ddf0d36bd82d4e57f74b31c67b3c6d63d8b2", size = 4400816 },
+ { url = "https://files.pythonhosted.org/packages/f5/15/674a1468fef234fa996989509bbdfc0d695878cbb385b9271f5d690d5cd3/grpcio-1.68.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:8829924fffb25386995a31998ccbbeaa7367223e647e0122043dfc485a87c666", size = 5148351 },
+ { url = "https://files.pythonhosted.org/packages/62/f5/edce368682d6d0b3573b883b134df022a44b1c888ea416dd7d78d480ab24/grpcio-1.68.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:3aed6544e4d523cd6b3119b0916cef3d15ef2da51e088211e4d1eb91a6c7f4f1", size = 11127559 },
+ { url = "https://files.pythonhosted.org/packages/ce/14/a6fde3114eafd9e4e345d1ebd0291c544d83b22f0554b1678a2968ae39e1/grpcio-1.68.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:4efac5481c696d5cb124ff1c119a78bddbfdd13fc499e3bc0ca81e95fc573684", size = 5645221 },
+ { url = "https://files.pythonhosted.org/packages/21/21/d1865bd6a22f9a26217e4e1b35f9105f7a0cdfb7a5fffe8be48e1a1afafc/grpcio-1.68.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ab2d912ca39c51f46baf2a0d92aa265aa96b2443266fc50d234fa88bf877d8e", size = 6292270 },
+ { url = "https://files.pythonhosted.org/packages/3a/f6/19798be6c3515a7b1fb9570198c91710472e2eb21f1900109a76834829e3/grpcio-1.68.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95c87ce2a97434dffe7327a4071839ab8e8bffd0054cc74cbe971fba98aedd60", size = 5905978 },
+ { url = "https://files.pythonhosted.org/packages/9b/43/c3670a657445cd55be1246f64dbc3a6a33cab0f0141c5836df2e04f794c8/grpcio-1.68.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:e4842e4872ae4ae0f5497bf60a0498fa778c192cc7a9e87877abd2814aca9475", size = 6630444 },
+ { url = "https://files.pythonhosted.org/packages/80/69/fbbebccffd266bea4268b685f3e8e03613405caba69e93125dc783036465/grpcio-1.68.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:255b1635b0ed81e9f91da4fcc8d43b7ea5520090b9a9ad9340d147066d1d3613", size = 6200324 },
+ { url = "https://files.pythonhosted.org/packages/65/5c/27a26c21916f94f0c1585111974a5d5a41d8420dcb42c2717ee514c97a97/grpcio-1.68.1-cp312-cp312-win32.whl", hash = "sha256:7dfc914cc31c906297b30463dde0b9be48e36939575eaf2a0a22a8096e69afe5", size = 3638381 },
+ { url = "https://files.pythonhosted.org/packages/a3/ba/ba6b65ccc93c7df1031c6b41e45b79a5a37e46b81d816bb3ea68ba476d77/grpcio-1.68.1-cp312-cp312-win_amd64.whl", hash = "sha256:a0c8ddabef9c8f41617f213e527254c41e8b96ea9d387c632af878d05db9229c", size = 4389959 },
+]
+
+[[package]]
+name = "grpcio-status"
+version = "1.68.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "googleapis-common-protos" },
+ { name = "grpcio" },
+ { name = "protobuf" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/57/db/db3911a9009f03b55e60cf13e3e29dfce423c0e501ec976794c7cbbbcd1b/grpcio_status-1.68.1.tar.gz", hash = "sha256:e1378d036c81a1610d7b4c7a146cd663dd13fcc915cf4d7d053929dba5bbb6e1", size = 13667 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/86/1c/59dfc81f27f252bef2cd52c57157bf381cb3738185d3087ac4c9ff3376b0/grpcio_status-1.68.1-py3-none-any.whl", hash = "sha256:66f3d8847f665acfd56221333d66f7ad8927903d87242a482996bdb45e8d28fd", size = 14427 },
+]
+
[[package]]
name = "gsheets"
version = "0.6.1"
@@ -3620,6 +3708,28 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248 },
]
+[[package]]
+name = "pandas-gbq"
+version = "0.25.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "db-dtypes" },
+ { name = "google-api-core" },
+ { name = "google-auth" },
+ { name = "google-auth-oauthlib" },
+ { name = "google-cloud-bigquery" },
+ { name = "numpy" },
+ { name = "packaging" },
+ { name = "pandas" },
+ { name = "pyarrow" },
+ { name = "pydata-google-auth" },
+ { name = "setuptools" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/1c/45/11cc72ad49587112a95bb053a271660816c205b8e55159763991776138a1/pandas_gbq-0.25.0.tar.gz", hash = "sha256:e16362701788c2b528e0202ac744195b1e1c9153e1fe5cfa130401756077e2d7", size = 60705 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/f9/7e/3efc07e054ebc950fe7370937b2a073aa10240baf0559357d08193359962/pandas_gbq-0.25.0-py2.py3-none-any.whl", hash = "sha256:fbb89146c959cf54ed9003db0a5775af908c41de8cf5f24739fa8959893c67e6", size = 35799 },
+]
+
[[package]]
name = "pandas-stubs"
version = "1.2.0.62"
@@ -4180,6 +4290,20 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/af/93/06d44e08277b3b818b75bd5f25e879d7693e4b7dd3505fde89916fcc9ca2/pydantic_core-2.20.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:254ec27fdb5b1ee60684f91683be95e5133c994cc54e86a0b0963afa25c8f8a6", size = 1914966 },
]
+[[package]]
+name = "pydata-google-auth"
+version = "1.9.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "google-auth" },
+ { name = "google-auth-oauthlib" },
+ { name = "setuptools" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/96/92/51140850fa44d33f41a393df4767d3f7bd63169e1edc2358397f2a5b57bb/pydata-google-auth-1.9.0.tar.gz", hash = "sha256:2f546e88f007dfdb050087556eb46d6008e351386a7b368096797fae5df374f2", size = 29791 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/07/34/91cf24101f7c456c0d9be43b975396772481c26e4b218c78f71769ee0555/pydata_google_auth-1.9.0-py2.py3-none-any.whl", hash = "sha256:e17a44ce8de5b48883667357c03595b85d80938bf1fb714d65bfac9a9f9c8add", size = 15552 },
+]
+
[[package]]
name = "pydeck"
version = "0.9.1"
@@ -4404,32 +4528,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c7/f3/2f32fe143cd7ba1d4d68f1b6dce9ca402d909cbd5a5830e3a8fa3d1acbbf/pyproj-3.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:7a27151ddad8e1439ba70c9b4b2b617b290c39395fa9ddb7411ebb0eb86d6fb0", size = 6079779 },
]
-[[package]]
-name = "pyreadr"
-version = "0.5.2"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
- { name = "pandas" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/ab/9b/732136124fec08851ce846544b3d48e9f109638abd250f6c0311671db636/pyreadr-0.5.2.tar.gz", hash = "sha256:33d5747fe210d41e4a329afef2232c9d1258847094b87a2a96efffbef223fae8", size = 1175696 }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/f1/f5/087d0ea21bf31f43f2ca377f5f03b15e43fc406cd7e1fcfc73a9f4fc055e/pyreadr-0.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ab20da5c1b1a547316bd45f0a8984f3c7c26bb548a41c8c606da939766ae3bc9", size = 304689 },
- { url = "https://files.pythonhosted.org/packages/37/36/d394c0bd09c37e24e9a0435989ee1c5fe91470a0faf43475eb7a41493f68/pyreadr-0.5.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0b445a6ffe09922ade3dc003585a589163a5dbab2d006aebcd2e00c7738e17c0", size = 300820 },
- { url = "https://files.pythonhosted.org/packages/fa/3b/af53ad8bebc3aa9252ccc99d1394fa85c185daa34aac0295a286f58d6f29/pyreadr-0.5.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5552b17a2e32b885b789a50bfdf895663fb6fc2e0db85351b620a74ece474c6", size = 415788 },
- { url = "https://files.pythonhosted.org/packages/ca/69/0a6abc458561f7f84ee87ac2b872ea4cd355c376822e73ebc7d44ff94536/pyreadr-0.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35967860dace4a736108740b6127abb59c2e25c1b081f426181907666c7f9dac", size = 416973 },
- { url = "https://files.pythonhosted.org/packages/2a/00/9141eb7747cd6a04425f071adfb32226d77094766d9901b856345d832743/pyreadr-0.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:fae6a270842bd5d0d2729ab2ad6bb0125786b0b1023fd3cdc51deedc1d66cb61", size = 1251181 },
- { url = "https://files.pythonhosted.org/packages/cf/47/1f666551444156b18524632fb93da10bbcf9a9cf0cb20faebd1f85512558/pyreadr-0.5.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5b8fa9c92609e2b925900ba1e5f236a9d38bfd49ad37038e41ac1c7d853fddda", size = 301435 },
- { url = "https://files.pythonhosted.org/packages/9e/7a/f9b6786876cbc016019d7de7fef5ed7a5397fbb986cfa9f10fdbef52ec03/pyreadr-0.5.2-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:49b94459a1ed0003e59861f0ad088dc17f97632e451797b79cbf12fe312e6b6d", size = 305194 },
- { url = "https://files.pythonhosted.org/packages/8b/72/73b2979ee06d58844a4f7206dd96c4927a4406c15645945d69c90e00c968/pyreadr-0.5.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5698418c4fedcbc0b7600c967a2943a79803d39cf7f13b73999c58815f7a5ebc", size = 414793 },
- { url = "https://files.pythonhosted.org/packages/79/9f/a0e84861e0d0939b71830d30960c9215487d5278d2d89890988035f02b9c/pyreadr-0.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2af40aa8de034c24cdefd2dfdbf8c21277cf1c27cce063729604e48aa908aba", size = 416025 },
- { url = "https://files.pythonhosted.org/packages/61/c3/a5eb51a2c8d65c78da160618c6d892178aadf3b977928994efe6a27a5f8c/pyreadr-0.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:82e1a5388d8910eac75e3cd37696851c52175a0b263afaedd570e000d385781c", size = 1251639 },
- { url = "https://files.pythonhosted.org/packages/58/5f/e6c0726bb90c2bb7371d7b68b825c03aaf6bfc89d35b88844a4979de0767/pyreadr-0.5.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7750624a6158ba0393c0cbc7f44a169735799862089891408f9a83ece04662bf", size = 300822 },
- { url = "https://files.pythonhosted.org/packages/c0/b0/7e955064875988eb000286e405952f1b0931a366f15303e78abcb8a2256a/pyreadr-0.5.2-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:f0c8feaa26f5f45f91bfc07dbea7334c7c22cfaf3fa923e203f30a5c976eda71", size = 303775 },
- { url = "https://files.pythonhosted.org/packages/8a/74/b9c6b39170e7272ebdb44091dce6ad07181bce465becd22977e85d530b9a/pyreadr-0.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3f200d4bd7dcfa37b9d244f05b3708b7183bb7978c6865a1364f39727021fb0", size = 412713 },
- { url = "https://files.pythonhosted.org/packages/10/54/1cd3de1f08d8f86bf273de23d55a1b25e4dccaae45be116512a7b2cfff0b/pyreadr-0.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aa4919beb4cfa9c915e704b41d32d8c40a4f505a7c9bfdfc4930b3b4da5d2b8", size = 413567 },
- { url = "https://files.pythonhosted.org/packages/bb/2b/e16e4acc1bacde128add8ae3596f2cd0d899cb8d304332700827fd18c5f8/pyreadr-0.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:6bae17a8970b62c2af257ec8c5aad9d759a1bdc2a763e299ff82826d7140afe4", size = 1249250 },
-]
-
[[package]]
name = "pyright"
version = "1.1.373"
@@ -5195,27 +5293,26 @@ wheels = [
[[package]]
name = "ruff"
-version = "0.8.2"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/5e/2b/01245f4f3a727d60bebeacd7ee6d22586c7f62380a2597ddb22c2f45d018/ruff-0.8.2.tar.gz", hash = "sha256:b84f4f414dda8ac7f75075c1fa0b905ac0ff25361f42e6d5da681a465e0f78e5", size = 3349020 }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/91/29/366be70216dba1731a00a41f2f030822b0c96c7c4f3b2c0cdce15cbace74/ruff-0.8.2-py3-none-linux_armv6l.whl", hash = "sha256:c49ab4da37e7c457105aadfd2725e24305ff9bc908487a9bf8d548c6dad8bb3d", size = 10530649 },
- { url = "https://files.pythonhosted.org/packages/63/82/a733956540bb388f00df5a3e6a02467b16c0e529132625fe44ce4c5fb9c7/ruff-0.8.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ec016beb69ac16be416c435828be702ee694c0d722505f9c1f35e1b9c0cc1bf5", size = 10274069 },
- { url = "https://files.pythonhosted.org/packages/3d/12/0b3aa14d1d71546c988a28e1b412981c1b80c8a1072e977a2f30c595cc4a/ruff-0.8.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f05cdf8d050b30e2ba55c9b09330b51f9f97d36d4673213679b965d25a785f3c", size = 9909400 },
- { url = "https://files.pythonhosted.org/packages/23/08/f9f08cefb7921784c891c4151cce6ed357ff49e84b84978440cffbc87408/ruff-0.8.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60f578c11feb1d3d257b2fb043ddb47501ab4816e7e221fbb0077f0d5d4e7b6f", size = 10766782 },
- { url = "https://files.pythonhosted.org/packages/e4/71/bf50c321ec179aa420c8ec40adac5ae9cc408d4d37283a485b19a2331ceb/ruff-0.8.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cbd5cf9b0ae8f30eebc7b360171bd50f59ab29d39f06a670b3e4501a36ba5897", size = 10286316 },
- { url = "https://files.pythonhosted.org/packages/f2/83/c82688a2a6117539aea0ce63fdf6c08e60fe0202779361223bcd7f40bd74/ruff-0.8.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b402ddee3d777683de60ff76da801fa7e5e8a71038f57ee53e903afbcefdaa58", size = 11338270 },
- { url = "https://files.pythonhosted.org/packages/7f/d7/bc6a45e5a22e627640388e703160afb1d77c572b1d0fda8b4349f334fc66/ruff-0.8.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:705832cd7d85605cb7858d8a13d75993c8f3ef1397b0831289109e953d833d29", size = 12058579 },
- { url = "https://files.pythonhosted.org/packages/da/3b/64150c93946ec851e6f1707ff586bb460ca671581380c919698d6a9267dc/ruff-0.8.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:32096b41aaf7a5cc095fa45b4167b890e4c8d3fd217603f3634c92a541de7248", size = 11615172 },
- { url = "https://files.pythonhosted.org/packages/e4/9e/cf12b697ea83cfe92ec4509ae414dc4c9b38179cc681a497031f0d0d9a8e/ruff-0.8.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e769083da9439508833cfc7c23e351e1809e67f47c50248250ce1ac52c21fb93", size = 12882398 },
- { url = "https://files.pythonhosted.org/packages/a9/27/96d10863accf76a9c97baceac30b0a52d917eb985a8ac058bd4636aeede0/ruff-0.8.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fe716592ae8a376c2673fdfc1f5c0c193a6d0411f90a496863c99cd9e2ae25d", size = 11176094 },
- { url = "https://files.pythonhosted.org/packages/eb/10/cd2fd77d4a4e7f03c29351be0f53278a393186b540b99df68beb5304fddd/ruff-0.8.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:81c148825277e737493242b44c5388a300584d73d5774defa9245aaef55448b0", size = 10771884 },
- { url = "https://files.pythonhosted.org/packages/71/5d/beabb2ff18870fc4add05fa3a69a4cb1b1d2d6f83f3cf3ae5ab0d52f455d/ruff-0.8.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d261d7850c8367704874847d95febc698a950bf061c9475d4a8b7689adc4f7fa", size = 10382535 },
- { url = "https://files.pythonhosted.org/packages/ae/29/6b3fdf3ad3e35b28d87c25a9ff4c8222ad72485ab783936b2b267250d7a7/ruff-0.8.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1ca4e3a87496dc07d2427b7dd7ffa88a1e597c28dad65ae6433ecb9f2e4f022f", size = 10886995 },
- { url = "https://files.pythonhosted.org/packages/e9/dc/859d889b4d9356a1a2cdbc1e4a0dda94052bc5b5300098647e51a58c430b/ruff-0.8.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:729850feed82ef2440aa27946ab39c18cb4a8889c1128a6d589ffa028ddcfc22", size = 11220750 },
- { url = "https://files.pythonhosted.org/packages/0b/08/e8f519f61f1d624264bfd6b8829e4c5f31c3c61193bc3cff1f19dbe7626a/ruff-0.8.2-py3-none-win32.whl", hash = "sha256:ac42caaa0411d6a7d9594363294416e0e48fc1279e1b0e948391695db2b3d5b1", size = 8729396 },
- { url = "https://files.pythonhosted.org/packages/f8/d4/ba1c7ab72aba37a2b71fe48ab95b80546dbad7a7f35ea28cf66fc5cea5f6/ruff-0.8.2-py3-none-win_amd64.whl", hash = "sha256:2aae99ec70abf43372612a838d97bfe77d45146254568d94926e8ed5bbb409ea", size = 9594729 },
- { url = "https://files.pythonhosted.org/packages/23/34/db20e12d3db11b8a2a8874258f0f6d96a9a4d631659d54575840557164c8/ruff-0.8.2-py3-none-win_arm64.whl", hash = "sha256:fb88e2a506b70cfbc2de6fae6681c4f944f7dd5f2fe87233a7233d888bad73e8", size = 9035131 },
+version = "0.1.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/25/4c/2f786388acd82c295eedc4afeede7ef4b29cf27277151d8d13be906bac70/ruff-0.1.6.tar.gz", hash = "sha256:1b09f29b16c6ead5ea6b097ef2764b42372aebe363722f1605ecbcd2b9207184", size = 1719627 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/b6/75/5054ec93ec0d5db26e218cb2814ddaa085ba1f29fad0ec56dd8107a97688/ruff-0.1.6-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:88b8cdf6abf98130991cbc9f6438f35f6e8d41a02622cc5ee130a02a0ed28703", size = 11628468 },
+ { url = "https://files.pythonhosted.org/packages/a2/91/8b2920f6026c069ae0802fc3c44f7337e04bf2a198ce94bfab360073477a/ruff-0.1.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:5c549ed437680b6105a1299d2cd30e4964211606eeb48a0ff7a93ef70b902248", size = 5990066 },
+ { url = "https://files.pythonhosted.org/packages/df/1e/03ef0cc5c7d03e50d4f954218551d6001f1f70e6f391cdb678efb5c6e6ab/ruff-0.1.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cf5f701062e294f2167e66d11b092bba7af6a057668ed618a9253e1e90cfd76", size = 5929313 },
+ { url = "https://files.pythonhosted.org/packages/92/7c/38fd1b9cb624f5725a6a08c81bf7e823c64b28622ffcb4369c56dc0a16d0/ruff-0.1.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:05991ee20d4ac4bb78385360c684e4b417edd971030ab12a4fbd075ff535050e", size = 5627943 },
+ { url = "https://files.pythonhosted.org/packages/c7/c3/98e3d0eb92e5a2ec10f76c71067640b6f21def23c3b1ff8f08ab6348255e/ruff-0.1.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87455a0c1f739b3c069e2f4c43b66479a54dea0276dd5d4d67b091265f6fd1dc", size = 6074206 },
+ { url = "https://files.pythonhosted.org/packages/e8/33/62fb966eb70d9bb45ddf5023d40e26946a5e5127d99956b84c8a9a76b153/ruff-0.1.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:683aa5bdda5a48cb8266fcde8eea2a6af4e5700a392c56ea5fb5f0d4bfdc0240", size = 6743205 },
+ { url = "https://files.pythonhosted.org/packages/c7/f1/60d43182f98113156a1b21a17f30541dda9f5ffcfeedc2b54dc030a2c413/ruff-0.1.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:137852105586dcbf80c1717facb6781555c4e99f520c9c827bd414fac67ddfb6", size = 6600581 },
+ { url = "https://files.pythonhosted.org/packages/09/92/36850598e84f75cfe8edd252dbf40442b4cc226ed2c76206a9b3cbfb9986/ruff-0.1.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd98138a98d48a1c36c394fd6b84cd943ac92a08278aa8ac8c0fdefcf7138f35", size = 7708098 },
+ { url = "https://files.pythonhosted.org/packages/3b/2f/8ef67614631622aa3ea79b27e01ac86d7f90a988520454e3a84cb2fd890f/ruff-0.1.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a0cd909d25f227ac5c36d4e7e681577275fb74ba3b11d288aff7ec47e3ae745", size = 6295145 },
+ { url = "https://files.pythonhosted.org/packages/3c/4b/af366db98d15efe83fd3e3aae7319d3897e3475fc53a2f1b0287c8255422/ruff-0.1.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8fd1c62a47aa88a02707b5dd20c5ff20d035d634aa74826b42a1da77861b5ff", size = 5878111 },
+ { url = "https://files.pythonhosted.org/packages/bf/af/25b794e750f1d74a83ce6b16625e3306beeb2161c517b9d883958de05526/ruff-0.1.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:fd89b45d374935829134a082617954120d7a1470a9f0ec0e7f3ead983edc48cc", size = 5629339 },
+ { url = "https://files.pythonhosted.org/packages/81/b0/92c4cb6bceb19ebd27cedd1f45b337f7fd5397e6b760094831266be59661/ruff-0.1.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:491262006e92f825b145cd1e52948073c56560243b55fb3b4ecb142f6f0e9543", size = 5959772 },
+ { url = "https://files.pythonhosted.org/packages/11/02/3a7e3101d88b113f326e0fdf3f566fba2600fc4b1fd828d56027d293e22d/ruff-0.1.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:ea284789861b8b5ca9d5443591a92a397ac183d4351882ab52f6296b4fdd5462", size = 6356759 },
+ { url = "https://files.pythonhosted.org/packages/fc/93/8a4b3523c4564168aab720d8361b4bf12173b099509caaac93168b72927a/ruff-0.1.6-py3-none-win32.whl", hash = "sha256:1610e14750826dfc207ccbcdd7331b6bd285607d4181df9c1c6ae26646d6848a", size = 5835632 },
+ { url = "https://files.pythonhosted.org/packages/40/e3/55ae013087bd892dd61c25895a119ffec2f9b9f7c3d1fd0cea5f5f7bd74a/ruff-0.1.6-py3-none-win_amd64.whl", hash = "sha256:4558b3e178145491e9bc3b2ee3c4b42f19d19384eaa5c59d10acf6e8f8b57e33", size = 6226075 },
+ { url = "https://files.pythonhosted.org/packages/21/f7/fdce733e594e1b274fc9232256be6a4e03e8d2cb9e354c783801191e2f4d/ruff-0.1.6-py3-none-win_arm64.whl", hash = "sha256:03910e81df0d8db0e30050725a5802441c2022ea3ae4fe0609b76081731accbc", size = 5965823 },
]
[[package]]