Skip to content

Refresh data

Refresh data #317

Workflow file for this run

# Downloads the San Francisco street-tree list, stores it sorted in
# data/Street_Tree_List.csv, commits the file when it changed, and then calls
# the reusable build workflow.
name: Refresh data

on:
  push:
    branches:
      - main
  workflow_dispatch:
  schedule:
    # Once a day at 11:21 (GitHub evaluates cron schedules in UTC).
    - cron: "21 11 * * *"

jobs:
  scrape_fresh_data:
    runs-on: macos-latest
    steps:
      # NOTE(review): the action names/versions on the two `uses:` lines were
      # lost in the scraped source (rendered as "[email protected]").
      # checkout@v4 / setup-python@v5 are assumed -- confirm against the repo.
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12.x"

      # Presumably requirements.txt provides csv-diff, which the next step
      # invokes -- verify.
      - name: Install Python dependencies
        run: |
          pip install uv
          uv pip install -r requirements.txt --system

      - name: Fetch latest data
        run: |
          # Keep the previous version around so csv-diff can compare against it
          cp data/Street_Tree_List.csv Street_Tree_List-old.csv
          # --fail makes curl exit non-zero on an HTTP error instead of saving
          # the error page, so a failed download can never be sorted and
          # committed as if it were data
          curl --fail -o Street_Tree_List-unsorted.csv "https://data.sfgov.org/api/views/tkzw-k3nq/rows.csv?accessType=DOWNLOAD"
          # Remove heading line and use it to start a new file
          head -n 1 Street_Tree_List-unsorted.csv > data/Street_Tree_List.csv
          # Sort all but the first line and append to that file, so row order
          # is stable between runs
          tail -n +2 "Street_Tree_List-unsorted.csv" | sort >> data/Street_Tree_List.csv
          # Generate commit message using csv-diff
          csv-diff Street_Tree_List-old.csv data/Street_Tree_List.csv --key=TreeID --singular=tree --plural=trees > message.txt

      - name: Commit and push if it changed
        run: |-
          git config user.name "Automated"
          # NOTE(review): the address was obfuscated in the scraped source
          # ("[email protected]"); the conventional bot address is assumed --
          # confirm against the repo.
          git config user.email "actions@users.noreply.github.com"
          git add data/Street_Tree_List.csv
          # git commit exits non-zero when nothing is staged; end the step
          # successfully in that case rather than failing the run.
          # NOTE(review): this also hides any other commit failure.
          git commit -F message.txt || exit 0
          git push

  # Calls the reusable workflow in build.yml once scrape_fresh_data has
  # finished, forwarding the MOTHERDUCK_TOKEN secret to it.
  build_to_motherduck:
    uses: ./.github/workflows/build.yml
    needs: scrape_fresh_data
    secrets:
      MOTHERDUCK_TOKEN: ${{ secrets.MOTHERDUCK_TOKEN }}