# Scrape Data
# Workflow file for run "Scrape Data #50"

name: Scrape Data

on:
  schedule:
    # 01:00 UTC every day, i.e. 6:30am IST.
    - cron: "0 1 * * *"
  workflow_dispatch:
    inputs:
      # Read by the Slack scraping step as `inputs.slack-eod-channel`.
      # Only manual runs can supply it; scheduled runs carry no inputs, so
      # the value is empty on the cron trigger.
      slack-eod-channel:
        description: Slack channel to scrape EOD updates from
        required: false
        type: string
jobs:
  # Scrapes GitHub (and optionally Slack) activity, generates contributor
  # markdown, and commits the result back to the checked-out repository.
  fetch-data:
    name: Scrape data from GitHub and Slack
    runs-on: ubuntu-latest
    steps:
      # This pulls the caller workflow's repo (eg: coronasafe/leaderboard-data).
      # The explicit token is presumably what lets the final `git push`
      # authenticate - confirm against the repository's secret setup.
      - uses: actions/checkout@v4
        with:
          token: ${{ secrets.GIT_ACCESS_TOKEN }}

      # Fetch only the scraper and helper scripts from the leaderboard repo,
      # placed under ./leaderboard next to the data checked out above.
      - uses: actions/checkout@v4
        with:
          repository: coronasafe/leaderboard
          path: ./leaderboard
          sparse-checkout: |
            scraper
            scripts

      - name: setup python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: "3.10"

      # The scraper and scripts are checked out under ./leaderboard, so every
      # script path below carries that prefix. Output paths (data/,
      # contributors/) remain relative to the workspace root.
      - name: Install dependencies
        run: pip install -r leaderboard/scraper/requirements.txt

      - name: Scrape GitHub data
        uses: nick-fields/retry@v3
        with:
          timeout_minutes: 10
          max_attempts: 3
          command: python leaderboard/scraper/src/github.py ${{ github.repository_owner }} data/github -l DEBUG
          # Revert tracked-file modifications left behind by a failed attempt.
          on_retry_command: git checkout .
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # Runs only when both the token secret and the channel input are
      # non-empty. `inputs.slack-eod-channel` is empty unless the triggering
      # event supplies that input, in which case this step is skipped.
      - name: Scrape EOD updates from Slack Channel
        if: ${{ env.SLACK_API_TOKEN && env.SLACK_CHANNEL }}
        run: python leaderboard/scraper/src/slack.py --lookback_days=2
        env:
          SLACK_API_TOKEN: ${{ secrets.SLACK_API_TOKEN }}
          SLACK_CHANNEL: ${{ inputs.slack-eod-channel }}

      - name: Generate markdown files for new contributors
        run: node leaderboard/scripts/generateNewContributors.js
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Commit changes
        run: |
          git config user.name github-actions
          git config user.email github-actions@github.com
          git add data contributors
          # `git commit` exits non-zero when nothing is staged, which would
          # fail the run on a day with no new data; skip commit and push then.
          if git diff --cached --quiet; then
            echo "No changes to commit"
          else
            # The +5:30 offset presumably converts the runner's UTC clock to
            # IST for the commit message timestamp.
            git commit -m "🌍 Update JSON - $(date -d '+5 hours +30 minutes' +'%d %b %Y | %I:%M %p')"
            git push
          fi