# 1. Collect files for data release monthly #47

# Workflow identity and triggers.
name: "1. Collect files for data release monthly"

on:
  # Scheduled trigger: minute 30, hour 06, day-of-month 10, every month.
  # GitHub evaluates schedule expressions in UTC.
  schedule:
    - cron: "30 06 10 * *"
  # Manual trigger (no inputs).
  workflow_dispatch:
jobs:
  # Builds a dated release folder (gpml/svg/gmt/rdf) inside the
  # wikipathways-data-release checkout, mirrors it to "current", retires the
  # oldest dated folder, then commits and pushes the result.
  collect-files:
    name: Collect data release files
    runs-on: ubuntu-latest
    steps:
      # RELDATE (YYYYMMDD) names the release folder and every archive in it.
      # Workflow-level "env:" values are plain strings and are never run
      # through a shell, so the date is computed once here and exported via
      # GITHUB_ENV. Every later step then sees the same, already-evaluated
      # value as an ordinary environment variable.
      - name: Set release date
        run: echo "RELDATE=$(date +%Y%m%d)" >> "$GITHUB_ENV"

      # Target repository; the release is committed and pushed from here.
      - name: Checkout
        uses: actions/checkout@v4
        with:
          persist-credentials: false
          token: ${{ secrets.PICOPAT2 }}
          path: wikipathways-data-release
          fetch-depth: 0
          ref: main

      - name: Setup R
        uses: r-lib/actions/setup-r@v2

      # jq parses the organism/pathway index in the collection step.
      - name: Install jq
        run: |
          sudo apt-get update
          sudo apt-get install -y jq

      # Source of the per-pathway .gpml, .svg and -datanodes.tsv files.
      - name: Checkout Assets
        uses: actions/checkout@v4
        with:
          repository: wikipathways/wikipathways-assets
          path: wikipathways-assets
          fetch-depth: 0
          ref: main

      - name: Make data release folders
        run: |
          for KIND in gpml svg gmt rdf; do
            mkdir -p "wikipathways-data-release/${RELDATE}/${KIND}"
          done

      - name: Collect GPML, SVG, TSV and INFO files
        run: |
          # Abort on any failure, including a failed download: without
          # pipefail and curl -f an unreachable index would yield an empty
          # loop and an empty release would be published.
          set -euo pipefail

          RELEASE_DIR="wikipathways-data-release/${RELDATE}"
          ASSETS_DIR="wikipathways-assets/pathways"

          OPINDEX=$(curl -fsS https://www.wikipathways.org/org-pathway-index.json | jq -c '.')

          # Each organism/pathway object is passed through base64 so that the
          # for-loop can split on whitespace safely. All JSON and title values
          # are quoted to prevent word splitting and glob expansion.
          for ORGANISM in $(jq -r '.organisms[] | @base64' <<<"${OPINDEX}"); do
            ORGANISM_JSON=$(base64 --decode <<<"${ORGANISM}")
            LATIN=$(jq -r '.latin' <<<"${ORGANISM_JSON}")
            echo "${LATIN}"
            TWO_LETTER_CODE=$(jq -r '.["two-letter-code"]' <<<"${ORGANISM_JSON}")
            # Organism name with underscores turned into spaces for the .info file.
            LATIN_GMT=$(sed 's/_\+/ /g' <<<"${LATIN}")

            GPML_FOLDER="wikipathways-${RELDATE}-gpml-${LATIN}"
            SVG_FOLDER="wikipathways-${RELDATE}-svg-${LATIN}"
            GMT_FOLDER="wikipathways-${RELDATE}-gmt-${LATIN}"
            mkdir -p "${RELEASE_DIR}/gpml/${GPML_FOLDER}"
            mkdir -p "${RELEASE_DIR}/svg/${SVG_FOLDER}"
            mkdir -p "${RELEASE_DIR}/gmt/${GMT_FOLDER}"

            for PATHWAY in $(jq -r '.pathways[] | @base64' <<<"${ORGANISM_JSON}"); do
              PATHWAY_JSON=$(base64 --decode <<<"${PATHWAY}")
              TITLE=$(jq -r '.title' <<<"${PATHWAY_JSON}")
              WPID=$(jq -r '.wpid' <<<"${PATHWAY_JSON}")
              echo "${WPID}"
              LAST_EDITED=$(jq -r '.["last-edited"]' <<<"${PATHWAY_JSON}")

              GPML_FILE="${TWO_LETTER_CODE}_${TITLE}_${WPID}_${LAST_EDITED}.gpml"
              cp "${ASSETS_DIR}/${WPID}/${WPID}.gpml" "${RELEASE_DIR}/gpml/${GPML_FOLDER}/${GPML_FILE}"
              SVG_FILE="${TWO_LETTER_CODE}_${TITLE}_${WPID}_${LAST_EDITED}.svg"
              cp "${ASSETS_DIR}/${WPID}/${WPID}.svg" "${RELEASE_DIR}/svg/${SVG_FOLDER}/${SVG_FILE}"
              TSV_FILE="${WPID}.tsv"
              cp "${ASSETS_DIR}/${WPID}/${WPID}-datanodes.tsv" "${RELEASE_DIR}/gmt/${GMT_FOLDER}/${TSV_FILE}"

              # Per-pathway metadata file written next to the TSV.
              INFO_FILE="${RELEASE_DIR}/gmt/${GMT_FOLDER}/${WPID}.info"
              # Title with every run of non-alphanumerics collapsed to "_".
              TITLE_GMT=$(sed 's/[^a-zA-Z0-9]\+/_/g' <<<"${TITLE}")
              {
                echo "title: ${TITLE_GMT}"
                echo "version: WikiPathways_${RELDATE}"
                echo "wpid: ${WPID}"
                echo "latin: ${LATIN_GMT}"
                echo "url: https://www.wikipathways.org/instance/${WPID}"
              } > "${INFO_FILE}"
            done

            # One flat (-j: no directory paths) archive per organism.
            echo "zip gpml"
            zip -j "${RELEASE_DIR}/gpml/${GPML_FOLDER}.zip" "${RELEASE_DIR}/gpml/${GPML_FOLDER}"/*
            echo "zip svg"
            zip -j "${RELEASE_DIR}/svg/${SVG_FOLDER}.zip" "${RELEASE_DIR}/svg/${SVG_FOLDER}"/*
          done

      - name: Install R packages
        run: |
          # -y: never wait for a confirmation prompt on a CI runner.
          sudo apt-get install -y libcurl4-openssl-dev
          Rscript -e 'install.packages(c("tidyr","dplyr","magrittr","yaml","WikidataQueryServiceR","remotes"))'

      - name: Install rWikiPathways
        run: |
          remotes::install_cran("BiocManager")
          BiocManager::install("rWikiPathways", ask = FALSE)
        shell: Rscript {0}

      # make-gmts.R is sourced from inside the gmt folder, where the TSV and
      # INFO files were written by the collection step.
      - name: Generate GMT files from TSV and temporary INFO
        run: |
          cp wikipathways-data-release/make-gmts.R "wikipathways-data-release/${RELDATE}/gmt/."
          cd "wikipathways-data-release/${RELDATE}/gmt"
          Rscript -e "source('make-gmts.R')"

      # NOTE(review): presumably read by make-authors-rdf.R in the next step;
      # nothing else in this workflow references this checkout - confirm.
      - name: Checkout Jekyll
        uses: actions/checkout@v4
        with:
          repository: wikipathways/wikipathways.github.io
          path: wikipathways.github.io
          fetch-depth: 0
          ref: main

      # Toggle: set "if" to false to skip this step.
      - name: Generate Authors RDF
        if: true
        run: |
          Rscript -e "source('wikipathways-data-release/make-authors-rdf.R')"
          ls -l
          # The script is expected to leave the archive in the workspace root.
          mv "wikipathways-${RELDATE}-rdf-authors.zip" "wikipathways-data-release/${RELDATE}/rdf/."

      # Currently disabled (if: false). Downloads the RDF artifacts of the
      # last successful Jenkins build into the release's rdf folder.
      - name: Collect RDF and TTL files
        if: false
        run: |
          JENKINS_OUT="https://jenkins.bigcat.unimaas.nl/job/WikiPathways%20RDF%20-%20Monthly/lastSuccessfulBuild/artifact/WP2RDF/output"
          RDF_DIR="wikipathways-data-release/${RELDATE}/rdf"
          wget -O "${RDF_DIR}/wikipathways-${RELDATE}-rdf-gpml.zip" "${JENKINS_OUT}/wikipathways-rdf-gpml.zip"
          wget -O "${RDF_DIR}/wikipathways-${RELDATE}-rdf-wp.zip" "${JENKINS_OUT}/wikipathways-rdf-wp.zip"
          wget -O "${RDF_DIR}/wikipathways-${RELDATE}-rdf-void.ttl" "${JENKINS_OUT}/void_for_data.wp.org.ttl"

      # "current" always mirrors the newest dated release.
      - name: Replace current
        run: |
          rm -rf wikipathways-data-release/current
          mkdir wikipathways-data-release/current
          cp -R "wikipathways-data-release/${RELDATE}"/* wikipathways-data-release/current/.

      - name: Commit new files and remove oldest
        run: |
          git config --global user.name 'GitHub Action'
          # NOTE(review): the address was obfuscated in the reviewed copy of
          # this file; action@github.com is the usual pairing with the name
          # above - confirm against the repository.
          git config --global user.email 'action@github.com'
          cd wikipathways-data-release

          # Stage archives, GMT and TTL files at any depth. The patterns are
          # quoted so git (whose "*" also matches "/") does the matching, and
          # each one is added only when it matches something: a single
          # unmatched pathspec would otherwise abort the whole "git add"
          # (e.g. no .ttl files while RDF collection is disabled).
          for EXT in zip gmt ttl; do
            if [ -n "$(git ls-files --cached --others --exclude-standard -- "*/*.${EXT}")" ]; then
              git add -- "*/*.${EXT}"
            fi
          done

          # Retire the oldest 8-digit (dated) folder, but never the release
          # that was just built, and skip cleanly when there is none.
          RETIRE=$(ls -d * | grep -o '[0-9]\{8\}' | sort -n | head -1)
          if [ -n "${RETIRE}" ] && [ "${RETIRE}" != "${RELDATE}" ]; then
            git rm -r "${RETIRE}"
          fi

          # Exit status 0 means nothing is staged.
          if git diff --staged --quiet; then
            echo "No changes"
          else
            git commit -m 'New release of data files'
          fi

      # NOTE(review): this action is referenced by branch (@master); consider
      # pinning it to a release tag or commit SHA.
      - name: Push as user
        uses: ad-m/github-push-action@master
        with:
          directory: wikipathways-data-release
          github_token: ${{ secrets.PICOPAT2 }}