From 1359b3872fd16b2bd90eb3ab585d5930bc41c3a7 Mon Sep 17 00:00:00 2001 From: Abdullah Aziz Date: Tue, 15 Oct 2024 15:47:59 +0200 Subject: [PATCH 1/2] fixed requirement.txt file and split runners scripts --- runner_daily.sh => freya_runner_daily.sh | 4 +--- runner_every10mins.sh => freya_runner_every10mins.sh | 0 runner_weekly.sh => freya_runner_weekly.sh | 0 requirements.txt | 11 +---------- slack_runner_daily.sh | 4 ++++ runner_hourly.sh => slack_runner_hourly.sh | 0 6 files changed, 6 insertions(+), 13 deletions(-) rename runner_daily.sh => freya_runner_daily.sh (70%) rename runner_every10mins.sh => freya_runner_every10mins.sh (100%) rename runner_weekly.sh => freya_runner_weekly.sh (100%) create mode 100755 slack_runner_daily.sh rename runner_hourly.sh => slack_runner_hourly.sh (100%) diff --git a/runner_daily.sh b/freya_runner_daily.sh similarity index 70% rename from runner_daily.sh rename to freya_runner_daily.sh index 3a3acf6..dbf5983 100755 --- a/runner_daily.sh +++ b/freya_runner_daily.sh @@ -1,7 +1,5 @@ export CODE_PATH=/code -# Feed SciLifeLab figshare data to Slack -python "$CODE_PATH"/slack_figshare.py - # Update EBI index file PYTHONPATH="$CODE_PATH"/pathogens-portal-scripts/EBI_indexing python "$CODE_PATH"/pathogens-portal-scripts/EBI_indexing/update_index_json.py + diff --git a/runner_every10mins.sh b/freya_runner_every10mins.sh similarity index 100% rename from runner_every10mins.sh rename to freya_runner_every10mins.sh diff --git a/runner_weekly.sh b/freya_runner_weekly.sh similarity index 100% rename from runner_weekly.sh rename to freya_runner_weekly.sh diff --git a/requirements.txt b/requirements.txt index 5791573..df54ad2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,4 @@ # DC-DYNAMIC DEPENDENCIES beautifulsoup4==4.11.1 requests==2.32.3 -lxml==4.9.1 - -# VISUALISATIONS DEPENDENCIES -matplotlib==3.9.1.post1 -numpy==2.0.1 -openpyxl==3.1.5 -pandas==2.2.2 -pillow==10.4.0 -plotly==5.23.0 -wordcloud==1.9.3 +lxml==4.9.1 \ No newline at end of file diff --git a/slack_runner_daily.sh b/slack_runner_daily.sh new file mode 100755 index 0000000..1e71f24 --- /dev/null +++ b/slack_runner_daily.sh @@ -0,0 +1,4 @@ +export CODE_PATH=/code + +# Feed SciLifeLab figshare data to Slack +python "$CODE_PATH"/slack_figshare.py diff --git a/runner_hourly.sh b/slack_runner_hourly.sh similarity index 100% rename from runner_hourly.sh rename to slack_runner_hourly.sh From 512122a420227b925d84c8bca14263e6d831e9d7 Mon Sep 17 00:00:00 2001 From: Abdullah Aziz Date: Thu, 17 Oct 2024 15:31:29 +0200 Subject: [PATCH 2/2] dc-dynamics refactoring: remove scripts --- Dockerfile | 13 +++++++---- all.sh | 5 ----- freya_runner_weekly.sh | 4 ++-- gen_clouds.py | 50 ------------------------------------------ gen_recent_pub.py | 33 ---------------------------- 5 files changed, 11 insertions(+), 94 deletions(-) delete mode 100755 all.sh delete mode 100644 gen_clouds.py delete mode 100644 gen_recent_pub.py diff --git a/Dockerfile b/Dockerfile index 8c978e8..67c34dc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,9 +7,14 @@ WORKDIR /code RUN git clone https://github.com/ScilifelabDataCentre/pathogens-portal-visualisations.git && \ git clone https://github.com/ScilifelabDataCentre/pathogens-portal-scripts.git -COPY *.sh *.py requirements.txt /code/ - +# Install Python dependencies from the current directory and both repositories +COPY requirements.txt /code/ RUN pip install -r requirements.txt && \ - mkdir output + pip install -r pathogens-portal-visualisations/requirements.txt && \ + pip install -r pathogens-portal-scripts/requirements.txt + +# Copy other necessary files to the /code directory +COPY *.sh *.py /code/ -CMD ["/code/all.sh"] +# Create an output directory if needed +RUN mkdir /code/output \ No newline at end of file diff --git a/all.sh b/all.sh deleted file mode 100755 index 0907c08..0000000 --- a/all.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -/code/runner_daily.sh -/code/runner_weekly.sh -/code/runner_every10mins.sh diff --git a/freya_runner_weekly.sh b/freya_runner_weekly.sh index 4069405..23796f7 100755 --- a/freya_runner_weekly.sh +++ b/freya_runner_weekly.sh @@ -1,11 +1,11 @@ export CODE_PATH=/code # Wordclouds generator -PYTHONPATH="$CODE_PATH"/pathogens-portal-visualisations/Wordcloud python "$CODE_PATH"/gen_clouds.py +PYTHONPATH="$CODE_PATH"/pathogens-portal-visualisations/Wordcloud python "$CODE_PATH"/pathogens-portal-visualisations/Wordcloud/livewordcloud.py # Publication related updates PYTHONPATH="$CODE_PATH"/pathogens-portal-visualisations/Count_publications python "$CODE_PATH"/pathogens-portal-visualisations/Count_publications/count_publications.py > "$CODE_PATH"/output/COVID_publication_count.json -python "$CODE_PATH"/gen_recent_pub.py > "$CODE_PATH"/output/covid-portal-recent10.json +PYTHONPATH="$CODE_PATH"/pathogens-portal-visualisations/Count_publications python "$CODE_PATH"/pathogens-portal-visualisations/Count_publications/gen_recent_pub.py > "$CODE_PATH"/output/covid-portal-recent10.json # Upload generated files diff --git a/gen_clouds.py b/gen_clouds.py deleted file mode 100644 index 9457411..0000000 --- a/gen_clouds.py +++ /dev/null @@ -1,50 +0,0 @@ -import os - -import livewordcloud as lwc - -PATH = os.environ.get('PYTHONPATH') -CODE_PATH = os.environ.get('CODE_PATH') - -# titles -lwc.write_file(os.path.join(CODE_PATH, 'output/covid-portal-titles_all.png'), - lwc.gen_wordcloud(field='title', - data_folder=PATH, - xsize=10, - shape='rectangle')) -lwc.write_file(os.path.join(CODE_PATH, 'output/covid-portal-titles_vr.png'), - lwc.gen_wordcloud(field='title', - data_folder=PATH, - json_path='https://publications-covid19.scilifelab.se/label/Funder%3A%20VR.json', - maxwords=100)) -lwc.write_file(os.path.join(CODE_PATH, 'output/covid-portal-titles_kaw.png'), - lwc.gen_wordcloud(field='title', - data_folder=PATH, - json_path='https://publications-covid19.scilifelab.se/label/Funder%3A%20KAW/SciLifeLab.json', - maxwords=100)) -lwc.write_file(os.path.join(CODE_PATH, 'output/covid-portal-titles_h2020.png'), - lwc.gen_wordcloud(field='title', - data_folder=PATH, - json_path='https://publications-covid19.scilifelab.se/label/Funder%3A%20H2020.json', - maxwords=100)) - -# abstracts -lwc.write_file(os.path.join(CODE_PATH, 'output/covid-portal-abstracts_all.png'), - lwc.gen_wordcloud(field='abstract', - data_folder=PATH, - xsize=10, - shape='rectangle')) -lwc.write_file(os.path.join(CODE_PATH, 'output/covid-portal-abstracts_vr.png'), - lwc.gen_wordcloud(field='abstract', - data_folder=PATH, - json_path='https://publications-covid19.scilifelab.se/label/Funder%3A%20VR.json', - maxwords=100)) -lwc.write_file(os.path.join(CODE_PATH, 'output/covid-portal-abstracts_kaw.png'), - lwc.gen_wordcloud(field='abstract', - data_folder=PATH, - json_path='https://publications-covid19.scilifelab.se/label/Funder%3A%20KAW/SciLifeLab.json', - maxwords=100)) -lwc.write_file(os.path.join(CODE_PATH, 'output/covid-portal-abstracts_h2020.png'), - lwc.gen_wordcloud(field='abstract', - data_folder=PATH, - json_path='https://publications-covid19.scilifelab.se/label/Funder%3A%20H2020.json', - maxwords=100)) diff --git a/gen_recent_pub.py b/gen_recent_pub.py deleted file mode 100644 index 9bed51d..0000000 --- a/gen_recent_pub.py +++ /dev/null @@ -1,33 +0,0 @@ -"""This script produces a table showing -the most recent 10 publications in the COVID-19 -publications database""" -import datetime -import json - -import requests - -# get data -req = requests.get("https://publications-covid19.scilifelab.se/publications.json") -data = req.json() - -# keep 10 most recent, with date today or earlier -today = str(datetime.date.today()) -pubs = [pub for pub in data["publications"] if pub["published"] <= today] -recent = sorted(pubs, key=lambda x: x["published"], reverse=True)[:10] - -# collapse authors -for entry in recent: - if len(entry["authors"]) == 1: - entry["authors"] = entry['authors'][0]['family'] - elif len(entry["authors"]) == 2: - entry["authors"] = f"{entry['authors'][0]['family']} and {entry['authors'][1]['family']}" - else: - entry["authors"] = f"{entry['authors'][0]['family']} et al." - -output = {"publications": [{"published": entry["published"], - "authors": entry["authors"], - "title": entry["title"], - "doi": entry["doi"]} - for entry in recent]} - -print(json.dumps(output))