From d43858f190a5a210284a0b4cdbb68d80c6921c64 Mon Sep 17 00:00:00 2001 From: Sergei Maertens Date: Mon, 16 Oct 2023 10:23:17 +0200 Subject: [PATCH] :whale: [#3448] Apply (yubin) patches in docker image build Backport-of: #3538 --- Dockerfile | 10 ++++++--- patches/README.md | 12 +++++++++++ patches/apply.sh | 22 +++++++++++++++++++ patches/yubin_001.patch | 43 ++++++++++++++++++++++++++++++++++++ patches/yubin_002.patch | 48 +++++++++++++++++++++++++++++++++++++++++ patches/yubin_003.patch | 24 +++++++++++++++++++++ 6 files changed, 156 insertions(+), 3 deletions(-) create mode 100644 patches/README.md create mode 100755 patches/apply.sh create mode 100644 patches/yubin_001.patch create mode 100644 patches/yubin_002.patch create mode 100644 patches/yubin_003.patch diff --git a/Dockerfile b/Dockerfile index 601b4a1432..699c3a0d7e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -30,9 +30,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ shared-mime-info \ && rm -rf /var/lib/apt/lists/* - # build-essential python3-dev python3-pip python3-setuptools python3-wheel python3-cffi - - WORKDIR /app # Ensure we use the latest version of pip @@ -42,6 +39,13 @@ COPY ./requirements /app/requirements ARG TARGET_ENVIRONMENT=production RUN pip install -r requirements/${TARGET_ENVIRONMENT}.txt +# Apply patches of third party libraries +COPY ./patches /tmp/patches +RUN apt-get update && apt-get install -y --no-install-recommends \ + git \ + && rm -rf /var/lib/apt/lists/* \ + && /tmp/patches/apply.sh /usr/local/lib/python3.10/site-packages + # Stage 2 - Install frontend deps and build assets FROM node:16-bookworm-slim AS frontend-build diff --git a/patches/README.md b/patches/README.md new file mode 100644 index 0000000000..81a9628735 --- /dev/null +++ b/patches/README.md @@ -0,0 +1,12 @@ +# Patches + +This directory contains patches for third party code that has not been or won't ever be applied, but +are required for Open Forms to properly function. 
They are included in the docker image through the +`Dockerfile` instructions. + +## Django-yubin + +The Yubin patches are for the following PRs: + +- https://github.com/APSL/django-yubin/pull/69 +- https://github.com/APSL/django-yubin/pull/71 diff --git a/patches/apply.sh b/patches/apply.sh new file mode 100755 index 0000000000..426be31946 --- /dev/null +++ b/patches/apply.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +set -eu -o pipefail + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +sitepackagesdir=${1:-} +if [[ -z "$sitepackagesdir" ]]; then + echo "You must provide the path to site-packages"; + exit 1; +fi + +cd "$sitepackagesdir" # quoted: the path may contain spaces or glob characters +echo "Patching packages in: $(pwd)" + +for patch_file in "$SCRIPT_DIR"/yubin_00{1..3}.patch +do + echo "Applying patch file: $patch_file" + git apply "$patch_file" # quoted so each patch path stays a single argument +done + +echo "Done patching." diff --git a/patches/yubin_001.patch b/patches/yubin_001.patch new file mode 100644 index 0000000000..8241d19ee8 --- /dev/null +++ b/patches/yubin_001.patch @@ -0,0 +1,43 @@ +From dc3bfd1fb996a7ab5ffc0d064703b57b47b84d25 Mon Sep 17 00:00:00 2001 +From: SilviaAmAm +Date: Thu, 12 Oct 2023 15:09:22 +0200 +Subject: [PATCH] :sparkles: [#70] Add the email.policy.EmailPolicy + +https://docs.python.org/3/library/email.policy.html#email.policy.EmailPolicy +This results in the unfolding of headers that have previously been +folded, so that new lines don't cause errors. 
+--- + django_yubin/models.py | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/django_yubin/models.py b/django_yubin/models.py +index e9e75be..7a1e9db 100644 +--- a/django_yubin/models.py ++++ b/django_yubin/models.py +@@ -1,5 +1,7 @@ + import datetime + import logging ++import email ++from email import policy + from email import encoders as Encoders + from email.mime.base import MIMEBase + +@@ -15,7 +17,7 @@ + from django.utils.text import Truncator + from django.utils.timezone import now + from django.utils.translation import gettext_lazy as _ +-import mailparser ++from mailparser import MailParser + + from . import mailparser_utils, tasks + +@@ -131,7 +133,8 @@ def recipients(self): + return self.to() + self.cc() + self.bcc() + + def get_message_parser(self): +- return mailparser.parse_from_string(self.message_data) ++ message = email.message_from_string(self.message_data, policy=policy.default) ++ return MailParser(message) + + def get_email_message(self): + """ diff --git a/patches/yubin_002.patch b/patches/yubin_002.patch new file mode 100644 index 0000000000..23711bc168 --- /dev/null +++ b/patches/yubin_002.patch @@ -0,0 +1,48 @@ +From 624f5b8103cf02257c5d8d3aa4423bb777a9c36d Mon Sep 17 00:00:00 2001 +From: Sergei Maertens +Date: Thu, 5 Oct 2023 12:28:05 +0200 +Subject: [PATCH] :zap: Fixes #68 -- improve performance of data migration + +Tested against a 13GB SQL dump of the email table (~23K records), +within a cgroup with 300M memory limit. The migration completed +in just under 60s. + +* Use iterator to avoid loading entire table in memory (for the queryset cache) +* Defer the message content, as it may contain attachments causing excessive + memory usage - it is not used in the migration anyway. 
+* Replace loop over log table with 3 separate SQL update queries +--- + django_yubin/migrations/0007_auto_20200319_1158.py | 13 ++++--------- + 1 file changed, 4 insertions(+), 9 deletions(-) + +diff --git a/django_yubin/migrations/0007_auto_20200319_1158.py b/django_yubin/migrations/0007_auto_20200319_1158.py +index 98690b0..6c91e0b 100644 +--- a/django_yubin/migrations/0007_auto_20200319_1158.py ++++ b/django_yubin/migrations/0007_auto_20200319_1158.py +@@ -19,7 +19,7 @@ def migrate_to_queues(apps, schema_editor): + Log = apps.get_model('django_yubin', 'Log') + + # Messages without a QueueMessage ara sent. +- for message in Message.objects.all(): ++ for message in Message.objects.defer("encoded_message").iterator(): + queued = QueuedMessage.objects.filter(message=message).only('date_queued').first() + if queued: + message.status = DBMessage.STATUS_QUEUED +@@ -35,14 +35,9 @@ def migrate_to_queues(apps, schema_editor): + message.save() + + # Set Log actions based on its result +- for log in Log.objects.all(): +- if log.result == RESULT_SENT: +- log.action = DBMessage.STATUS_SENT +- elif log.result == RESULT_FAILED: +- log.action = DBMessage.STATUS_FAILED +- elif log.result == RESULT_SKIPPED: +- log.action = DBMessage.STATUS_DISCARDED +- log.save() ++ Log.objects.filter(result=RESULT_SENT).update(action=DBMessage.STATUS_SENT) ++ Log.objects.filter(result=RESULT_FAILED).update(action=DBMessage.STATUS_FAILED) ++ Log.objects.filter(result=RESULT_SKIPPED).update(action=DBMessage.STATUS_DISCARDED) + + + class Migration(migrations.Migration): diff --git a/patches/yubin_003.patch b/patches/yubin_003.patch new file mode 100644 index 0000000000..4a825d4cdb --- /dev/null +++ b/patches/yubin_003.patch @@ -0,0 +1,24 @@ +From c17ad0ce951e8308a84a018bdcb4151e9afb95b1 Mon Sep 17 00:00:00 2001 +From: Sergei Maertens +Date: Thu, 12 Oct 2023 10:47:52 +0200 +Subject: [PATCH] :bug: Do not prevent migration rollbacks + +Discussed in the context of #68 - the reverse data migration is 
not +implemented (deliberately), but that shouldn't prevent people from +rolling back their migrations in dev-environments and while +debugging/diagnosing issues, for example. +--- + django_yubin/migrations/0007_auto_20200319_1158.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/django_yubin/migrations/0007_auto_20200319_1158.py b/django_yubin/migrations/0007_auto_20200319_1158.py +index 6c91e0b..2db6fb0 100644 +--- a/django_yubin/migrations/0007_auto_20200319_1158.py ++++ b/django_yubin/migrations/0007_auto_20200319_1158.py +@@ -47,5 +47,5 @@ class Migration(migrations.Migration): + ] + + operations = [ +- migrations.RunPython(migrate_to_queues), ++ migrations.RunPython(migrate_to_queues, migrations.RunPython.noop), + ]