diff --git a/cdhweb/pages/management/commands/exodus.py b/cdhweb/pages/management/commands/exodus.py index 14958db6a..4fcf379ff 100644 --- a/cdhweb/pages/management/commands/exodus.py +++ b/cdhweb/pages/management/commands/exodus.py @@ -1,14 +1,23 @@ """Convert mezzanine-based pages to wagtail page models.""" - +import filecmp +import glob import json +import os +import os.path +import shutil +from collections import defaultdict -from cdhweb.pages.models import ContentPage, HomePage, LandingPage +from django.conf import settings +from django.core.files.images import ImageFile from django.core.management.base import BaseCommand from django.db.models import Q from mezzanine.core.models import CONTENT_STATUS_PUBLISHED from mezzanine.pages import models as mezz_page_models from wagtail.core.blocks import RichTextBlock -from wagtail.core.models import Page, Site +from wagtail.core.models import Page, Site, Collection, get_root_collection_id +from wagtail.images.models import Image + +from cdhweb.pages.models import ContentPage, HomePage, LandingPage class Command(BaseCommand): @@ -41,6 +50,7 @@ def create_landingpage(self, page): return LandingPage( title=page.title, tagline=page.landingpage.tagline, # landing pages have a tagline + header_image=self.get_wagtail_image(page.landingpage.image), slug=self.convert_slug(page.slug), seo_title=page._meta_title or page.title, body=json.dumps([{ @@ -81,6 +91,9 @@ def handle(self, *args, **options): Page.objects.filter(depth__gt=2).delete() # PageRevision.objects.all().delete() + # convert media images to wagtail images + self.image_exodus() + # create the new homepage old_homepage = mezz_page_models.Page.objects.get(slug="/") homepage = self.create_homepage(old_homepage) @@ -138,7 +151,7 @@ def handle(self, *args, **options): .filter(Q(slug__startswith="events/") | Q(slug="year-of-data")) for page in event_pages: self.migrate_pages(page, events) - + if projects: # - migrate project pages but specify new projects list page as 
# Image-migration members of the exodus management command. Only the members
# that are fully visible in this excerpt are reproduced here; the command's
# other methods (handle, migrate_pages, form_pages, ...) lie outside it.
class Command(BaseCommand):

    # file extensions that are never imported as wagtail images
    # NOTE: kept as an explicit list in case we want to handle
    # documents the same way
    UNSUPPORTED_EXTENSIONS = ('.pdf', '.svg', '.docx')

    # cached collections used for migrated media, keyed by collection name.
    # The root collection is stored under the reserved key 'root' the first
    # time it is needed instead of being fetched in the class body, so that
    # importing this module never queries the database.
    collections = {}

    def _root_collection(self):
        """Return the root collection, fetching and caching it on first use."""
        if 'root' not in self.collections:
            self.collections['root'] = Collection.objects.get(
                pk=get_root_collection_id())
        return self.collections['root']

    def get_collection(self, name):
        """Return the collection called ``name``, creating it if needed.

        New collections are added as children of the root collection.
        Results are cached in ``collections``; the name ``'root'`` is
        reserved and always resolves to the root collection itself.
        """
        if name == 'root':
            return self._root_collection()
        if name in self.collections:
            return self.collections[name]

        # reuse an existing collection with this name if there is one
        coll = Collection.objects.filter(name=name).first()
        if coll is None:
            coll = Collection(name=name)
            # add_child persists the new node; the root itself has no
            # changed fields, so it is not saved again afterwards
            self._root_collection().add_child(instance=coll)

        self.collections[name] = coll
        return coll

    def image_exodus(self):
        """Generate wagtail images for all uploaded media.

        mezzanine/filebrowser_safe doesn't seem to have useful objects
        or track metadata, so files are imported from the filesystem.
        Images and non-root collections left over from earlier runs are
        deleted first, together with wagtail's stored image files.
        Files that cannot be turned into an image are reported on stderr
        and skipped.
        """
        # delete all images and collections prior to run
        # (clear out past migration attempts)
        Image.objects.all().delete()
        Collection.objects.exclude(pk=get_root_collection_id()).delete()
        # forget cached collections: they point at rows that were just
        # deleted, and a root node loaded before the delete may hold
        # out-of-date tree data
        self.collections.clear()

        # also delete any wagtail image files, since they are not deleted
        # by removing the objects
        for dirname in ('images', 'original_images'):
            shutil.rmtree(os.path.join(settings.MEDIA_ROOT, dirname),
                          ignore_errors=True)

        uploads_dir = os.path.join(settings.MEDIA_ROOT, 'uploads')

        # media filenames to migrate, with duplicates filtered out
        for imgpath in self.get_media_files():
            # skip unsupported files based on file extension,
            # regardless of case (.PDF as well as .pdf)
            extension = os.path.splitext(imgpath)[1].lower()
            if extension in self.UNSUPPORTED_EXTENSIONS:
                continue

            # if image is in a subdirectory under uploads (e.g. projects,
            # blog) add it to a collection with that name
            relative_dir = os.path.relpath(os.path.dirname(imgpath),
                                           uploads_dir)
            top_dir = relative_dir.split(os.sep)[0]
            collection = None
            # '.' means directly inside uploads, '..' means outside of it;
            # neither belongs to a named collection
            if top_dir not in (os.curdir, os.pardir):
                # there are two variants of Slavic DH, one with and one
                # without a space; remove the space so they'll be in
                # one collection
                basedir = top_dir.replace(' ', '')
                if basedir:
                    collection = self.get_collection(basedir)

            title = os.path.basename(imgpath)
            # passing collection=None errors, so
            # only specify collection option when we have one
            extra_opts = {}
            if collection is not None:
                extra_opts['collection'] = collection

            with open(imgpath, 'rb') as imgfilehandle:
                try:
                    Image.objects.create(
                        title=title,
                        file=ImageFile(imgfilehandle, name=title),
                        **extra_opts)
                except Exception as err:
                    # deliberately broad, best-effort import: seems to
                    # mean that height/width calculation failed
                    # (usually non-images); report it and carry on
                    self.stderr.write(
                        'Error creating image for %s: %s' % (imgpath, err))

    def get_media_files(self):
        """Return the paths of uploaded media files to migrate.

        Every regular file under ``MEDIA_ROOT`` whose name contains a dot
        is considered. When several files share a basename only one is
        kept: the first if all copies are byte-identical, otherwise the
        largest (the others are variant/cropped versions of the same
        image). The order of the glob results is preserved.
        """
        # wagtail images support: GIF, JPEG, PNG, WEBP
        pattern = os.path.join(settings.MEDIA_ROOT, '**', '*.*')
        # the pattern also matches directories with a dot in their name;
        # keep regular files only
        filenames = [path for path in glob.glob(pattern, recursive=True)
                     if os.path.isfile(path)]

        # aggregate files by basename to identify files with the same
        # name in different locations
        paths_by_name = defaultdict(list)
        for path in filenames:
            paths_by_name[os.path.basename(path)].append(path)

        # duplicate and variant files that will not be imported
        skipped = set()
        for paths in paths_by_name.values():
            if len(paths) < 2:
                continue
            first = paths[0]
            # compare every copy with the first one, not just the second
            if all(filecmp.cmp(first, other, shallow=False)
                   for other in paths[1:]):
                keep = first
            else:
                # not all the same: keep the largest (first one on ties)
                keep = max(paths, key=os.path.getsize)
            skipped.update(path for path in paths if path != keep)

        return [path for path in filenames if path not in skipped]

    def get_wagtail_image(self, image):
        """Return the migrated wagtail image for a foreign-key image.

        Images are looked up by file basename, which is what
        ``image_exodus`` stores as the image title. Returns ``None`` when
        ``image`` is empty or when no migrated image has that title (for
        example because the file type was skipped); the latter case is
        reported on stderr.
        """
        # presumably a Django file-field value; pages without an image
        # have an empty one -- TODO confirm against the mezzanine model
        if not image or not image.name:
            return None

        migrated = Image.objects.filter(
            title=os.path.basename(image.name)).first()
        if migrated is None:
            self.stderr.write('No migrated image found for %s' % image.name)
        return migrated
/

{{ page.title }}

diff --git a/cdhweb/pages/tests/test_models.py b/cdhweb/pages/tests/test_models.py index f54be5c4e..18df2b132 100644 --- a/cdhweb/pages/tests/test_models.py +++ b/cdhweb/pages/tests/test_models.py @@ -8,7 +8,7 @@ class TestHomePage(WagtailPageTests): - + def test_can_create(self): root = Page.objects.get(title='Root') self.assertCanCreate(root, HomePage, nested_form_data({ @@ -30,7 +30,7 @@ def test_subpages(self): class TestLandingPage(WagtailPageTests): fixtures = ['test_pages'] - + def test_can_create(self): home = HomePage.objects.get(title='Home') self.assertCanCreate(home, LandingPage, nested_form_data({