Skip to content

Commit

Permalink
Merge pull request #265 from Princeton-CDH/feature/migrate-images
Browse files Browse the repository at this point in the history
Feature/migrate images
  • Loading branch information
rlskoeser authored Dec 22, 2020
2 parents 6bbb0f4 + 71e357c commit 8d7c85b
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 9 deletions.
146 changes: 141 additions & 5 deletions cdhweb/pages/management/commands/exodus.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
"""Convert mezzanine-based pages to wagtail page models."""

import filecmp
import glob
import json
import os
import os.path
import shutil
from collections import defaultdict

from cdhweb.pages.models import ContentPage, HomePage, LandingPage
from django.conf import settings
from django.core.files.images import ImageFile
from django.core.management.base import BaseCommand
from django.db.models import Q
from mezzanine.core.models import CONTENT_STATUS_PUBLISHED
from mezzanine.pages import models as mezz_page_models
from wagtail.core.blocks import RichTextBlock
from wagtail.core.models import Page, Site
from wagtail.core.models import Page, Site, Collection, get_root_collection_id
from wagtail.images.models import Image

from cdhweb.pages.models import ContentPage, HomePage, LandingPage


class Command(BaseCommand):
Expand Down Expand Up @@ -41,6 +50,7 @@ def create_landingpage(self, page):
return LandingPage(
title=page.title,
tagline=page.landingpage.tagline, # landing pages have a tagline
header_image=self.get_wagtail_image(page.landingpage.image),
slug=self.convert_slug(page.slug),
seo_title=page._meta_title or page.title,
body=json.dumps([{
Expand Down Expand Up @@ -81,6 +91,9 @@ def handle(self, *args, **options):
Page.objects.filter(depth__gt=2).delete()
# PageRevision.objects.all().delete()

# convert media images to wagtail images
self.image_exodus()

# create the new homepage
old_homepage = mezz_page_models.Page.objects.get(slug="/")
homepage = self.create_homepage(old_homepage)
Expand Down Expand Up @@ -138,7 +151,7 @@ def handle(self, *args, **options):
.filter(Q(slug__startswith="events/") | Q(slug="year-of-data"))
for page in event_pages:
self.migrate_pages(page, events)

if projects:
# - migrate project pages but specify new projects list page as parent
# - process about page last so project pages don't nest
Expand Down Expand Up @@ -198,7 +211,6 @@ def migrate_pages(self, page, parent):
# recursively create and add new versions of all this page's children
for child in page.children.all():
self.migrate_pages(child, new_page)


def form_pages(self):
# migrate embedded google forms from mezzanine templates
Expand All @@ -219,3 +231,127 @@ def form_pages(self):
{"type": "migrated", "value": '<iframe title="Cosponsorship Request Form" height="3250" src="https://docs.google.com/forms/d/e/1FAIpQLSeP40DBM7n8GYgW_i99nRxY5T5P39DrIWyIwq9LggIwu4r5jQ/viewform?embedded=true">Loading...</iframe>'}
])
cosponsor.save()

# cache of wagtail collections used for migrated media, keyed by
# collection name; shared by get_collection so each collection is only
# looked up or created once.
# NOTE: the root collection is fetched when this statement is evaluated
# (not lazily), so the database must be available at that point.
collections = dict(
    # root collection is kept so child collections can be added to it
    root=Collection.objects.get(pk=get_root_collection_id()),
)

def get_collection(self, name):
    """Return the wagtail collection called ``name``, creating it if needed.

    Results are memoized in ``self.collections``; on a cache miss the
    database is checked for an existing collection with that name, and
    only if none is found is a new one added as a child of the cached
    root collection.
    """
    # fast path: collection was already looked up or created
    try:
        return self.collections[name]
    except KeyError:
        pass

    # reuse a collection with this name if one is already in the database
    found = Collection.objects.filter(name=name).first()
    if not found:
        # nothing there yet: create it underneath the root collection
        found = Collection(name=name)
        root = self.collections['root']
        root.add_child(instance=found)
        root.save()

    # remember it for subsequent calls
    self.collections[name] = found
    return self.collections[name]

def image_exodus(self):
    """Generate wagtail images for all uploaded media files.

    mezzanine/filebrowser_safe doesn't seem to have useful objects or
    track metadata, so images are imported straight from the filesystem.

    Steps:

    1. Delete all wagtail images, all non-root collections, and the
       wagtail image directories left over from past migration attempts.
    2. Import every file returned by :meth:`get_media_files` (skipping
       known non-image extensions) as a wagtail image whose title is the
       file's basename.
    3. Files in a subdirectory of ``MEDIA_ROOT/uploads`` are put in a
       collection named after the first directory below ``uploads``.

    Failures to create an individual image are reported and skipped so
    one bad file does not abort the whole migration.
    """
    # delete all images and collections prior to run
    # (clear out past migration attempts)
    Image.objects.all().delete()
    Collection.objects.exclude(pk=get_root_collection_id()).delete()

    # the collections were just deleted, so drop any cached references to
    # them; otherwise a repeat run in the same process would hand out
    # collections that no longer exist. The root collection is kept.
    for cached_name in [key for key in self.collections if key != 'root']:
        del self.collections[cached_name]

    # also delete any wagtail image files, since they are not deleted
    # by removing the objects
    shutil.rmtree('%s/images' % settings.MEDIA_ROOT, ignore_errors=True)
    shutil.rmtree('%s/original_images' % settings.MEDIA_ROOT,
                  ignore_errors=True)

    uploads_dir = os.path.join(settings.MEDIA_ROOT, 'uploads')
    # NOTE: leaving this list here in case we want to handle
    # documents the same way
    unsupported_extensions = {'.pdf', '.svg', '.docx'}

    # get media filenames to migrate, with duplicates filtered out
    for imgpath in self.get_media_files():
        # skip unsupported files based on file extension; compare
        # case-insensitively so e.g. ".PDF" is skipped as well
        extension = os.path.splitext(imgpath)[1].lower()
        if extension in unsupported_extensions:
            continue

        # if image is in a subdirectory under uploads (e.g. projects,
        # blog) add it to a collection with that name. relpath is used
        # instead of string replacement so the result does not depend on
        # MEDIA_ROOT being absolute or lacking a trailing slash.
        relative_dir = os.path.relpath(os.path.dirname(imgpath), uploads_dir)
        top_dir = relative_dir.split(os.sep)[0]
        collection = None
        # "." means directly in uploads; ".." means outside of uploads
        if top_dir not in (os.curdir, os.pardir):
            # there are two variants of Slavic DH, one with and one
            # without a space; remove the space so they'll be in one
            # collection
            basedir = top_dir.replace(' ', '')
            if basedir:
                collection = self.get_collection(basedir)

        title = os.path.basename(imgpath)
        # passing collection=None errors, so
        # only specify collection option when we have one
        extra_opts = {}
        if collection:
            extra_opts['collection'] = collection

        with open(imgpath, 'rb') as imgfilehandle:
            try:
                Image.objects.create(
                    title=title,
                    file=ImageFile(imgfilehandle, name=title),
                    **extra_opts)
            except Exception as err:
                # deliberately broad: seems to mean that height/width
                # calculation failed (usually non-images); report it
                # and carry on with the remaining files
                print('Error creating image for %s: %s' % (imgpath, err))

def get_media_files(self):
    """Return paths of media files to migrate, one per unique file name.

    All files under ``settings.MEDIA_ROOT`` whose name contains a dot are
    collected recursively. When several files share the same basename:

    - if every copy has identical content, the first one found is kept;
    - otherwise the largest file is kept (the others are variant/cropped
      versions of the same image).

    :returns: list of file paths, in the order returned by glob, with
        duplicate and variant copies removed
    """
    # wagtail images support: GIF, JPEG, PNG, WEBP
    imgfile_path = '%s/**/*.*' % settings.MEDIA_ROOT
    # get filenames for all uploaded files; the pattern also matches
    # directories with a dot in their name, which can be neither compared
    # nor imported, so keep regular files only
    filenames = [
        path for path in glob.glob(imgfile_path, recursive=True)
        if os.path.isfile(path)
    ]

    # aggregate files by basename to identify files with the same
    # name in different locations
    paths_by_name = defaultdict(list)
    for path in filenames:
        paths_by_name[os.path.basename(path)].append(path)

    # duplicate and variant images that will not be imported into wagtail
    extra_copies = set()
    for paths in paths_by_name.values():
        if len(paths) < 2:
            continue

        # compare every copy against the first one, not just the first
        # two, so a differing third (or later) copy is not mistaken for
        # a duplicate
        first = paths[0]
        all_identical = all(
            filecmp.cmp(first, other, shallow=False)
            for other in paths[1:])

        if all_identical:
            keep = first
        else:
            # max returns the first of equally sized files
            keep = max(paths, key=os.path.getsize)

        extra_copies.update(path for path in paths if path != keep)

    return [path for path in filenames if path not in extra_copies]

def get_wagtail_image(self, image):
    """Return the migrated wagtail image for a foreign-key image field.

    Images are migrated with the file basename as their title, so the
    lookup is done by title.

    :param image: image field value exposing a ``name`` attribute; may be
        ``None`` or empty when no image was set
    :returns: the matching wagtail ``Image``, or ``None`` when ``image``
        is unset (nothing to look up)
    :raises Image.DoesNotExist: if an image is set but no migrated image
        has that title
    """
    # an unset image has no file name; looking it up would fail and abort
    # the migration, so report "no image" instead.
    # NOTE(review): assumes the target field (e.g. header_image) accepts
    # None - the landing page template handles a missing header image.
    image_name = getattr(image, 'name', None)
    if not image_name:
        return None

    return Image.objects.get(title=os.path.basename(image_name))

12 changes: 10 additions & 2 deletions cdhweb/pages/templates/cdhpages/landing_page.html
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{% extends 'base.html' %}
{% load wagtailcore_tags %}
{% load wagtailcore_tags wagtailimages_tags %}

{% block page-title %}{{ page.title }}{% endblock %}

Expand All @@ -12,7 +12,15 @@
{% endblock %}

{% block content-header %}
<header {% if page.header_image %}style="background-image:url('{{ MEDIA_URL }}{{ page.header_image }}')"
{% comment %}
image asset guidelines for landing page
L,M @1x W: 2560px H: 680px, @2x W: 5120px H: 1360px
S @1x W: 736px H: 320px, @2x W: 1472px H: 640px
{% endcomment %}
{% if page.header_image %}
{% image page.header_image fill-5120x1360 as header_img %}
{% endif %}
<header {% if page.header_image %}style="background-image:url('{{ header_img.url }}')"
{% else %}class="no-background"{% endif %}>
<div>
<a href="{% url 'home' %}" class="home">/</a><h1>{{ page.title }}</h1>
Expand Down
4 changes: 2 additions & 2 deletions cdhweb/pages/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


class TestHomePage(WagtailPageTests):

def test_can_create(self):
root = Page.objects.get(title='Root')
self.assertCanCreate(root, HomePage, nested_form_data({
Expand All @@ -30,7 +30,7 @@ def test_subpages(self):

class TestLandingPage(WagtailPageTests):
fixtures = ['test_pages']

def test_can_create(self):
home = HomePage.objects.get(title='Home')
self.assertCanCreate(home, LandingPage, nested_form_data({
Expand Down

0 comments on commit 8d7c85b

Please sign in to comment.