EDIT kaltura migration for links in answer content

We have 28 Kaltura videos that didn't go through ComPAIR's Kaltura integration. All we have for each of them is a link to the video, in this format: https://learning.video.ubc.ca/media/<video name>/<video entry ID>. These videos are also hosted on a different Kaltura repo, not the ComPAIR Kaltura repo. As a special edge case for this migration, we're going to copy these videos into the ComPAIR Kaltura repo. We'll also make it look like each video was uploaded via ComPAIR, so the video will show up as an attachment like all the other file attachments. Note that the original learning.video.ubc.ca link will no longer work after a certain point and we won't be fixing the link. The video will only be available via the attachment. The same mapping mechanism as the regular kaltura migrate command is used. This command differs from the regular migrate command in that it needs to create new entries in the 'file' and 'kaltura_media' tables. The invocation is similar too. Dry run: `python manage.py kaltura links -d mapping.csv`. For real: `python manage.py kaltura links mapping.csv`. The log file name will be in the format: kaltura-links-migration-log-<iso8601 datetime>.log
ubc · Mar 9, 2023 · aaca8fd · aaca8fd
1 parent 1605e35
commit aaca8fd
Showing 1 changed file with 116 additions and 2 deletions.
diff --git a/compair/manage/kaltura.py b/compair/manage/kaltura.py
@@ -32,6 +32,7 @@
 import csv
 from datetime import datetime
 import re
+from urllib.parse import unquote_plus
 
 from KalturaClient import KalturaClient, KalturaConfiguration
 from KalturaClient.Plugins.Core import (KalturaSessionType, KalturaMediaEntry,
@@ -86,8 +87,7 @@ def deleteInvalidKalturaMedias(medias, logfile):
         db.session.delete(media)
 
 
-def migrateKalturaMedias(medias, oldToNewEntryIds, logfile):
-    # connect to the Kaltura API
+def connectKalturaApi():
     kClient = KalturaClient(KalturaConfiguration(
                             serviceUrl=KalturaCore.service_url()))
     kSession = kClient.session.start(
@@ -99,6 +99,11 @@ def migrateKalturaMedias(medias, oldToNewEntryIds, logfile):
         "appID:compair"
     )
     kClient.setKs(kSession)
+    return kClient
+
+
+def migrateKalturaMedias(medias, oldToNewEntryIds, logfile):
+    kClient = connectKalturaApi()
 
     for media in medias:
         mediaId = media.id
@@ -115,6 +120,115 @@ def migrateKalturaMedias(medias, oldToNewEntryIds, logfile):
         #db.session.add(media)
 
 
+# Some videos were linked in answer content, we want to switch them to using
+# the file attachment system, so have to create the associated file and
+# kaltura_media table entries for them.
+#
+# Here's a complex query that gets answers that have learning.video.ubc.ca
+# links in them and don't have an associated Kaltura attachment. For our data,
+# we can get away with just checking that the answer has no attached file
+# entry, so that's what was implemented, but this query is documented for ref:
+# select answer.id, answer.content, answer.file_id, file.kaltura_media_id from answer left join file on answer.file_id = file.id where answer.content like '%learning.video.ubc.ca%' and (answer.file_id is NULL or file.kaltura_media_id is NULL)
+def migrateAnswerLinks(answers, oldToNewEntryIds, logfile):  # for each answer: create KalturaMedia + File entries and attach the file to the answer
+    kClient = connectKalturaApi()
+    regex = re.compile(r'https://learning.video.ubc.ca/media/([%\w+-]+?)/(\w+?)"')  # group 1: video name, group 2: entry ID; a '"' must directly follow the entry ID
+    count = 0
+    for answer in answers:
+        count += 1  # 1-based position, only used in the log line below
+        msg(f'Answer {count}: {answer.id}', logfile)
+        link = re.search(regex, answer.content)  # only the first matching link in the content is used
+        if not link:
+            msg(f'Error: Answer {answer.id} content has no Kaltura link?', logfile)
+            continue
+        oldEntryId = link.group(2)
+        newEntryId = oldToNewEntryIds[oldEntryId]  # no fallback here: links() only passes answers whose entry ID is in the mapping
+        newInfo = kClient.media.get(newEntryId, -1)  # NOTE(review): -1 is presumably the "latest version" argument -- confirm against the Kaltura API
+        videoName = newInfo.getName() + ".mp4"
+        msg(f'  Video Name: {videoName}', logfile)
+        msg(f'  Old Entry ID: {oldEntryId}', logfile)
+        msg(f'  New Entry ID: {newEntryId}', logfile)
+        msg(f'  Creating Kaltura File Entries...', logfile)
+        kalturaMedia = KalturaMedia(
+            user=answer.user,
+            download_url=newInfo.getDownloadUrl(),
+            # can't figure out how to get the original source extension, so
+            # we just assume mp4 (see videoName above)
+            file_name=videoName,
+            service_url=KalturaCore.service_url(),
+            partner_id=newInfo.getPartnerId(),
+            player_id=KalturaCore.player_id(),
+            entry_id=newEntryId
+        )
+        db.session.add(kalturaMedia)
+        fileEntry = File(
+            user=answer.user,
+            kaltura_media=kalturaMedia,
+            alias=videoName
+        )
+        db.session.add(fileEntry)
+        answer.file = fileEntry
+        db.session.commit()  # first commit; NOTE(review): presumably needed so fileEntry.uuid is populated before it is read below -- confirm
+        fileEntry.name = fileEntry.uuid + '.' + kalturaMedia.extension  # NOTE(review): extension is not set in this function; presumably derived by the model -- confirm
+        db.session.commit()  # second commit persists the name assigned above
+        msg(f'  Kaltura File Entries Created!', logfile)
+
+
+# Some videos were linked in answer content, we want to switch them to using
+# the file attachment system like all other kaltura media
+@manager.command
+def links(mappingCsv, noHeader=False, dryRun=False):  # classify answers with learning.video.ubc.ca links, log a summary, then migrate unless dryRun
+    ts = datetime.now().isoformat(timespec='seconds')
+    logfile = open(f'kaltura-links-migration-log-{ts}.log', 'a')  # NOTE(review): the ISO timestamp puts ':' characters in the file name
+    msg('Starting Kaltura links migration', logfile)
+    oldToNewEntryIds = readMappingCsv(mappingCsv, noHeader)
+    newToOldEntryIds = dict(map(reversed, oldToNewEntryIds.items()))  # inverse mapping, used below to recognise already-migrated entry IDs
+    needMigrationAnswers = []
+    numInvalid = 0
+    numAlreadyMigrated = 0
+    numNoMapping = 0
+    numTotal = 0
+    answers = Answer.query \
+        .filter(Answer.content.ilike('%learning.video.ubc.ca%')) \
+        .filter(Answer.file_id.is_(None)) \
+        .all()  # only answers that mention the host and have no attached file
+    regex = re.compile(r'https://learning.video.ubc.ca/media/([%\w+-]+?)/(\w+?)"')  # same pattern as in migrateAnswerLinks; group 2 is the entry ID
+    # find out how much work needs to be done
+    for answer in answers:
+        numTotal += 1
+        link = re.search(regex, answer.content)  # only the first matching link in the content is inspected
+        if not link:
+            msg(f'Answer {answer.id} content has no Kaltura link?', logfile)
+            numInvalid += 1
+            continue
+        entryId = link.group(2)
+        if not entryId:  # NOTE(review): group 2 is \w+? (at least one char) so it cannot be empty after a match; this branch looks unreachable
+            msg(f'Answer {answer.id} Kaltura link has no entry ID', logfile)
+            numInvalid += 1
+        elif entryId in oldToNewEntryIds:
+            msg(f"Migration needed on answer {answer.id}: Entry {entryId}", logfile)
+            needMigrationAnswers.append(answer)
+        elif entryId in newToOldEntryIds:
+            # this is always 0, since answers with a file_id won't show up in
+            # the query again
+            msg(f"Already migrated answer {answer.id}: Entry {entryId}", logfile)
+            numAlreadyMigrated += 1
+        else:
+            # didn't find a mapping, perhaps missing from migration?
+            msg(f'No mapping for answer {answer.id}: Entry {entryId}', logfile)
+            numNoMapping += 1
+    # summarize what needs to be done
+    summarize(len(needMigrationAnswers), numInvalid, numAlreadyMigrated,
+              numNoMapping, numTotal, logfile)
+    if dryRun:
+        msg(f'*** Dry run completed, no changes were made ***', logfile)
+    else:
+        msg(f'Starting database session', logfile)
+        migrateAnswerLinks(needMigrationAnswers, oldToNewEntryIds, logfile)
+        msg(f'Committing to database', logfile)
+        db.session.commit()  # migrateAnswerLinks already commits after each answer, so this is a final no-op/safety commit
+    logfile.close()  # NOTE(review): not reached if anything above raises; the log file is then left to interpreter cleanup
+
+
 @manager.command
 def migrate(mappingCsv, noHeader=False, dryRun=False):
     ts = datetime.now().isoformat(timespec='seconds')