diff --git a/compair/manage/kaltura.py b/compair/manage/kaltura.py index d5db8004..fe037d59 100644 --- a/compair/manage/kaltura.py +++ b/compair/manage/kaltura.py @@ -32,6 +32,7 @@ import csv from datetime import datetime import re +from urllib.parse import unquote_plus from KalturaClient import KalturaClient, KalturaConfiguration from KalturaClient.Plugins.Core import (KalturaSessionType, KalturaMediaEntry, @@ -86,8 +87,7 @@ def deleteInvalidKalturaMedias(medias, logfile): db.session.delete(media) -def migrateKalturaMedias(medias, oldToNewEntryIds, logfile): - # connect to the Kaltura API +def connectKalturaApi(): kClient = KalturaClient(KalturaConfiguration( serviceUrl=KalturaCore.service_url())) kSession = kClient.session.start( @@ -99,6 +99,11 @@ def migrateKalturaMedias(medias, oldToNewEntryIds, logfile): "appID:compair" ) kClient.setKs(kSession) + return kClient + + +def migrateKalturaMedias(medias, oldToNewEntryIds, logfile): + kClient = connectKalturaApi() for media in medias: mediaId = media.id @@ -115,6 +120,115 @@ def migrateKalturaMedias(medias, oldToNewEntryIds, logfile): #db.session.add(media) +# Some videos were linked in answer content, we want to switch them to using +# the file attachment system, so have to create the associated file and +# kaltura_media table entries for them. +# +# Here's a complex query that gets answers that has learning.video.ubc.ca links +# in them and don't have an associated Kaltura attachment. 
# For our data, we can
# get away with just seeing if the answer doesn't have an attached file entry,
# so that's what was implemented, but this query is documented for ref:
# select answer.id, answer.content, answer.file_id, file.kaltura_media_id from answer left join file on answer.file_id = file.id where answer.content like '%learning.video.ubc.ca%' and (answer.file_id is NULL or file.kaltura_media_id is NULL)
def migrateAnswerLinks(answers, oldToNewEntryIds, logfile):
    """Attach the Kaltura video linked in each answer's content as a file.

    For every answer, the first learning.video.ubc.ca media link found in
    ``answer.content`` is mapped to its new entry ID via ``oldToNewEntryIds``,
    the new entry is fetched from the Kaltura API, and ``KalturaMedia`` and
    ``File`` rows are created and set as the answer's file. Changes are
    committed answer by answer.

    Args:
        answers: answers whose content contains a Kaltura media link.
        oldToNewEntryIds: dict mapping old Kaltura entry IDs to new ones.
        logfile: open log file passed through to ``msg``.
    """
    if not answers:
        # nothing to migrate, so don't bother opening a Kaltura session
        return
    kClient = connectKalturaApi()
    # group 1 is the (URL-encoded) media name, group 2 is the entry ID; the
    # dots are escaped so only the literal host name matches
    regex = re.compile(
        r'https://learning\.video\.ubc\.ca/media/([%\w+-]+?)/(\w+?)"')
    for count, answer in enumerate(answers, start=1):
        msg(f'Answer {count}: {answer.id}', logfile)
        link = regex.search(answer.content)
        if not link:
            msg(f'Error: Answer {answer.id} content has no Kaltura link?', logfile)
            continue
        oldEntryId = link.group(2)
        newEntryId = oldToNewEntryIds.get(oldEntryId)
        if newEntryId is None:
            # skip instead of dying with a KeyError halfway through the run
            msg(f'Error: Answer {answer.id} entry {oldEntryId} has no mapping',
                logfile)
            continue
        newInfo = kClient.media.get(newEntryId, -1)
        # can't figure out how to get the original source extension, so
        # just assuming mp4
        videoName = newInfo.getName() + ".mp4"
        msg(f' Video Name: {videoName}', logfile)
        msg(f' Old Entry ID: {oldEntryId}', logfile)
        msg(f' New Entry ID: {newEntryId}', logfile)
        msg(' Creating Kaltura File Entries...', logfile)
        kalturaMedia = KalturaMedia(
            user=answer.user,
            download_url=newInfo.getDownloadUrl(),
            file_name=videoName,
            service_url=KalturaCore.service_url(),
            partner_id=newInfo.getPartnerId(),
            player_id=KalturaCore.player_id(),
            entry_id=newEntryId
        )
        db.session.add(kalturaMedia)
        fileEntry = File(
            user=answer.user,
            kaltura_media=kalturaMedia,
            alias=videoName
        )
        db.session.add(fileEntry)
        answer.file = fileEntry
        # first commit persists the new rows; the file name is derived from
        # the file entry's uuid afterwards and saved by a second commit
        db.session.commit()
        fileEntry.name = fileEntry.uuid + '.' + kalturaMedia.extension
        db.session.commit()
        msg(' Kaltura File Entries Created!', logfile)


# Some videos were linked in answer content, we want to switch them to using
# the file attachment system like all other kaltura media
@manager.command
def links(mappingCsv, noHeader=False, dryRun=False):
    """Migrate Kaltura links in answer content to file attachments.

    Finds answers that mention learning.video.ubc.ca and have no attached
    file, classifies each one against the old-to-new entry ID mapping, logs a
    summary, and (unless ``dryRun``) creates the file attachments.

    Args:
        mappingCsv: path of the CSV mapping old entry IDs to new entry IDs.
        noHeader: passed to ``readMappingCsv`` along with ``mappingCsv``.
        dryRun: when true, only report what would be migrated.
    """
    ts = datetime.now().isoformat(timespec='seconds')
    # the context manager closes the log even if the migration raises
    with open(f'kaltura-links-migration-log-{ts}.log', 'a') as logfile:
        msg('Starting Kaltura links migration', logfile)
        oldToNewEntryIds = readMappingCsv(mappingCsv, noHeader)
        # only membership is needed, so a set of the new IDs is enough
        newEntryIds = set(oldToNewEntryIds.values())
        needMigrationAnswers = []
        numInvalid = 0
        numAlreadyMigrated = 0
        numNoMapping = 0
        answers = Answer.query \
            .filter(Answer.content.ilike('%learning.video.ubc.ca%')) \
            .filter(Answer.file_id.is_(None)) \
            .all()
        numTotal = len(answers)
        # group 2 is the entry ID; it can never be empty because the pattern
        # requires at least one word character there
        regex = re.compile(
            r'https://learning\.video\.ubc\.ca/media/([%\w+-]+?)/(\w+?)"')
        # find out how much work needs to be done
        for answer in answers:
            link = regex.search(answer.content)
            if not link:
                msg(f'Answer {answer.id} content has no Kaltura link?', logfile)
                numInvalid += 1
                continue
            entryId = link.group(2)
            if entryId in oldToNewEntryIds:
                msg(f"Migration needed on answer {answer.id}: Entry {entryId}", logfile)
                needMigrationAnswers.append(answer)
            elif entryId in newEntryIds:
                # this is always 0, since answers with a file_id won't show
                # up in the query again
                msg(f"Already migrated answer {answer.id}: Entry {entryId}", logfile)
                numAlreadyMigrated += 1
            else:
                # didn't find a mapping, perhaps missing from migration?
                msg(f'No mapping for answer {answer.id}: Entry {entryId}', logfile)
                numNoMapping += 1
        # summarize what needs to be done
        summarize(len(needMigrationAnswers), numInvalid, numAlreadyMigrated,
                  numNoMapping, numTotal, logfile)
        if dryRun:
            msg('*** Dry run completed, no changes were made ***', logfile)
        else:
            msg('Starting database session', logfile)
            migrateAnswerLinks(needMigrationAnswers, oldToNewEntryIds, logfile)
            msg('Committing to database', logfile)
            db.session.commit()


@manager.command
def migrate(mappingCsv, noHeader=False, dryRun=False):
    ts = datetime.now().isoformat(timespec='seconds')