Skip to content

Commit

Permalink
EDIT kaltura migration for links in answer content
Browse files Browse the repository at this point in the history
We have 28 Kaltura videos that didn't go through ComPAIR's Kaltura
integration. The only thing we have for them is a link to the video; the
link is in this format:

https://learning.video.ubc.ca/media/<video name>/<video entry ID>

These videos are also on a different Kaltura repo, not the ComPAIR
Kaltura repo. As a special edge case for this migration, we're going to
copy these videos into the ComPAIR Kaltura repo. We'll also make it look
like each video was uploaded via ComPAIR, so the video will show up as an
attachment like all the other file attachments.

Note that the original learning.video.ubc.ca link will no longer work
after a certain point and we won't be fixing the link. The video will
only be available via the attachment.

The same mapping mechanism as the regular kaltura migrate command is
used. This differs from the regular migrate command in that we need to
create new entries in the 'file' and 'kaltura_media' tables. The command
is similar too:

Dry run:
python manage.py kaltura links -d mapping.csv

For real:
python manage.py kaltura links mapping.csv

Log file will be in format:
kaltura-links-migration-log-<iso8601 datetime>.log
  • Loading branch information
ionparticle committed Mar 9, 2023
1 parent 1605e35 commit aaca8fd
Showing 1 changed file with 116 additions and 2 deletions.
118 changes: 116 additions & 2 deletions compair/manage/kaltura.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import csv
from datetime import datetime
import re
from urllib.parse import unquote_plus

from KalturaClient import KalturaClient, KalturaConfiguration
from KalturaClient.Plugins.Core import (KalturaSessionType, KalturaMediaEntry,
Expand Down Expand Up @@ -86,8 +87,7 @@ def deleteInvalidKalturaMedias(medias, logfile):
db.session.delete(media)


def migrateKalturaMedias(medias, oldToNewEntryIds, logfile):
# connect to the Kaltura API
def connectKalturaApi():
kClient = KalturaClient(KalturaConfiguration(
serviceUrl=KalturaCore.service_url()))
kSession = kClient.session.start(
Expand All @@ -99,6 +99,11 @@ def migrateKalturaMedias(medias, oldToNewEntryIds, logfile):
"appID:compair"
)
kClient.setKs(kSession)
return kClient


def migrateKalturaMedias(medias, oldToNewEntryIds, logfile):
kClient = connectKalturaApi()

for media in medias:
mediaId = media.id
Expand All @@ -115,6 +120,115 @@ def migrateKalturaMedias(medias, oldToNewEntryIds, logfile):
#db.session.add(media)


# Some videos were linked in answer content, we want to switch them to using
# the file attachment system, so have to create the associated file and
# kaltura_media table entries for them.
#
# Here's a complex query that gets answers that have learning.video.ubc.ca
# links in them and don't have an associated Kaltura attachment. For our data,
# we can get away with just checking that the answer doesn't have an attached
# file entry, so that's what was implemented, but this query is documented for
# reference:
# select answer.id, answer.content, answer.file_id, file.kaltura_media_id from answer left join file on answer.file_id = file.id where answer.content like '%learning.video.ubc.ca%' and (answer.file_id is NULL or file.kaltura_media_id is NULL)
def migrateAnswerLinks(answers, oldToNewEntryIds, logfile):
    """Attach the migrated Kaltura video to each answer that only links to it.

    For every answer, the old entry ID is extracted from the
    learning.video.ubc.ca link in the answer content and mapped to its new
    entry ID. New KalturaMedia and File rows are then created and set as the
    answer's file attachment. Changes are committed per answer.

    Answers with no recognizable link, or whose entry ID has no mapping, are
    logged and skipped so one bad answer does not abort the whole run.

    Args:
        answers: answers whose content should contain a Kaltura link.
        oldToNewEntryIds: mapping of old Kaltura entry ID to new entry ID.
        logfile: open log file handed through to msg().
    """
    kClient = connectKalturaApi()
    # Dots are escaped so only the literal learning.video.ubc.ca host matches.
    # Group 1 is the (URL-encoded) video name, group 2 is the entry ID. The
    # trailing double quote is presumably the end of the href attribute value.
    regex = re.compile(
        r'https://learning\.video\.ubc\.ca/media/([%\w+-]+?)/(\w+?)"')
    for count, answer in enumerate(answers, start=1):
        msg(f'Answer {count}: {answer.id}', logfile)
        link = regex.search(answer.content)
        if not link:
            msg(f'Error: Answer {answer.id} content has no Kaltura link?',
                logfile)
            continue
        oldEntryId = link.group(2)
        if oldEntryId not in oldToNewEntryIds:
            # Callers are expected to pre-filter, but don't crash mid-run if
            # an unmapped entry slips through.
            msg(f'Error: No mapping for answer {answer.id}: '
                f'Entry {oldEntryId}', logfile)
            continue
        newEntryId = oldToNewEntryIds[oldEntryId]
        # NOTE(review): -1 looks like the "latest version" argument of the
        # Kaltura media.get call -- confirm against the Kaltura API docs.
        newInfo = kClient.media.get(newEntryId, -1)
        # can't figure out how to get the original source extension, so
        # just assuming mp4
        videoName = newInfo.getName() + ".mp4"
        msg(f' Video Name: {videoName}', logfile)
        msg(f' Old Entry ID: {oldEntryId}', logfile)
        msg(f' New Entry ID: {newEntryId}', logfile)
        msg(' Creating Kaltura File Entries...', logfile)
        kalturaMedia = KalturaMedia(
            user=answer.user,
            download_url=newInfo.getDownloadUrl(),
            file_name=videoName,
            service_url=KalturaCore.service_url(),
            partner_id=newInfo.getPartnerId(),
            player_id=KalturaCore.player_id(),
            entry_id=newEntryId
        )
        db.session.add(kalturaMedia)
        fileEntry = File(
            user=answer.user,
            kaltura_media=kalturaMedia,
            alias=videoName
        )
        db.session.add(fileEntry)
        answer.file = fileEntry
        # First commit persists the new rows; presumably fileEntry.uuid is
        # only available afterwards, which is why the name is set in a
        # second step -- confirm against the File model.
        db.session.commit()
        fileEntry.name = fileEntry.uuid + '.' + kalturaMedia.extension
        db.session.commit()
        msg(' Kaltura File Entries Created!', logfile)


# Some videos were linked in answer content, we want to switch them to using
# the file attachment system like all other kaltura media
@manager.command
def links(mappingCsv, noHeader=False, dryRun=False):
    """Migrate Kaltura videos linked in answer content to file attachments.

    Finds answers that contain a learning.video.ubc.ca link and have no file
    attached, classifies each one against the old-to-new entry ID mapping,
    logs a summary, and (unless this is a dry run) creates the attachment
    entries for every answer that needs migration.

    Args:
        mappingCsv: path of the CSV file passed to readMappingCsv().
        noHeader: passed through to readMappingCsv().
        dryRun: when true, only report what would be done.
    """
    ts = datetime.now().isoformat(timespec='seconds')
    # Dots are escaped so only the literal learning.video.ubc.ca host matches.
    # Group 2 is the entry ID; \w+? guarantees it is never empty on a match.
    regex = re.compile(
        r'https://learning\.video\.ubc\.ca/media/([%\w+-]+?)/(\w+?)"')
    # The context manager closes the log even if the migration raises.
    with open(f'kaltura-links-migration-log-{ts}.log', 'a') as logfile:
        msg('Starting Kaltura links migration', logfile)
        oldToNewEntryIds = readMappingCsv(mappingCsv, noHeader)
        # Only membership is needed to recognize already-migrated entries.
        newEntryIds = set(oldToNewEntryIds.values())
        needMigrationAnswers = []
        numInvalid = 0
        numAlreadyMigrated = 0
        numNoMapping = 0
        answers = Answer.query \
            .filter(Answer.content.ilike('%learning.video.ubc.ca%')) \
            .filter(Answer.file_id.is_(None)) \
            .all()
        numTotal = len(answers)
        # find out how much work needs to be done
        for answer in answers:
            link = regex.search(answer.content)
            if not link:
                msg(f'Answer {answer.id} content has no Kaltura link?',
                    logfile)
                numInvalid += 1
                continue
            entryId = link.group(2)
            if entryId in oldToNewEntryIds:
                msg(f"Migration needed on answer {answer.id}: "
                    f"Entry {entryId}", logfile)
                needMigrationAnswers.append(answer)
            elif entryId in newEntryIds:
                # this is always 0, since answers with a file_id won't show
                # up in the query again
                msg(f"Already migrated answer {answer.id}: Entry {entryId}",
                    logfile)
                numAlreadyMigrated += 1
            else:
                # didn't find a mapping, perhaps missing from migration?
                msg(f'No mapping for answer {answer.id}: Entry {entryId}',
                    logfile)
                numNoMapping += 1
        # summarize what needs to be done
        summarize(len(needMigrationAnswers), numInvalid, numAlreadyMigrated,
                  numNoMapping, numTotal, logfile)
        if dryRun:
            msg('*** Dry run completed, no changes were made ***', logfile)
        else:
            msg('Starting database session', logfile)
            migrateAnswerLinks(needMigrationAnswers, oldToNewEntryIds,
                               logfile)
            msg('Committing to database', logfile)
            db.session.commit()


@manager.command
def migrate(mappingCsv, noHeader=False, dryRun=False):
ts = datetime.now().isoformat(timespec='seconds')
Expand Down

0 comments on commit aaca8fd

Please sign in to comment.