From 352c1745e023ef656147d5166a91babdb6256cfe Mon Sep 17 00:00:00 2001
From: ryanlovett
Date: Fri, 3 May 2019 16:15:47 -0700
Subject: [PATCH] Initial commit.

---
Note: course-emails.py and save-course-emails.sh were revised in review;
the blob ids on their "index" lines predate that revision.

 Dockerfile            |  19 +++++++
 Makefile              |   9 +++
 README.md             |  80 ++++++++++++++++++++++++++
 course-emails.py      | 129 ++++++++++++++++++++++++++++++++++++++++++
 requirements.txt      |   3 +
 save-course-emails.sh |  48 ++++++++++++++++
 6 files changed, 288 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 Makefile
 create mode 100644 README.md
 create mode 100755 course-emails.py
 create mode 100644 requirements.txt
 create mode 100755 save-course-emails.sh

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..64c7f48
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,19 @@
+FROM ubuntu:19.04
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        git \
+        python3 \
+        python3-dev \
+        python3-pip \
+        python3-setuptools \
+        python3-wheel \
+        jq
+
+ADD requirements.txt /tmp/requirements.txt
+RUN pip3 install --no-cache-dir -r /tmp/requirements.txt
+
+ADD course-emails.py /usr/local/bin/
+ADD save-course-emails.sh /usr/local/bin/
+
+CMD ["/usr/local/bin/save-course-emails.sh"]
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..217e682
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,9 @@
+IMAGE_SPEC = berkeleydsep/fetch-course-emails
+VERSION = $(shell git rev-parse --short HEAD)
+
+
+build:
+	docker build -t $(IMAGE_SPEC):$(VERSION) .
+
+push:
+	docker push $(IMAGE_SPEC):$(VERSION)
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b927ad3
--- /dev/null
+++ b/README.md
@@ -0,0 +1,80 @@
+fetch-course-emails
+-------------------
+
+This container fetches the campus email addresses of students and instructors
+in specified UCB courses. It runs as a sidecar container alongside hubs
+provisioned by berkeley-dsep-infra/datahub.
+
+Configuration
+=============
+
+Container
+=========
+
+This container is run as a sidecar by specifying it under hub.extraContainers.
+Provide API credentials as environment variables in encrypted configuration (i.e. secrets):
+```
+jupyterhub:
+  hub:
+    extraContainers:
+      - name: fetch-course-emails
+        image: berkeleydsep/fetch-course-emails:v3
+        volumeMounts:
+        # for writing out email lists ; consider new volume
+        - name: hub-db-dir
+          mountPath: /srv/jupyterhub
+        # for reading in profiles
+        - name: config
+          mountPath: /etc/jupyterhub/config
+        env:
+        - name: UCB_HR_ID
+          value: "..."
+        - name: UCB_HR_KEY
+          value: "..."
+        - name: SIS_CLASSES_ID
+          value: "..."
+        - name: SIS_CLASSES_KEY
+          value: "..."
+        - name: SIS_ENROLLMENTS_ID
+          value: "..."
+        - name: SIS_ENROLLMENTS_KEY
+          value: "..."
+        - name: SIS_TERMS_ID
+          value: "..."
+        - name: SIS_TERMS_KEY
+          value: "..."
+```
+
+Profiles
+========
+Courses are specified as keys of the form {year}-{term}-{class_section_id} in
+the helm config. For example:
+
+```
+# current z2jh
+jupyterhub:
+  hub:
+    extraConfigMap:
+      profiles:
+        2019-spring-25622:
+          mem_limit: 4096M
+          mem_guarantee: 2048M
+
+# newer z2jh
+custom:
+  profiles:
+    2019-spring-25622:
+      mem_limit: 4096M
+      mem_guarantee: 2048M
+```
+
+See https://classes.berkeley.edu for class section IDs.
+
+Output
+======
+The container saves email addresses into files:
+```
+/srv/jupyterhub/profiles.d/{year}-{term}-{class_section_id}-students.txt
+/srv/jupyterhub/profiles.d/{year}-{term}-{class_section_id}-instructors.txt
+```
+which are read by the custom spawner.
diff --git a/course-emails.py b/course-emails.py
new file mode 100755
index 0000000..e23ce6c
--- /dev/null
+++ b/course-emails.py
@@ -0,0 +1,129 @@
+#!/usr/bin/python3.7
+
+import argparse
+import asyncio
+import logging
+import os
+import sys
+
+from sis import classes, enrollments, terms
+from ucbhr import info as ucbhr_info
+
+def parse_course(course):
+    '''Parse {year}-{semester}-{class_number}.
+
+    Returns (year, semester, class_number), with year and class_number as
+    ints and semester lowercased. Raises ValueError on malformed input.'''
+    parts = course.split('-')
+    if len(parts) != 3:
+        raise ValueError(f"{course} not of the form year-semester-classnum")
+    year, semester, class_number = parts
+    # type check
+    year = int(year) ; class_number = int(class_number)
+    # validate semester
+    semesters = ['summer', 'spring', 'fall']
+    semester = semester.lower()
+    # raise instead of assert; asserts are stripped when run with -O
+    if semester not in semesters:
+        raise ValueError(f"{semester} not one of {semesters}")
+    return year, semester, class_number
+
+async def instructor_emails(term_id, class_number):
+    '''Return the business emails of instructors for courses matching
+       {term_id} and {class_number}.'''
+    # get the instructors of our course. sis only has their uids, not emails.
+    uids = await classes.get_instructors(
+        SIS_CLASSES_ID, SIS_CLASSES_KEY,
+        term_id, class_number, False, 'campus-uid'
+    )
+
+    # ask hr for the emails
+    emails = []
+    for uid in uids:
+        # get all emails
+        items = await ucbhr_info.get(UCB_HR_ID, UCB_HR_KEY, uid, 'campus-uid')
+        # extract the business (berkeley.edu) addresses
+        emails += ucbhr_info.emails(items, 'BUSN')
+    return emails
+
+async def student_emails(term_id, class_number):
+    '''Return the campus emails of students in courses matching
+       {term_id} and {class_number}.'''
+    # get the section data for the specified course
+    section = await classes.get_section_by_id(
+        SIS_CLASSES_ID, SIS_CLASSES_KEY, term_id, class_number
+    )
+
+    # isolate the subject area and catalog number, e.g. STAT C8
+    subject_area = enrollments.section_subject_area(section)
+    catalog_number = enrollments.section_catalog_number(section)
+
+    # get enrollments in matching sections for the term id, subject, and number
+    student_enrollments = await enrollments.get_enrollments(
+        SIS_ENROLLMENTS_ID, SIS_ENROLLMENTS_KEY,
+        term_id, subject_area, catalog_number
+    )
+
+    # extract the student email addresses
+    return enrollments.get_enrollment_emails(student_enrollments)
+
+async def main():
+    '''Print the emails of the requested constituents, one per line.'''
+    # log to stderr; stdout carries only the addresses and is redirected
+    # into the email list files by save-course-emails.sh
+    logging.basicConfig(stream=sys.stderr)
+    logger = logging.getLogger()
+    logger.setLevel(logging.ERROR)
+
+    # check for creds in environment and set them as global vars
+    required_env_vars = [
+        'SIS_CLASSES_ID', 'SIS_CLASSES_KEY',
+        'SIS_ENROLLMENTS_ID', 'SIS_ENROLLMENTS_KEY',
+        'SIS_TERMS_ID', 'SIS_TERMS_KEY',
+        'UCB_HR_ID', 'UCB_HR_KEY',
+    ]
+    for v in required_env_vars:
+        # exit instead of assert; asserts are stripped when run with -O
+        if v not in os.environ:
+            sys.exit(f"{v} not defined in environment.")
+        globals()[v] = os.environ[v]
+
+    # arg parsing
+    parser = argparse.ArgumentParser(
+        description="Get UCB course enrollee and instructor email addresses.")
+    parser.add_argument('-d', dest='debug', action='store_true',
+        help='set debug log level')
+    parser.add_argument('course', metavar='year-semester-classnum',
+        help='e.g. "2019-summer-12345"')
+    parser.add_argument('constituents', choices=['students', 'instructors'],
+        help='constituents')
+    args = parser.parse_args()
+
+    if args.debug: logger.setLevel(logging.DEBUG)
+
+    logger.debug(f"course: {args.course}")
+    logger.debug(f"constituents: {args.constituents}")
+
+    try:
+        year, semester, class_number = parse_course(args.course)
+    except ValueError as e:
+        # report a malformed course as a usage error, not a traceback
+        parser.error(str(e))
+
+    # fetch the SIS term id, e.g. 2195
+    term_id = await terms.get_term_id_from_year_sem(
+        SIS_TERMS_ID, SIS_TERMS_KEY, year, semester
+    )
+    term_id = int(term_id)
+    logger.debug(f"{term_id} {class_number}")
+
+    if args.constituents == 'students':
+        emails = await student_emails(term_id, class_number)
+    elif args.constituents == 'instructors':
+        emails = await instructor_emails(term_id, class_number)
+
+    for email in emails: print(email)
+
+# run only when executed as a script, not when imported
+if __name__ == '__main__':
+    asyncio.run(main())
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..c80a504
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+aiohttp==3.5.4
+git+https://github.com/ryanlovett/sis-cli.git@0e2295e
+git+https://github.com/ryanlovett/ucbhr.git@48975fb
diff --git a/save-course-emails.sh b/save-course-emails.sh
new file mode 100755
index 0000000..b50a728
--- /dev/null
+++ b/save-course-emails.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# set -e
+
+# profiles defined in our helm config
+custom_profiles="/etc/jupyterhub/config/custom.profiles"
+
+# output location
+profile_dir="/srv/jupyterhub/profiles.d"
+
+course_emails="/usr/local/bin/course-emails.py"
+
+# space 24 hours apart; TODO: guessing k8s can loop this for us somehow
+sleep_time=86400
+
+while true ; do
+
+    if [ ! -f "$custom_profiles" ]; then
+        echo "No such file: $custom_profiles"
+    else
+
+        if [ ! -d "$profile_dir" ]; then mkdir -p "$profile_dir" ; fi
+
+        profiles=`jq -r 'keys[]' "${custom_profiles}"`
+        echo profiles: ${profiles}
+
+        # write out email lists for each profile
+        for profile in ${profiles} ; do
+            for people in students instructors ; do
+                filename="${profile_dir}/${profile}-${people}.txt"
+                # write to tempfile because gathering addresses takes time
+                # and we don't want the hub to read an abbreviated list
+                outfile=`mktemp`
+                echo $profile $people $outfile
+                # publish the list only if the fetch succeeded, so a failed
+                # run keeps the previous list instead of truncating it
+                if $course_emails "$profile" "$people" > "$outfile" ; then
+                    mv "$outfile" "$filename"
+                else
+                    echo "failed to fetch $people for $profile" >&2
+                    rm -f "$outfile"
+                fi
+            done
+        done
+    fi
+
+    sleep $sleep_time
+done