Skip to content
This repository has been archived by the owner on Dec 17, 2024. It is now read-only.

Commit

Permalink
Initial commit.
Browse files Browse the repository at this point in the history
  • Loading branch information
ryanlovett committed May 3, 2019
0 parents commit 352c174
Show file tree
Hide file tree
Showing 6 changed files with 264 additions and 0 deletions.
19 changes: 19 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Image for the fetch-course-emails sidecar. Its default command runs
# save-course-emails.sh, which calls course-emails.py.
#
# NOTE(review): 19.04 is a non-LTS Ubuntu release; confirm its apt
# repositories are still reachable before rebuilding. course-emails.py's
# shebang names python3.7, so any base image change must keep that
# interpreter available (or the shebang must change with it).
FROM ubuntu:19.04

# git lets pip install the git+https requirements; jq is used by
# save-course-emails.sh to read the profile keys. The apt package lists are
# removed in the same layer so they do not end up in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        git \
        python3 \
        python3-dev \
        python3-pip \
        python3-setuptools \
        python3-wheel \
        jq && \
    rm -rf /var/lib/apt/lists/*

# COPY rather than ADD: these are plain local files, so ADD's archive
# extraction and URL handling are not wanted.
# requirements.txt is copied and installed before the scripts so that editing
# a script does not invalidate the cached dependency layer.
COPY requirements.txt /tmp/requirements.txt
RUN pip3 install --no-cache-dir -r /tmp/requirements.txt

COPY course-emails.py /usr/local/bin/
COPY save-course-emails.sh /usr/local/bin/

CMD ["/usr/local/bin/save-course-emails.sh"]
9 changes: 9 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Build and push the fetch-course-emails image, tagged with the short hash of
# the current git commit.
IMAGE_SPEC = berkeleydsep/fetch-course-emails
# Simply-expanded (:=) so `git rev-parse` runs once when the Makefile is read
# instead of on every reference to $(VERSION).
VERSION := $(shell git rev-parse --short HEAD)

# build and push name actions, not files; without this a file or directory
# called "build" or "push" would make the target look up to date.
.PHONY: build push

build:
	docker build -t $(IMAGE_SPEC):$(VERSION) .

push:
	docker push $(IMAGE_SPEC):$(VERSION)
80 changes: 80 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
fetch-course-emails
-------------------

This container fetches the campus email addresses of students and instructors
in specified UCB courses. It runs as a sidecar container alongside hubs
provisioned by berkeley-dsep-infra/datahub.

Configuration
=============

Container
=========

This container is run as a sidecar by specifying it under hub.extraContainers.
Provide API credentials as environment variables in encrypted configuration (i.e. secrets):
```
jupyterhub:
  hub:
    extraContainers:
      - name: fetch-course-emails
        image: berkeleydsep/fetch-course-emails:v3
        volumeMounts:
          # for writing out email lists ; consider new volume
          - name: hub-db-dir
            mountPath: /srv/jupyterhub
          # for reading in profiles
          - name: config
            mountPath: /etc/jupyterhub/config
        env:
          - name: UCB_HR_ID
            value: "..."
          - name: UCB_HR_KEY
            value: "..."
          - name: SIS_CLASSES_ID
            value: "..."
          - name: SIS_CLASSES_KEY
            value: "..."
          - name: SIS_ENROLLMENTS_ID
            value: "..."
          - name: SIS_ENROLLMENTS_KEY
            value: "..."
          - name: SIS_TERMS_ID
            value: "..."
          - name: SIS_TERMS_KEY
            value: "..."
```

Profiles
========
Courses are specified as keys of the form {year}-{term}-{class_section_id} in
the helm config, where {term} is one of spring, summer, or fall. For example:

```
# current z2jh
jupyterhub:
  hub:
    extraConfigMap:
      profiles:
        2019-spring-25622:
          mem_limit: 4096M
          mem_guarantee: 2048M
# newer z2jh
  custom:
    profiles:
      2019-spring-25622:
        mem_limit: 4096M
        mem_guarantee: 2048M
```

See https://classes.berkeley.edu for class section IDs.

Output
======
The container saves email addresses into files:
```
/srv/jupyterhub/profiles.d/{year}-{term}-{class_section_id}-students.txt
/srv/jupyterhub/profiles.d/{year}-{term}-{class_section_id}-instructors.txt
```
which are read by the custom spawner.
111 changes: 111 additions & 0 deletions course-emails.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#!/usr/bin/python3.7

import argparse
import asyncio
import logging
import os
import sys

from sis import classes, enrollments, terms
from ucbhr import info as ucbhr_info

def parse_course(course):
    '''Parse a course key of the form {year}-{semester}-{class_number}.

    Args:
        course: string such as "2019-spring-25622". The semester is matched
            case-insensitively.

    Returns:
        A (year, semester, class_number) tuple where year and class_number
        are ints and semester is the lower-cased semester name.

    Raises:
        ValueError: if the key does not have exactly three dash-separated
            parts, or if the year or class number is not an integer.
        AssertionError: if the semester is not summer, spring or fall.
    '''
    parts = course.split('-')
    if len(parts) != 3:
        # Report the malformed key itself rather than a bare tuple-unpacking
        # error.
        raise ValueError(
            f"{course!r} is not of the form year-semester-class_number")
    year, semester, class_number = parts

    # type check
    year = int(year)
    class_number = int(class_number)

    # validate semester
    semesters = ['summer', 'spring', 'fall']
    semester = semester.lower()
    if semester not in semesters:
        # Raised explicitly so the check still runs under `python -O`, which
        # strips assert statements. The exception type is kept as
        # AssertionError so existing callers see no change.
        raise AssertionError(f"{semester} not one of {semesters}")
    return year, semester, class_number

async def instructor_emails(term_id, class_number):
    '''Collect the business email addresses of a class's instructors.

    The instructors are those SIS reports for the class matching {term_id}
    and {class_number}. Returns a list of addresses, in the order the
    instructors were returned.
    '''
    # SIS identifies instructors by campus uid only; it does not carry their
    # email addresses.
    instructor_uids = await classes.get_instructors(
        SIS_CLASSES_ID, SIS_CLASSES_KEY,
        term_id, class_number, False, 'campus-uid'
    )

    # Look each uid up in HR, one request at a time, and keep only the
    # business (berkeley.edu) addresses from each record.
    addresses = []
    for instructor_uid in instructor_uids:
        hr_items = await ucbhr_info.get(
            UCB_HR_ID, UCB_HR_KEY, instructor_uid, 'campus-uid'
        )
        addresses.extend(ucbhr_info.emails(hr_items, 'BUSN'))
    return addresses

async def student_emails(term_id, class_number):
    '''Collect the campus email addresses of a class's students.

    The class section matching {term_id} and {class_number} is looked up
    first; enrollments are then fetched by that section's subject area and
    catalog number, and the addresses are extracted from those enrollments.
    '''
    # Section record for the requested class.
    class_section = await classes.get_section_by_id(
        SIS_CLASSES_ID, SIS_CLASSES_KEY, term_id, class_number
    )

    # Subject area and catalog number of that section, e.g. STAT C8.
    subject = enrollments.section_subject_area(class_section)
    catalog = enrollments.section_catalog_number(class_section)

    # Enrollments in the sections matching the term id, subject and number.
    matching_enrollments = await enrollments.get_enrollments(
        SIS_ENROLLMENTS_ID, SIS_ENROLLMENTS_KEY,
        term_id, subject, catalog
    )

    # Pull the student email addresses out of the enrollment records.
    addresses = enrollments.get_enrollment_emails(matching_enrollments)
    return addresses

async def main():
    '''Print the email addresses of a course's students or instructors.

    Command-line arguments select the course ({year}-{semester}-{class_number})
    and the constituents ('students' or 'instructors'). API credentials are
    read from the environment and stored as module globals, which
    student_emails() and instructor_emails() read. Addresses are printed one
    per line on stdout; log output goes to stderr.

    Exits with a non-zero status (via SystemExit) when arguments are invalid
    or a required credential variable is missing.
    '''
    # Log to stderr: stdout carries the email list and is typically
    # redirected into a file, so log lines must not be mixed into it.
    logging.basicConfig(stream=sys.stderr)
    logger = logging.getLogger()
    logger.setLevel(logging.ERROR)

    # arg parsing comes first so that -h/--help and usage errors work even
    # when no credentials are set
    parser = argparse.ArgumentParser(
        description="Get UCB course enrollee and instructor email addresses.")
    parser.add_argument('-d', dest='debug', action='store_true',
        help='set debug log level')
    parser.add_argument('course', metavar='year-semester-classnum',
        help='e.g. "2019-summer-12345"')
    parser.add_argument('constituents', choices=['students', 'instructors'],
        help='constituents')
    args = parser.parse_args()

    if args.debug:
        logger.setLevel(logging.DEBUG)

    # check for creds in environment and set them as global vars
    required_env_vars = [
        'SIS_CLASSES_ID', 'SIS_CLASSES_KEY',
        'SIS_ENROLLMENTS_ID', 'SIS_ENROLLMENTS_KEY',
        'SIS_TERMS_ID', 'SIS_TERMS_KEY',
        'UCB_HR_ID', 'UCB_HR_KEY',
    ]
    missing = [v for v in required_env_vars if v not in os.environ]
    if missing:
        # An explicit exit instead of assert, so the check is not stripped
        # under `python -O`. sys.exit(str) prints the message to stderr and
        # exits with status 1.
        sys.exit(f"{', '.join(missing)} not defined in environment.")
    for v in required_env_vars:
        globals()[v] = os.environ[v]

    logger.debug(f"course: {args.course}")
    logger.debug(f"constituents: {args.constituents}")

    year, semester, class_number = parse_course(args.course)

    # fetch the SIS term id, e.g. 2195
    term_id = await terms.get_term_id_from_year_sem(
        SIS_TERMS_ID, SIS_TERMS_KEY, year, semester
    )
    term_id = int(term_id)
    logger.debug(f"{term_id} {class_number}")

    if args.constituents == 'students':
        emails = await student_emails(term_id, class_number)
    else:
        # argparse `choices` guarantees the only other value is 'instructors'
        emails = await instructor_emails(term_id, class_number)

    for email in emails:
        print(email)


# Run only when executed as a script, so importing this module has no side
# effects.
if __name__ == '__main__':
    asyncio.run(main())
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Python dependencies installed into the image by the Dockerfile.
# NOTE(review): course-emails.py does not import aiohttp directly; presumably
# it is needed by the sis/ucbhr packages below -- confirm.
aiohttp==3.5.4
# Installed straight from GitHub, each pinned to a specific commit.
# Presumably these provide the `sis` and `ucbhr` modules that
# course-emails.py imports -- confirm against the repositories.
git+https://github.com/ryanlovett/sis-cli.git@0e2295e
git+https://github.com/ryanlovett/ucbhr.git@48975fb
42 changes: 42 additions & 0 deletions save-course-emails.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/bash
#
# Once per sleep interval, write the student and instructor email lists for
# every course profile found in the helm-provided profiles file. Each list
# is saved as ${profile_dir}/{profile}-{students,instructors}.txt.

# Left disabled: failures are handled per step below so that one bad course
# or one bad pass does not end the loop.
# set -e

# profiles defined in our helm config
custom_profiles="/etc/jupyterhub/config/custom.profiles"

# output location
profile_dir="/srv/jupyterhub/profiles.d"

course_emails="/usr/local/bin/course-emails.py"

# space 24 hours apart; TODO: guessing k8s can loop this for us somehow
sleep_time=86400

while true; do

  if [[ ! -f "${custom_profiles}" ]]; then
    echo "No such file: ${custom_profiles}"
  elif ! profiles=$(jq -r 'keys[]' "${custom_profiles}"); then
    echo "Could not read profile keys from ${custom_profiles}" >&2
  elif ! mkdir -p "${profile_dir}"; then
    echo "Could not create ${profile_dir}" >&2
  else

    # jq prints one key per line; split on newlines only so that a key is
    # never word-split or glob-expanded.
    profile_list=()
    if [[ -n "${profiles}" ]]; then
      mapfile -t profile_list <<< "${profiles}"
    fi
    echo "profiles: ${profile_list[*]}"

    # write out email lists for each profile
    for profile in "${profile_list[@]}"; do
      for people in students instructors; do
        filename="${profile_dir}/${profile}-${people}.txt"
        # write to tempfile because gathering addresses takes time
        # and we don't want the hub to read an abbreviated list.
        # The tempfile lives in the output directory so the final mv is a
        # rename within one filesystem rather than a copy across two.
        if ! outfile=$(mktemp "${profile_dir}/.${profile}-${people}.XXXXXX"); then
          echo "Could not create temp file for ${profile} ${people}" >&2
          continue
        fi
        echo "${profile}" "${people}" "${outfile}"
        if "${course_emails}" "${profile}" "${people}" > "${outfile}"; then
          mv -- "${outfile}" "${filename}"
        else
          # Keep the previous list in place rather than replacing it with an
          # empty or partial one.
          echo "Fetching ${people} for ${profile} failed; keeping existing ${filename}" >&2
          rm -f -- "${outfile}"
        fi
      done
    done
  fi

  sleep "${sleep_time}"
done

0 comments on commit 352c174

Please sign in to comment.