Skip to content

Commit

Permalink
feat: purge name from certificate records during user retirement
Browse files Browse the repository at this point in the history
[APER-3241]

This PR updates the retirement pipeline to purge learners' names from certificate records when their account is being retired.

It also introduces a new management command that can be used by Open edX operators to purge the leftover name data (PII data) from the `certificates_generatedcertificate` table. This is designed as a one-time use data fixup, as the retirement functionality should clean this up now.
  • Loading branch information
justinhynes committed May 16, 2024
1 parent 73d3995 commit fd805e0
Show file tree
Hide file tree
Showing 9 changed files with 501 additions and 71 deletions.
18 changes: 18 additions & 0 deletions lms/djangoapps/certificates/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -953,3 +953,21 @@ def invalidate_certificate(user_id, course_key_or_id, source):
return False

return True


def clear_pii_from_certificate_records_for_user(user):
"""
Utility function to remove PII from certificate records when a learner's account is being retired. Used by the
`AccountRetirementView` in the `user_api` Django app (invoked by the /api/user/v1/accounts/retire endpoint).
The update is performed using a bulk SQL update via the Django ORM. This will not trigger the GeneratedCertificate
model's custom `save()` function, nor fire any Django signals (which is desired at the time of writing). There is
nothing to update in our external systems by this update.
Args:
user (User): The User instance of the learner actively being retired.
Returns:
None
"""
GeneratedCertificate.objects.filter(user=user).update(name="")
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""
A management command, designed to be run once by Open edX Operators, to obfuscate learner PII from the
`Certificates_GeneratedCertificate` table that should have been purged during learner retirement.
A fix has been included in the retirement pipeline to properly purge this data during learner retirement. This can be
used to purge PII from accounts that have already been retired.
"""

import logging

from django.contrib.auth import get_user_model
from django.core.management.base import BaseCommand

from lms.djangoapps.certificates.models import GeneratedCertificate
from openedx.core.djangoapps.user_api.api import get_retired_user_ids

User = get_user_model()
log = logging.getLogger(__name__)


class Command(BaseCommand):
"""
This management command performs a bulk update on `GeneratedCertificate` instances. This means that it will not
invoke the custom save() function defined as part of the `GeneratedCertificate` model, and thus will not emit any
Django signals throughout the system after the update occurs. This is desired behavior. We are using this
management command to purge remnant PII, retired elsewhere in the system, that should have already been removed
from the Certificates tables. We don't need updates to propogate to external systems (like the Credentials IDA).
This management command functions by requesting a list of learners' user_ids whom have completed their journey
through the retirement pipeline. The `get_retired_user_ids` utility function is responsible for filtering out any
learners in the PENDING state, as they could still submit a request to cancel their account deletion request (and
we don't want to remove any data that may still be good).
Example usage:
# Dry Run (preview changes):
$ ./manage.py lms purge_pii_from_generatedcertificates --dry-run
# Purge data:
$ ./manage.py lms purge_pii_from_generatedcertificates
"""

help = """
Purges learners' full names from the `Certificates_GeneratedCertificate` table if their account has been
successfully retired.
"""

def add_arguments(self, parser):
parser.add_argument(
"--dry-run",
action="store_true",
help="Shows a preview of what users would be affected by running this management command.",
)

def handle(self, *args, **options):
retired_user_ids = get_retired_user_ids()
if not options["dry_run"]:
log.warning(
f"Purging `name` from the certificate records of the following users: {retired_user_ids}"
)
GeneratedCertificate.objects.filter(user_id__in=retired_user_ids).update(name="")
else:
log.info(
"DRY RUN: running this management command would purge `name` data from the following users: "
f"{retired_user_ids}"
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
"""
Tests for the `purge_pii_from_generatedcertificates` management command.
"""


from django.core.management import call_command
from testfixtures import LogCapture

from common.djangoapps.student.tests.factories import UserFactory
from lms.djangoapps.certificates.data import CertificateStatuses
from lms.djangoapps.certificates.models import GeneratedCertificate
from lms.djangoapps.certificates.tests.factories import GeneratedCertificateFactory
from openedx.core.djangoapps.user_api.models import RetirementState
from openedx.core.djangoapps.user_api.tests.factories import (
RetirementStateFactory,
UserRetirementRequestFactory,
UserRetirementStatusFactory,
)
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
from xmodule.modulestore.tests.factories import CourseFactory


class PurgePiiFromCertificatesTests(ModuleStoreTestCase):
"""
Tests for the `purge_pii_from_generatedcertificates` management command.
"""
@classmethod
def setUpClass(cls):
"""
The retirement pipeline is not fully enabled by default. In order to properly test the management command, we
must ensure that at least one of the required RetirementState states (`COMPLETE`) exists.
"""
super().setUpClass()
cls.complete = RetirementStateFactory(state_name="COMPLETE")

@classmethod
def tearDownClass(cls):
# Remove any retirement state objects that we created during this test suite run. We don't want to poison other
# test suites.
RetirementState.objects.all().delete()
super().tearDownClass()

def setUp(self):
super().setUp()
self.course_run = CourseFactory()
# create an "active" learner that is not associated with any retirement requests, used to verify that the
# management command doesn't purge any info for active users.
self.user_active = UserFactory()
self.user_active_name = "Teysa Karlov"
GeneratedCertificateFactory(
status=CertificateStatuses.downloadable,
course_id=self.course_run.id,
user=self.user_active,
name=self.user_active_name,
grade=1.00,
)
# create a second learner that is associated with a retirement request, used to verify that the management
# command purges info successfully from a GeneratedCertificate instance associated with a retired learner
self.user_retired = UserFactory()
self.user_retired_name = "Nicol Bolas"
GeneratedCertificateFactory(
status=CertificateStatuses.downloadable,
course_id=self.course_run.id,
user=self.user_retired,
name=self.user_retired_name,
grade=0.99,
)
UserRetirementStatusFactory(
user=self.user_retired,
current_state=self.complete,
last_state=self.complete,
)
UserRetirementRequestFactory(user=self.user_retired)

def test_management_command(self):
"""
Verify the management command purges expected data from a GeneratedCertificate instance if a learner has
successfully had their account retired.
"""
cert_for_active_user = GeneratedCertificate.objects.get(user_id=self.user_active)
assert cert_for_active_user.name == self.user_active_name
cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired)
assert cert_for_retired_user.name == self.user_retired_name

call_command("purge_pii_from_generatedcertificates")

cert_for_active_user = GeneratedCertificate.objects.get(user_id=self.user_active)
assert cert_for_active_user.name == self.user_active_name
cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired)
assert cert_for_retired_user.name == ""

def test_management_command_dry_run(self):
"""
Verify that the management command does not purge any data when invoked with the `--dry-run` flag
"""
expected_log_msg = (
"DRY RUN: running this management command would purge `name` data from the following users: "
f"[{self.user_retired.id}]"
)

cert_for_active_user = GeneratedCertificate.objects.get(user_id=self.user_active)
assert cert_for_active_user.name == self.user_active_name
cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired)
assert cert_for_retired_user.name == self.user_retired_name

with LogCapture() as logger:
call_command("purge_pii_from_generatedcertificates", "--dry-run")

cert_for_active_user = GeneratedCertificate.objects.get(user_id=self.user_active)
assert cert_for_active_user.name == self.user_active_name
cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired)
assert cert_for_retired_user.name == self.user_retired_name

assert logger.records[0].msg == expected_log_msg
47 changes: 43 additions & 4 deletions lms/djangoapps/certificates/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
can_show_certificate_message,
certificate_status_for_student,
certificate_downloadable_status,
clear_pii_from_certificate_records_for_user,
create_certificate_invalidation_entry,
create_or_update_certificate_allowlist_entry,
display_date_for_certificate,
Expand Down Expand Up @@ -76,6 +77,9 @@
from openedx.core.djangoapps.site_configuration.tests.test_util import with_site_configuration

CAN_GENERATE_METHOD = 'lms.djangoapps.certificates.generation_handler._can_generate_regular_certificate'
BETA_TESTER_METHOD = 'lms.djangoapps.certificates.api.access.is_beta_tester'
CERTS_VIEWABLE_METHOD = 'lms.djangoapps.certificates.api.certificates_viewable_for_course'
PASSED_OR_ALLOWLISTED_METHOD = 'lms.djangoapps.certificates.api._has_passed_or_is_allowlisted'
FEATURES_WITH_CERTS_ENABLED = settings.FEATURES.copy()
FEATURES_WITH_CERTS_ENABLED['CERTIFICATES_HTML_VIEW'] = True

Expand Down Expand Up @@ -1120,10 +1124,6 @@ def test_get_certificate_invalidation_entry_dne(self):
for index, message in enumerate(expected_messages):
assert message in log.records[index].getMessage()

BETA_TESTER_METHOD = 'lms.djangoapps.certificates.api.access.is_beta_tester'
CERTS_VIEWABLE_METHOD = 'lms.djangoapps.certificates.api.certificates_viewable_for_course'
PASSED_OR_ALLOWLISTED_METHOD = 'lms.djangoapps.certificates.api._has_passed_or_is_allowlisted'


class MockGeneratedCertificate:
"""
Expand Down Expand Up @@ -1268,3 +1268,42 @@ def test_beta_tester(self):

with patch(BETA_TESTER_METHOD, return_value=True):
assert not can_show_certificate_message(self.course, self.user, grade, certs_enabled)


class CertificatesLearnerRetirementFunctionality(ModuleStoreTestCase):
"""
API tests for utility functions used as part of the learner retirement pipeline to remove PII from certificate
records.
"""
def setUp(self):
super().setUp()
self.user = UserFactory()
self.user_full_name = "Maeby Funke"
self.course1 = CourseOverviewFactory()
self.course2 = CourseOverviewFactory()
GeneratedCertificateFactory(
course_id=self.course1.id,
name=self.user_full_name,
user=self.user,
)
GeneratedCertificateFactory(
course_id=self.course2.id,
name=self.user_full_name,
user=self.user,
)

def test_clear_pii_from_certificate_records(self):
"""
Unit test for the `clear_pii_from_certificate_records` utility function, used to wipe PII from certificate
records when a learner's account is being retired.
"""
cert_course1 = GeneratedCertificate.objects.get(user=self.user, course_id=self.course1.id)
cert_course2 = GeneratedCertificate.objects.get(user=self.user, course_id=self.course2.id)
assert cert_course1.name == self.user_full_name
assert cert_course2.name == self.user_full_name

clear_pii_from_certificate_records_for_user(self.user)
cert_course1 = GeneratedCertificate.objects.get(user=self.user, course_id=self.course1.id)
cert_course2 = GeneratedCertificate.objects.get(user=self.user, course_id=self.course2.id)
assert cert_course1.name == ""
assert cert_course2.name == ""
Loading

0 comments on commit fd805e0

Please sign in to comment.