Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Associate PublishableEntities with Collections [FC-0062] #216

Merged
merged 9 commits into from
Sep 3, 2024
2 changes: 1 addition & 1 deletion openedx_learning/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""
Open edX Learning ("Learning Core").
"""
__version__ = "0.11.2"
__version__ = "0.11.3"
93 changes: 82 additions & 11 deletions openedx_learning/apps/authoring/collections/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,13 @@
"""
from __future__ import annotations

from datetime import datetime, timezone

from django.core.exceptions import ValidationError
from django.db.models import QuerySet

from ..publishing import api as publishing_api
from ..publishing.models import PublishableEntity
from .models import Collection

# The public API that will be re-exported by openedx_learning.apps.authoring.api
Expand All @@ -13,10 +18,12 @@
# start with an underscore AND it is not in __all__, that function is considered
# to be callable only by other apps in the authoring package.
__all__ = [
"add_to_collection",
"create_collection",
"get_collection",
"get_collections",
"get_learning_package_collections",
"get_entity_collections",
"remove_from_collection",
"update_collection",
]

Expand All @@ -26,6 +33,7 @@ def create_collection(
title: str,
created_by: int | None,
description: str = "",
enabled: bool = True,
) -> Collection:
"""
Create a new Collection
Expand All @@ -35,6 +43,7 @@ def create_collection(
title=title,
created_by_id=created_by,
description=description,
enabled=enabled,
)
return collection

Expand Down Expand Up @@ -70,23 +79,85 @@ def update_collection(
return collection


def get_learning_package_collections(learning_package_id: int) -> QuerySet[Collection]:
def add_to_collection(
collection_id: int,
entities_qset: QuerySet[PublishableEntity],
created_by: int | None = None,
) -> Collection:
"""
Get all collections for a given learning package
Adds a QuerySet of PublishableEntities to a Collection.

These Entities must belong to the same LearningPackage as the Collection, or a ValidationError will be raised.

PublishableEntities already in the Collection are silently ignored.

The Collection object's modified date is updated.

Returns the updated Collection object.
"""
collection = get_collection(collection_id)
learning_package_id = collection.learning_package_id

# Disallow adding entities outside the collection's learning package
for entity in entities_qset.all():
if entity.learning_package_id != learning_package_id:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Nit (optional)]: You can also push this to the database layer with something like:

invalid_entity = entities_qset.filter(~Q(learning_package_id=learning_package_id)).first()
if invalid_entity:
    # ... validation error with specifics

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's much cleaner, thank you, done with e03daaa

raise ValidationError(
f"Cannot add entity {entity.pk} in learning package {entity.learning_package_id} to "
f"collection {collection_id} in learning package {learning_package_id}."
)

collection.entities.add(
*entities_qset.all(),
through_defaults={"created_by_id": created_by},
)
collection.modified = datetime.now(tz=timezone.utc)
collection.save()

return collection


def remove_from_collection(
collection_id: int,
entities_qset: QuerySet[PublishableEntity],
) -> Collection:
"""
Removes a QuerySet of PublishableEntities from a Collection.

PublishableEntities are deleted (in bulk).

The Collection's modified date is updated (even if nothing was removed).

Returns the updated Collection.
"""
collection = get_collection(collection_id)

collection.entities.remove(*entities_qset.all())
collection.modified = datetime.now(tz=timezone.utc)
collection.save()

Only enabled collections are returned
return collection


def get_entity_collections(learning_package_id: int, entity_key: str) -> QuerySet[Collection]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[speculation (please do not block PR for this)]: It might be nice if we could a custom Manager on the through-model so that if someone has an entity, they could do something like entity.collections.active().

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh absolutely -- I am interested to see the other use cases for Collections besides what we're supporting here. But will leave that for a day when it's needed, thank you :)

"""
Get all collections in the given learning package which contain this entity.

Only enabled collections are returned.
"""
return Collection.objects \
.filter(learning_package_id=learning_package_id, enabled=True) \
.select_related("learning_package") \
.order_by('pk')
entity = publishing_api.get_publishable_entity_by_key(
learning_package_id,
key=entity_key,
)
return entity.collections.filter(enabled=True).order_by("pk")


def get_collections(enabled: bool | None = None) -> QuerySet[Collection]:
def get_collections(learning_package_id: int, enabled: bool | None = True) -> QuerySet[Collection]:
"""
Get all collections, optionally caller can filter by enabled flag
Get all collections for a given learning package

Enabled collections are returned by default.
"""
qs = Collection.objects.all()
qs = Collection.objects.filter(learning_package_id=learning_package_id)
if enabled is not None:
qs = qs.filter(enabled=enabled)
return qs.select_related("learning_package").order_by('pk')
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generated by Django 4.2.15 on 2024-08-29 00:05

import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models

import openedx_learning.lib.validators


class Migration(migrations.Migration):

dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
('oel_publishing', '0002_alter_learningpackage_key_and_more'),
('oel_collections', '0002_remove_collection_name_collection_created_by_and_more'),
]

operations = [
migrations.CreateModel(
name='CollectionPublishableEntity',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('created', models.DateTimeField(auto_now_add=True, validators=[openedx_learning.lib.validators.validate_utc_datetime])),
('collection', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_collections.collection')),
('created_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL)),
('entity', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='oel_publishing.publishableentity')),
],
),
migrations.AddField(
model_name='collection',
name='entities',
field=models.ManyToManyField(related_name='collections', through='oel_collections.CollectionPublishableEntity', to='oel_publishing.publishableentity'),
),
migrations.AddConstraint(
model_name='collectionpublishableentity',
constraint=models.UniqueConstraint(fields=('collection', 'entity'), name='oel_collections_cpe_uniq_col_ent'),
),
]
111 changes: 110 additions & 1 deletion openedx_learning/apps/authoring/collections/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,68 @@
"""
Core models for Collections

TLDR Guidelines:
1. DO NOT modify these models to store full version snapshots.
2. DO NOT use these models to try to reconstruct historical versions of
Collections for fast querying.

If you're trying to do either of these things, you probably want a new model or
app. For more details, read on.
Comment on lines +9 to +10
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This adds a lot of context!! Thank you! ❤️

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh definitely.. @ormsbee 's notes are always worth preserving :)


The goal of these models is to provide a lightweight method of organizing
PublishableEntities. The first use case for this is modeling the structure of a
v1 Content Library within a LearningPackage. This is what we'll use the
Collection model for.

An important thing to note here is that Collections are *NOT* publishable
entities themselves. They have no "Draft" or "Published" versions. Collections
are never "published", though the things inside of them are.

When a LibraryContentBlock makes use of a Content Library, it copies all of
the items it will use into the Course itself. It will also store a version
on the LibraryContentBlock -- this is a MongoDB ObjectID in v1 and an integer in
v2 Libraries. Later on, the LibraryContentBlock will want to check back to see
if any updates have been made, using its version as a key. If a new version
exists, the course team has the option of re-copying data from the Library.

ModuleStore based v1 Libraries and Blockstore-based v2 libraries both version
the entire library in a series of snapshots. This makes it difficult to have
very large libraries, which is an explicit goal for Modular Learning. In
Learning Core, we've moved to tracking the versions of individual Components to
address this issue. But that means we no longer have a single version indicator
for "has anything here changed"?

We *could* have put that version in the ``publishing`` app's PublishLog, but
that would make it too broad. We want the ability to eventually collapse many v1
Libraries into a single Learning Core backed v2 Library. If we tracked the
versioning in only a central location, then we'd have many false positives where
the version was bumped because something else in the Learning Package changed.
So instead, we're creating a new Collection model inside the LearningPackage to
track that concept.

A critical takeaway is that we don't have to store snapshots of every version of
a Collection, because that data has been copied over by the LibraryContentBlock.
We only need to store the current state of the Collection, and increment the
version numbers when changes happen. This will allow the LibraryContentBlock to
check in and re-copy over the latest version if the course team desires.

That's why these models only store the current state of a Collection. Unlike the
``components`` app, ``collections`` does not store fully materialized snapshots
of past versions. This is done intentionally in order to save space and reduce
the cost of writes. Collections may grow to be very large, and we don't want to
be writing N rows with every version, where N is the number of
PublishableEntities in a Collection.

MVP of these models does not store changesets, but we can add this when there's a
use case for it. The number of rows in these changesets would grow in proportion
to the number of things that are actually changing (instead of copying over
everything on every version). This is could be used to make it easier to figure out
what changed between two given versions of a Collection. A LibraryContentBlock
in a course would have stored the version number of the last time it copied data
from the Collection, and we can eventually surface this data to the user. Note that
while it may be possible to reconstruct past versions of Collections based off of
this changeset data, it's going to be a very slow process to do so, and it is
strongly discouraged.
"""
from __future__ import annotations

Expand All @@ -9,10 +72,11 @@

from ....lib.fields import MultiCollationTextField, case_insensitive_char_field
from ....lib.validators import validate_utc_datetime
from ..publishing.models import LearningPackage
from ..publishing.models import LearningPackage, PublishableEntity

__all__ = [
"Collection",
"CollectionPublishableEntity",
]


Expand Down Expand Up @@ -79,6 +143,12 @@ class Collection(models.Model):
],
)

entities: models.ManyToManyField[PublishableEntity, "CollectionPublishableEntity"] = models.ManyToManyField(
PublishableEntity,
through="CollectionPublishableEntity",
related_name="collections",
)

class Meta:
verbose_name_plural = "Collections"
indexes = [
Expand All @@ -96,3 +166,42 @@ def __str__(self) -> str:
User-facing string representation of a Collection.
"""
return f"<{self.__class__.__name__}> ({self.id}:{self.title})"


class CollectionPublishableEntity(models.Model):
"""
Collection -> PublishableEntity association.
"""
collection = models.ForeignKey(
Collection,
on_delete=models.CASCADE,
)
entity = models.ForeignKey(
PublishableEntity,
on_delete=models.RESTRICT,
)
created_by = models.ForeignKey(
settings.AUTH_USER_MODEL,
on_delete=models.SET_NULL,
null=True,
blank=True,
)
created = models.DateTimeField(
auto_now_add=True,
validators=[
validate_utc_datetime,
],
)

pomegranited marked this conversation as resolved.
Show resolved Hide resolved
pomegranited marked this conversation as resolved.
Show resolved Hide resolved
class Meta:
constraints = [
# Prevent race conditions from making multiple rows associating the
# same Collection to the same Entity.
models.UniqueConstraint(
fields=[
"collection",
"entity",
],
name="oel_collections_cpe_uniq_col_ent",
)
]
9 changes: 9 additions & 0 deletions openedx_learning/apps/authoring/collections/readme.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,12 @@ had to create many small libraries.

For the Libraries Relaunch ("v2"), we want to encourage people to create large libraries with lots of content,
and organize that content using tags and collections.

Architecture Guidelines
-----------------------

Things to remember:

* Collections may grow very large.
* Collections are not publishable in and of themselves.
* Collections link to PublishableEntity records, **not** PublishableEntityVersion records.
Loading