From 45295909e83299160198b5fe4abb7b14b9e87df7 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Tue, 23 Dec 2025 14:11:54 -0500 Subject: [PATCH 1/6] refactor: change Content model to Media --- .../apps/authoring/backup_restore/zipper.py | 4 +-- .../components/{admin.py => _admin.py} | 2 +- .../apps/authoring/components/models.py | 8 ++--- .../apps/authoring/contents/admin.py | 14 ++++----- .../apps/authoring/contents/api.py | 24 +++++++-------- .../migrations/0002_rename_content_media.py | 23 ++++++++++++++ .../contents/migrations/0003_rename_index.py | 27 +++++++++++++++++ .../apps/authoring/contents/models.py | 14 ++++----- .../authoring/backup_restore/test_backup.py | 4 +-- .../apps/authoring/components/test_api.py | 30 +++++++++---------- .../apps/authoring/components/test_assets.py | 6 ++-- 11 files changed, 103 insertions(+), 53 deletions(-) rename openedx_learning/apps/authoring/components/{admin.py => _admin.py} (98%) create mode 100644 openedx_learning/apps/authoring/contents/migrations/0002_rename_content_media.py create mode 100644 openedx_learning/apps/authoring/contents/migrations/0003_rename_index.py diff --git a/openedx_learning/apps/authoring/backup_restore/zipper.py b/openedx_learning/apps/authoring/backup_restore/zipper.py index 27ddcacc3..5dcb4274d 100644 --- a/openedx_learning/apps/authoring/backup_restore/zipper.py +++ b/openedx_learning/apps/authoring/backup_restore/zipper.py @@ -23,7 +23,7 @@ ComponentType, ComponentVersion, ComponentVersionContent, - Content, + Media, LearningPackage, PublishableEntity, PublishableEntityVersion, @@ -377,7 +377,7 @@ def create_zip(self, path: str) -> None: ] = component_version.prefetched_contents # type: ignore[attr-defined] for component_version_content in contents: - content: Content = component_version_content.content + content: Media = component_version_content.content # Important: The component_version_content.key contains implicitly # the file name and the file extension diff --git a/openedx_learning/apps/authoring/components/admin.py b/openedx_learning/apps/authoring/components/_admin.py similarity index 98% rename from openedx_learning/apps/authoring/components/admin.py rename to openedx_learning/apps/authoring/components/_admin.py index 2fa501ddf..02f116603 100644 --- a/openedx_learning/apps/authoring/components/admin.py +++ b/openedx_learning/apps/authoring/components/_admin.py @@ -54,7 +54,7 @@ class ContentInline(admin.TabularInline): """ Django admin configuration for Content """ - model = ComponentVersion.contents.through + model = ComponentVersion.media.through def get_queryset(self, request): queryset = super().get_queryset(request) diff --git a/openedx_learning/apps/authoring/components/models.py b/openedx_learning/apps/authoring/components/models.py index b53077637..344455236 100644 --- a/openedx_learning/apps/authoring/components/models.py +++ b/openedx_learning/apps/authoring/components/models.py @@ -23,7 +23,7 @@ from ....lib.fields import case_sensitive_char_field, key_field from ....lib.managers import WithRelationsManager -from ..contents.models import Content +from ..contents.models import Media from ..publishing.models import LearningPackage, PublishableEntityMixin, PublishableEntityVersionMixin __all__ = [ @@ -210,8 +210,8 @@ class ComponentVersion(PublishableEntityVersionMixin): # The contents hold the actual interesting data associated with this # ComponentVersion. - contents: models.ManyToManyField[Content, ComponentVersionContent] = models.ManyToManyField( - Content, + contents: models.ManyToManyField[Media, ComponentVersionContent] = models.ManyToManyField( + Media, through="ComponentVersionContent", related_name="component_versions", ) @@ -238,7 +238,7 @@ class ComponentVersionContent(models.Model): """ component_version = models.ForeignKey(ComponentVersion, on_delete=models.CASCADE) - content = models.ForeignKey(Content, on_delete=models.RESTRICT) + content = models.ForeignKey(Media, on_delete=models.RESTRICT) # "key" is a reserved word for MySQL, so we're temporarily using the column # name of "_key" to avoid breaking downstream tooling. A possible diff --git a/openedx_learning/apps/authoring/contents/admin.py b/openedx_learning/apps/authoring/contents/admin.py index 029ebfd86..57cf254cb 100644 --- a/openedx_learning/apps/authoring/contents/admin.py +++ b/openedx_learning/apps/authoring/contents/admin.py @@ -8,11 +8,11 @@ from openedx_learning.lib.admin_utils import ReadOnlyModelAdmin -from .models import Content +from .models import Media -@admin.register(Content) -class ContentAdmin(ReadOnlyModelAdmin): +@admin.register(Media) +class MediaAdmin(ReadOnlyModelAdmin): """ Django admin for Content model """ @@ -40,13 +40,13 @@ class ContentAdmin(ReadOnlyModelAdmin): search_fields = ("hash_digest",) @admin.display(description="OS Path") - def os_path(self, content: Content): + def os_path(self, content: Media): return content.os_path() or "" - def path(self, content: Content): + def path(self, content: Media): return content.path if content.has_file else "" - def text_preview(self, content: Content): + def text_preview(self, content: Media): if not content.text: return "" return format_html( @@ -54,7 +54,7 @@ def text_preview(self, content: Content): content.text, ) - def image_preview(self, content: Content): + def image_preview(self, content: Media): """ Return HTML for an image, if that is the underlying Content. diff --git a/openedx_learning/apps/authoring/contents/api.py b/openedx_learning/apps/authoring/contents/api.py index 319935882..b494997b9 100644 --- a/openedx_learning/apps/authoring/contents/api.py +++ b/openedx_learning/apps/authoring/contents/api.py @@ -13,7 +13,7 @@ from django.db.transaction import atomic from ....lib.fields import create_hash_digest -from .models import Content, MediaType +from .models import Media, MediaType # The public API that will be re-exported by openedx_learning.apps.authoring.api # is listed in the __all__ entries below. Internal helper functions that are @@ -66,7 +66,7 @@ def get_or_create_media_type(mime_type: str) -> MediaType: return media_type -def get_content(content_id: int, /) -> Content: +def get_content(content_id: int, /) -> Media: """ Get a single Content object by its ID. @@ -76,7 +76,7 @@ def get_content(content_id: int, /) -> Content: include this function anyway because it's tiny to write and it's better than someone using a get_or_create_* function when they really just want to get. """ - return Content.objects.get(id=content_id) + return Media.objects.get(id=content_id) def get_or_create_text_content( @@ -86,7 +86,7 @@ def get_or_create_text_content( text: str, created: datetime, create_file: bool = False, -) -> Content: +) -> Media: """ Get or create a Content entry with text data stored in the database. @@ -109,13 +109,13 @@ def get_or_create_text_content( with atomic(): try: - content = Content.objects.get( + content = Media.objects.get( learning_package_id=learning_package_id, media_type_id=media_type_id, hash_digest=hash_digest, ) - except Content.DoesNotExist: - content = Content( + except Media.DoesNotExist: + content = Media( learning_package_id=learning_package_id, media_type_id=media_type_id, hash_digest=hash_digest, @@ -139,7 +139,7 @@ def get_or_create_file_content( /, data: bytes, created: datetime, -) -> Content: +) -> Media: """ Get or create a Content with data stored in a file storage backend. @@ -152,13 +152,13 @@ def get_or_create_file_content( hash_digest = create_hash_digest(data) with atomic(): try: - content = Content.objects.get( + content = Media.objects.get( learning_package_id=learning_package_id, media_type_id=media_type_id, hash_digest=hash_digest, ) - except Content.DoesNotExist: - content = Content( + except Media.DoesNotExist: + content = Media( learning_package_id=learning_package_id, media_type_id=media_type_id, hash_digest=hash_digest, @@ -175,7 +175,7 @@ def get_or_create_file_content( return content -def get_content_info_headers(content: Content) -> dict[str, str]: +def get_content_info_headers(content: Media) -> dict[str, str]: """ Return HTTP headers that are specific to this Content. diff --git a/openedx_learning/apps/authoring/contents/migrations/0002_rename_content_media.py b/openedx_learning/apps/authoring/contents/migrations/0002_rename_content_media.py new file mode 100644 index 000000000..809afb276 --- /dev/null +++ b/openedx_learning/apps/authoring/contents/migrations/0002_rename_content_media.py @@ -0,0 +1,23 @@ +# Generated by Django 5.2.9 on 2025-12-22 23:49 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('oel_components', '0004_remove_componentversioncontent_uuid'), + ('oel_contents', '0001_initial'), + ('oel_publishing', '0010_backfill_dependencies'), + ] + + operations = [ + migrations.RenameModel( + old_name='Content', + new_name='Media', + ), + migrations.AlterModelOptions( + name='media', + options={'verbose_name': 'Media', 'verbose_name_plural': 'Media'}, + ), + ] diff --git a/openedx_learning/apps/authoring/contents/migrations/0003_rename_index.py b/openedx_learning/apps/authoring/contents/migrations/0003_rename_index.py new file mode 100644 index 000000000..9612843db --- /dev/null +++ b/openedx_learning/apps/authoring/contents/migrations/0003_rename_index.py @@ -0,0 +1,27 @@ +# Generated by Django 5.2.9 on 2025-12-23 19:09 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('oel_contents', '0002_rename_content_media'), + ('oel_publishing', '0010_backfill_dependencies'), + ] + + operations = [ + migrations.RemoveConstraint( + model_name='media', + name='oel_content_uniq_lc_media_type_hash_digest', + ), + migrations.RenameIndex( + model_name='media', + new_name='oel_media_idx_lp_rsize', + old_name='oel_content_idx_lp_rsize', + ), + migrations.AddConstraint( + model_name='media', + constraint=models.UniqueConstraint(fields=('learning_package', 'media_type', 'hash_digest'), name='oel_media_uniq_lc_media_type_hash_digest'), + ), + ] diff --git a/openedx_learning/apps/authoring/contents/models.py b/openedx_learning/apps/authoring/contents/models.py index c087c122f..4af53925b 100644 --- a/openedx_learning/apps/authoring/contents/models.py +++ b/openedx_learning/apps/authoring/contents/models.py @@ -24,7 +24,7 @@ __all__ = [ "MediaType", - "Content", + "Media", ] @@ -128,7 +128,7 @@ def __str__(self) -> str: return base -class Content(models.Model): +class Media(models.Model): """ This is the most primitive piece of content data. @@ -233,7 +233,7 @@ class Content(models.Model): # could be as much as 200K of data if we had nothing but emojis. MAX_TEXT_LENGTH = 50_000 - objects: models.Manager[Content] = WithRelationsManager('media_type') + objects: models.Manager[Media] = WithRelationsManager('media_type') learning_package = models.ForeignKey(LearningPackage, on_delete=models.CASCADE) @@ -395,7 +395,7 @@ class Meta: "media_type", "hash_digest", ], - name="oel_content_uniq_lc_media_type_hash_digest", + name="oel_media_uniq_lc_media_type_hash_digest", ), ] indexes = [ @@ -403,8 +403,8 @@ class Meta: # * Find the largest Content entries. models.Index( fields=["learning_package", "-size"], - name="oel_content_idx_lp_rsize", + name="oel_media_idx_lp_rsize", ), ] - verbose_name = "Content" - verbose_name_plural = "Contents" + verbose_name = "Media" + verbose_name_plural = "Media" diff --git a/tests/openedx_learning/apps/authoring/backup_restore/test_backup.py b/tests/openedx_learning/apps/authoring/backup_restore/test_backup.py index 312f289f1..f3be1611b 100644 --- a/tests/openedx_learning/apps/authoring/backup_restore/test_backup.py +++ b/tests/openedx_learning/apps/authoring/backup_restore/test_backup.py @@ -11,7 +11,7 @@ from django.db.models import QuerySet from openedx_learning.api import authoring as api -from openedx_learning.api.authoring_models import Collection, Component, Content, LearningPackage, PublishableEntity +from openedx_learning.api.authoring_models import Collection, Component, Media, LearningPackage, PublishableEntity from openedx_learning.apps.authoring.backup_restore.zipper import LearningPackageZipper from openedx_learning.lib.test_utils import TestCase @@ -32,7 +32,7 @@ class LpDumpCommandTestCase(TestCase): published_component: Component published_component2: Component draft_component: Component - html_asset_content: Content + html_asset_content: Media collection: Collection @classmethod diff --git a/tests/openedx_learning/apps/authoring/components/test_api.py b/tests/openedx_learning/apps/authoring/components/test_api.py index 8a4dd44c5..7970ce189 100644 --- a/tests/openedx_learning/apps/authoring/components/test_api.py +++ b/tests/openedx_learning/apps/authoring/components/test_api.py @@ -448,8 +448,8 @@ def test_bytes_content(self): created=self.now, ) - content_txt = version_1.contents.get(componentversioncontent__key="raw.txt") - content_raw_txt = version_1.contents.get(componentversioncontent__key="no_ext") + content_txt = version_1.media.get(componentversioncontent__key="raw.txt") + content_raw_txt = version_1.media.get(componentversioncontent__key="no_ext") assert content_txt.size == len(bytes_content) assert str(content_txt.media_type) == 'text/plain' @@ -491,16 +491,16 @@ def test_multiple_versions(self): ) assert version_1.version_num == 1 assert version_1.title == "Problem Version 1" - version_1_contents = list(version_1.contents.all()) + version_1_contents = list(version_1.media.all()) assert len(version_1_contents) == 2 assert ( hello_content == - version_1.contents + version_1.media .get(componentversioncontent__key="hello.txt") ) assert ( goodbye_content == - version_1.contents + version_1.media .get(componentversioncontent__key="goodbye.txt") ) @@ -516,20 +516,20 @@ def test_multiple_versions(self): created=self.now, ) assert version_2.version_num == 2 - assert version_2.contents.count() == 3 + assert version_2.media.count() == 3 assert ( blank_content == - version_2.contents + version_2.media .get(componentversioncontent__key="hello.txt") ) assert ( goodbye_content == - version_2.contents + version_2.media .get(componentversioncontent__key="goodbye.txt") ) assert ( blank_content == - version_2.contents + version_2.media .get(componentversioncontent__key="blank.txt") ) @@ -547,10 +547,10 @@ def test_multiple_versions(self): created=self.now, ) assert version_3.version_num == 3 - assert version_3.contents.count() == 1 + assert version_3.media.count() == 1 assert ( hello_content == - version_3.contents + version_3.media .get(componentversioncontent__key="hello.txt") ) @@ -609,21 +609,21 @@ def test_create_multiple_next_versions_and_diff_content(self): ignore_previous_content=True, ) assert version_2_draft.version_num == 2 - assert version_2_draft.contents.count() == 2 + assert version_2_draft.media.count() == 2 assert ( python_source_asset == - version_2_draft.contents.get( + version_2_draft.media.get( componentversioncontent__key="static/profile.webp") ) assert ( python_source_asset == - version_2_draft.contents.get( + version_2_draft.media.get( componentversioncontent__key="static/new_file.webp") ) with self.assertRaises(ObjectDoesNotExist): # This file was in the published version, but not in the draft version # since we ignored previous content. - version_2_draft.contents.get(componentversioncontent__key="static/background.webp") + version_2_draft.media.get(componentversioncontent__key="static/background.webp") class SetCollectionsTestCase(ComponentTestCase): diff --git a/tests/openedx_learning/apps/authoring/components/test_assets.py b/tests/openedx_learning/apps/authoring/components/test_assets.py index f9cbf0643..04605c861 100644 --- a/tests/openedx_learning/apps/authoring/components/test_assets.py +++ b/tests/openedx_learning/apps/authoring/components/test_assets.py @@ -25,9 +25,9 @@ class AssetTestCase(TestCase): component: components_api.Component component_version: components_api.ComponentVersion - problem_content: contents_api.Content - python_source_asset: contents_api.Content - html_asset_content: contents_api.Content + problem_content: contents_api.Media + python_source_asset: contents_api.Media + html_asset_content: contents_api.Media learning_package: LearningPackage now: datetime From 78488950d70407821f4b97a8fb4cc755ca807b6c Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Tue, 23 Dec 2025 14:16:35 -0500 Subject: [PATCH 2/6] refactor: model rename for ComponentVersionContent/Media --- .../apps/authoring/backup_restore/zipper.py | 8 ++++---- .../apps/authoring/components/_admin.py | 4 ++-- .../apps/authoring/components/api.py | 18 +++++++++--------- .../apps/authoring/components/models.py | 10 +++++----- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/openedx_learning/apps/authoring/backup_restore/zipper.py b/openedx_learning/apps/authoring/backup_restore/zipper.py index 5dcb4274d..4936dbdd0 100644 --- a/openedx_learning/apps/authoring/backup_restore/zipper.py +++ b/openedx_learning/apps/authoring/backup_restore/zipper.py @@ -22,7 +22,7 @@ Collection, ComponentType, ComponentVersion, - ComponentVersionContent, + ComponentVersionMedia, Media, LearningPackage, PublishableEntity, @@ -191,12 +191,12 @@ def get_publishable_entities(self) -> QuerySet[PublishableEntity]: # which is too large for this type of prefetch. Prefetch( "draft__version__componentversion__componentversioncontent_set", - queryset=ComponentVersionContent.objects.select_related("content"), + queryset=ComponentVersionMedia.objects.select_related("content"), to_attr="prefetched_contents", ), Prefetch( "published__version__componentversion__componentversioncontent_set", - queryset=ComponentVersionContent.objects.select_related("content"), + queryset=ComponentVersionMedia.objects.select_related("content"), to_attr="prefetched_contents", ), ) @@ -373,7 +373,7 @@ def create_zip(self, path: str) -> None: # Get content data associated with this version contents: QuerySet[ - ComponentVersionContent + ComponentVersionMedia ] = component_version.prefetched_contents # type: ignore[attr-defined] for component_version_content in contents: diff --git a/openedx_learning/apps/authoring/components/_admin.py b/openedx_learning/apps/authoring/components/_admin.py index 02f116603..fc21b287f 100644 --- a/openedx_learning/apps/authoring/components/_admin.py +++ b/openedx_learning/apps/authoring/components/_admin.py @@ -11,7 +11,7 @@ from openedx_learning.lib.admin_utils import ReadOnlyModelAdmin -from .models import Component, ComponentVersion, ComponentVersionContent +from .models import Component, ComponentVersion, ComponentVersionMedia class ComponentVersionInline(admin.TabularInline): @@ -134,7 +134,7 @@ def format_text_for_admin_display(text: str) -> SafeText: ) -def content_preview(cvc_obj: ComponentVersionContent) -> SafeText: +def content_preview(cvc_obj: ComponentVersionMedia) -> SafeText: """ Get the HTML to display a preview of the given ComponentVersionContent """ diff --git a/openedx_learning/apps/authoring/components/api.py b/openedx_learning/apps/authoring/components/api.py index d8867b239..e1942d703 100644 --- a/openedx_learning/apps/authoring/components/api.py +++ b/openedx_learning/apps/authoring/components/api.py @@ -25,7 +25,7 @@ from ..contents import api as contents_api from ..publishing import api as publishing_api -from .models import Component, ComponentType, ComponentVersion, ComponentVersionContent +from .models import Component, ComponentType, ComponentVersion, ComponentVersionMedia # The public API that will be re-exported by openedx_learning.apps.authoring.api # is listed in the __all__ entries below. Internal helper functions that are @@ -265,7 +265,7 @@ def create_next_component_version( content_pk = content.pk else: content_pk = content_pk_or_bytes - ComponentVersionContent.objects.create( + ComponentVersionMedia.objects.create( content_id=content_pk, component_version=component_version, key=key, @@ -276,11 +276,11 @@ def create_next_component_version( # Now copy any old associations that existed, as long as they aren't # in conflict with the new stuff or marked for deletion. - last_version_content_mapping = ComponentVersionContent.objects \ + last_version_content_mapping = ComponentVersionMedia.objects \ .filter(component_version=last_version) for cvrc in last_version_content_mapping: if cvrc.key not in content_to_replace: - ComponentVersionContent.objects.create( + ComponentVersionMedia.objects.create( content_id=cvrc.content_id, component_version=component_version, key=cvrc.key, @@ -454,7 +454,7 @@ def look_up_component_version_content( component_key: str, version_num: int, key: Path, -) -> ComponentVersionContent: +) -> ComponentVersionMedia: """ Look up ComponentVersionContent by human readable keys. @@ -470,7 +470,7 @@ def look_up_component_version_content( & Q(component_version__publishable_entity_version__version_num=version_num) & Q(key=key) ) - return ComponentVersionContent.objects \ + return ComponentVersionMedia.objects \ .select_related( "content", "content__media_type", @@ -485,7 +485,7 @@ def create_component_version_content( content_id: int, /, key: str, -) -> ComponentVersionContent: +) -> ComponentVersionMedia: """ Add a Content to the given ComponentVersion @@ -503,7 +503,7 @@ def create_component_version_content( ) key = key.lstrip('/') - cvrc, _created = ComponentVersionContent.objects.get_or_create( + cvrc, _created = ComponentVersionMedia.objects.get_or_create( component_version_id=component_version_id, content_id=content_id, key=key, @@ -622,7 +622,7 @@ def _error_header(error: AssetError) -> dict[str, str]: # Check: Does the ComponentVersion have the requested asset (Content)? try: cv_content = component_version.componentversioncontent_set.get(key=asset_path) - except ComponentVersionContent.DoesNotExist: + except ComponentVersionMedia.DoesNotExist: logger.error(f"ComponentVersion {component_version_uuid} has no asset {asset_path}") info_headers.update( _error_header(AssetError.ASSET_PATH_NOT_FOUND_FOR_COMPONENT_VERSION) diff --git a/openedx_learning/apps/authoring/components/models.py b/openedx_learning/apps/authoring/components/models.py index 344455236..50327b007 100644 --- a/openedx_learning/apps/authoring/components/models.py +++ b/openedx_learning/apps/authoring/components/models.py @@ -30,7 +30,7 @@ "ComponentType", "Component", "ComponentVersion", - "ComponentVersionContent", + "ComponentVersionMedia", ] @@ -198,7 +198,7 @@ class ComponentVersion(PublishableEntityVersionMixin): A particular version of a Component. This holds the content using a M:M relationship with Content via - ComponentVersionContent. + ComponentVersionMedia. """ # This is technically redundant, since we can get this through @@ -210,9 +210,9 @@ class ComponentVersion(PublishableEntityVersionMixin): # The contents hold the actual interesting data associated with this # ComponentVersion. - contents: models.ManyToManyField[Media, ComponentVersionContent] = models.ManyToManyField( + contents: models.ManyToManyField[Media, ComponentVersionMedia] = models.ManyToManyField( Media, - through="ComponentVersionContent", + through="ComponentVersionMedia", related_name="component_versions", ) @@ -221,7 +221,7 @@ class Meta: verbose_name_plural = "Component Versions" -class ComponentVersionContent(models.Model): +class ComponentVersionMedia(models.Model): """ Determines the Content for a given ComponentVersion. From 1200a2bb516ab7e67a835439f1d075f40c893a45 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Tue, 23 Dec 2025 14:45:08 -0500 Subject: [PATCH 3/6] refactor: more refactoring to fix test calls --- .../apps/authoring/backup_restore/zipper.py | 6 ++--- .../apps/authoring/components/_admin.py | 2 +- .../apps/authoring/components/api.py | 10 +++---- .../commands/add_assets_to_component.py | 2 +- .../apps/authoring/components/models.py | 22 ++++++++++------ .../apps/authoring/components/test_api.py | 26 +++++++++---------- 6 files changed, 37 insertions(+), 31 deletions(-) diff --git a/openedx_learning/apps/authoring/backup_restore/zipper.py b/openedx_learning/apps/authoring/backup_restore/zipper.py index 4936dbdd0..f685c22ef 100644 --- a/openedx_learning/apps/authoring/backup_restore/zipper.py +++ b/openedx_learning/apps/authoring/backup_restore/zipper.py @@ -190,12 +190,12 @@ def get_publishable_entities(self) -> QuerySet[PublishableEntity]: # especially with large libraries (up to 100K items), # which is too large for this type of prefetch. Prefetch( - "draft__version__componentversion__componentversioncontent_set", + "draft__version__componentversion__componentversionmedia_set", queryset=ComponentVersionMedia.objects.select_related("content"), to_attr="prefetched_contents", ), Prefetch( - "published__version__componentversion__componentversioncontent_set", + "published__version__componentversion__componentversionmedia_set", queryset=ComponentVersionMedia.objects.select_related("content"), to_attr="prefetched_contents", ), @@ -377,7 +377,7 @@ def create_zip(self, path: str) -> None: ] = component_version.prefetched_contents # type: ignore[attr-defined] for component_version_content in contents: - content: Media = component_version_content.content + content: Media = component_version_content.media # Important: The component_version_content.key contains implicitly # the file name and the file extension diff --git a/openedx_learning/apps/authoring/components/_admin.py b/openedx_learning/apps/authoring/components/_admin.py index fc21b287f..60ca41a90 100644 --- a/openedx_learning/apps/authoring/components/_admin.py +++ b/openedx_learning/apps/authoring/components/_admin.py @@ -138,7 +138,7 @@ def content_preview(cvc_obj: ComponentVersionMedia) -> SafeText: """ Get the HTML to display a preview of the given ComponentVersionContent """ - content_obj = cvc_obj.content + content_obj = cvc_obj.media if content_obj.media_type.type == "image": # This base64 encoding looks really goofy and is bad for performance, diff --git a/openedx_learning/apps/authoring/components/api.py b/openedx_learning/apps/authoring/components/api.py index e1942d703..9a7068572 100644 --- a/openedx_learning/apps/authoring/components/api.py +++ b/openedx_learning/apps/authoring/components/api.py @@ -266,7 +266,7 @@ def create_next_component_version( else: content_pk = content_pk_or_bytes ComponentVersionMedia.objects.create( - content_id=content_pk, + media_id=content_pk, component_version=component_version, key=key, ) @@ -281,7 +281,7 @@ def create_next_component_version( for cvrc in last_version_content_mapping: if cvrc.key not in content_to_replace: ComponentVersionMedia.objects.create( - content_id=cvrc.content_id, + media_id=cvrc.media_id, component_version=component_version, key=cvrc.key, ) @@ -505,7 +505,7 @@ def create_component_version_content( cvrc, _created = ComponentVersionMedia.objects.get_or_create( component_version_id=component_version_id, - content_id=content_id, + media_id=content_id, key=key, ) return cvrc @@ -621,7 +621,7 @@ def _error_header(error: AssetError) -> dict[str, str]: # Check: Does the ComponentVersion have the requested asset (Content)? try: - cv_content = component_version.componentversioncontent_set.get(key=asset_path) + cv_content = component_version.componentversionmedia_set.get(key=asset_path) except ComponentVersionMedia.DoesNotExist: logger.error(f"ComponentVersion {component_version_uuid} has no asset {asset_path}") info_headers.update( @@ -634,7 +634,7 @@ def _error_header(error: AssetError) -> dict[str, str]: # anyway, but we're explicitly not doing so because streaming large text # fields from the database is less scalable, and we don't want to encourage # that usage pattern. - content = cv_content.content + content = cv_content.media if not content.has_file: logger.error( f"ComponentVersion {component_version_uuid} has asset {asset_path}, " diff --git a/openedx_learning/apps/authoring/components/management/commands/add_assets_to_component.py b/openedx_learning/apps/authoring/components/management/commands/add_assets_to_component.py index 5e6518a99..e67c54515 100644 --- a/openedx_learning/apps/authoring/components/management/commands/add_assets_to_component.py +++ b/openedx_learning/apps/authoring/components/management/commands/add_assets_to_component.py @@ -83,5 +83,5 @@ def handle(self, *args, **options): f"Created v{next_version.version_num} of " f"{next_version.component.key} ({next_version.uuid}):" ) - for cvc in next_version.componentversioncontent_set.all(): + for cvc in next_version.componentversionmedia_set.all(): self.stdout.write(f"- {cvc.key} ({cvc.uuid})") diff --git a/openedx_learning/apps/authoring/components/models.py b/openedx_learning/apps/authoring/components/models.py index 50327b007..234c2ba81 100644 --- a/openedx_learning/apps/authoring/components/models.py +++ b/openedx_learning/apps/authoring/components/models.py @@ -208,14 +208,20 @@ class ComponentVersion(PublishableEntityVersionMixin): Component, on_delete=models.CASCADE, related_name="versions" ) - # The contents hold the actual interesting data associated with this + # The media relation holds the actual interesting data associated with this # ComponentVersion. - contents: models.ManyToManyField[Media, ComponentVersionMedia] = models.ManyToManyField( + media: models.ManyToManyField[Media, ComponentVersionMedia] = models.ManyToManyField( Media, through="ComponentVersionMedia", related_name="component_versions", ) + @property + def contents(self): + """Backwards compatibility shim.""" + return self.media + + class Meta: verbose_name = "Component Version" verbose_name_plural = "Component Versions" @@ -238,7 +244,7 @@ class ComponentVersionMedia(models.Model): """ component_version = models.ForeignKey(ComponentVersion, on_delete=models.CASCADE) - content = models.ForeignKey(Media, on_delete=models.RESTRICT) + media = models.ForeignKey(Media, on_delete=models.RESTRICT) # "key" is a reserved word for MySQL, so we're temporarily using the column # name of "_key" to avoid breaking downstream tooling. A possible @@ -254,16 +260,16 @@ class Meta: # with two different identifiers, that is permitted. models.UniqueConstraint( fields=["component_version", "key"], - name="oel_cvcontent_uniq_cv_key", + name="oel_cvmedia_uniq_cv_key", ), ] indexes = [ models.Index( - fields=["content", "component_version"], - name="oel_cvcontent_c_cv", + fields=["media", "component_version"], + name="oel_cvmediat_c_cv", ), models.Index( - fields=["component_version", "content"], - name="oel_cvcontent_cv_d", + fields=["component_version", "media"], + name="oel_cvmedia_cv_d", ), ] diff --git a/tests/openedx_learning/apps/authoring/components/test_api.py b/tests/openedx_learning/apps/authoring/components/test_api.py index 7970ce189..7ffde191e 100644 --- a/tests/openedx_learning/apps/authoring/components/test_api.py +++ b/tests/openedx_learning/apps/authoring/components/test_api.py @@ -417,7 +417,7 @@ def test_add(self): .get(publishable_entity_version__version_num=1) assert ( new_content == - new_version.contents.get(componentversioncontent__key="my/path/to/hello.txt") + new_version.contents.get(componentversionmedia__key="my/path/to/hello.txt") ) # Write the same content again, but to an absolute path (should auto- @@ -432,7 +432,7 @@ def test_add(self): .get(publishable_entity_version__version_num=1) assert ( new_content == - new_version.contents.get(componentversioncontent__key="nested/path/hello.txt") + new_version.contents.get(componentversionmedia__key="nested/path/hello.txt") ) def test_bytes_content(self): @@ -448,8 +448,8 @@ def test_bytes_content(self): created=self.now, ) - content_txt = version_1.media.get(componentversioncontent__key="raw.txt") - content_raw_txt = version_1.media.get(componentversioncontent__key="no_ext") + content_txt = version_1.media.get(componentversionmedia__key="raw.txt") + content_raw_txt = version_1.media.get(componentversionmedia__key="no_ext") assert content_txt.size == len(bytes_content) assert str(content_txt.media_type) == 'text/plain' @@ -496,12 +496,12 @@ def test_multiple_versions(self): assert ( hello_content == version_1.media - .get(componentversioncontent__key="hello.txt") + .get(componentversionmedia__key="hello.txt") ) assert ( goodbye_content == version_1.media - .get(componentversioncontent__key="goodbye.txt") + .get(componentversionmedia__key="goodbye.txt") ) # This should keep the old value for goodbye.txt, add blank.txt, and set @@ -520,17 +520,17 @@ def test_multiple_versions(self): assert ( blank_content == version_2.media - .get(componentversioncontent__key="hello.txt") + .get(componentversionmedia__key="hello.txt") ) assert ( goodbye_content == version_2.media - .get(componentversioncontent__key="goodbye.txt") + .get(componentversionmedia__key="goodbye.txt") ) assert ( blank_content == version_2.media - .get(componentversioncontent__key="blank.txt") + .get(componentversionmedia__key="blank.txt") ) # Now we're going to set "hello.txt" back to hello_content, but remove @@ -551,7 +551,7 @@ def test_multiple_versions(self): assert ( hello_content == version_3.media - .get(componentversioncontent__key="hello.txt") + .get(componentversionmedia__key="hello.txt") ) def test_create_next_version_forcing_num_version(self): @@ -613,17 +613,17 @@ def test_create_multiple_next_versions_and_diff_content(self): assert ( python_source_asset == version_2_draft.media.get( - componentversioncontent__key="static/profile.webp") + componentversionmedia__key="static/profile.webp") ) assert ( python_source_asset == version_2_draft.media.get( - componentversioncontent__key="static/new_file.webp") + componentversionmedia__key="static/new_file.webp") ) with self.assertRaises(ObjectDoesNotExist): # This file was in the published version, but not in the draft version # since we ignored previous content. - version_2_draft.media.get(componentversioncontent__key="static/background.webp") + version_2_draft.media.get(componentversionmedia__key="static/background.webp") class SetCollectionsTestCase(ComponentTestCase): From 6c4c71b0b6f3d3ee0a14d9c23b3ab16901b57c05 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Wed, 24 Dec 2025 11:13:26 -0500 Subject: [PATCH 4/6] refactor: refactor components models and api to use media --- .../management/commands/load_components.py | 4 +- .../apps/authoring/backup_restore/zipper.py | 6 +- .../apps/authoring/components/api.py | 4 +- ...entversioncontent_componentversionmedia.py | 18 +++++ .../0006_rename_fields_to_use_media.py | 48 +++++++++++++ .../apps/authoring/contents/api.py | 72 +++++++++---------- .../authoring/backup_restore/test_backup.py | 4 +- .../apps/authoring/components/test_api.py | 10 +-- .../apps/authoring/components/test_assets.py | 6 +- .../authoring/contents/test_file_storage.py | 2 +- 10 files changed, 120 insertions(+), 54 deletions(-) create mode 100644 openedx_learning/apps/authoring/components/migrations/0005_rename_componentversioncontent_componentversionmedia.py create mode 100644 openedx_learning/apps/authoring/components/migrations/0006_rename_fields_to_use_media.py diff --git a/olx_importer/management/commands/load_components.py b/olx_importer/management/commands/load_components.py index 55cd268c8..48b64a3a1 100644 --- a/olx_importer/management/commands/load_components.py +++ b/olx_importer/management/commands/load_components.py @@ -116,7 +116,7 @@ def create_content(self, static_local_path, now, component_version): logger.warning(f' Static reference not found: "{real_path}"') return # Might as well bail if we can't find the file. - content = contents_api.get_or_create_file_content( + content = contents_api.get_or_create_file_media( self.learning_package.id, data=data_bytes, mime_type=mime_type, @@ -165,7 +165,7 @@ def import_block_type(self, block_type_name, now): # , publish_log_entry): # Create the Content entry for the raw data... text = xml_file_path.read_text('utf-8') - text_content, _created = contents_api.get_or_create_text_content( + text_content, _created = contents_api.get_or_create_text_media( self.learning_package.id, text=text, mime_type=f"application/vnd.openedx.xblock.v1.{block_type_name}+xml", diff --git a/openedx_learning/apps/authoring/backup_restore/zipper.py b/openedx_learning/apps/authoring/backup_restore/zipper.py index f685c22ef..e6b4f0abe 100644 --- a/openedx_learning/apps/authoring/backup_restore/zipper.py +++ b/openedx_learning/apps/authoring/backup_restore/zipper.py @@ -191,12 +191,12 @@ def get_publishable_entities(self) -> QuerySet[PublishableEntity]: # which is too large for this type of prefetch. Prefetch( "draft__version__componentversion__componentversionmedia_set", - queryset=ComponentVersionMedia.objects.select_related("content"), + queryset=ComponentVersionMedia.objects.select_related("media"), to_attr="prefetched_contents", ), Prefetch( "published__version__componentversion__componentversionmedia_set", - queryset=ComponentVersionMedia.objects.select_related("content"), + queryset=ComponentVersionMedia.objects.select_related("media"), to_attr="prefetched_contents", ), ) @@ -984,7 +984,7 @@ def _resolve_static_files( # storing the value as a content instance if not self.learning_package_id: raise ValueError("learning_package_id must be set before resolving static files.") - text_content = contents_api.get_or_create_text_content( + text_content = contents_api.get_or_create_text_media( self.learning_package_id, contents_api.get_or_create_media_type(f"application/vnd.openedx.xblock.v1.{block_type}+xml").id, text=content_bytes.decode("utf-8"), diff --git a/openedx_learning/apps/authoring/components/api.py b/openedx_learning/apps/authoring/components/api.py index 9a7068572..4b3a8336e 100644 --- a/openedx_learning/apps/authoring/components/api.py +++ b/openedx_learning/apps/authoring/components/api.py @@ -256,7 +256,7 @@ def create_next_component_version( # RFC 2046: https://datatracker.ietf.org/doc/html/rfc2046 media_type_str = media_type_str or "application/octet-stream" media_type = contents_api.get_or_create_media_type(media_type_str) - content = contents_api.get_or_create_file_content( + content = contents_api.get_or_create_file_media( component.learning_package.id, media_type.id, data=file_content, @@ -647,7 +647,7 @@ def _error_header(error: AssetError) -> dict[str, str]: # At this point, we know that there is valid Content that we want to send. # This adds Content-level headers, like the hash/etag and content type. - info_headers.update(contents_api.get_content_info_headers(content)) + info_headers.update(contents_api.get_media_info_headers(content)) # Recompute redirect headers (reminder: this should never be cached). redirect_headers = contents_api.get_redirect_headers(content.path, public) diff --git a/openedx_learning/apps/authoring/components/migrations/0005_rename_componentversioncontent_componentversionmedia.py b/openedx_learning/apps/authoring/components/migrations/0005_rename_componentversioncontent_componentversionmedia.py new file mode 100644 index 000000000..d3fdb1063 --- /dev/null +++ b/openedx_learning/apps/authoring/components/migrations/0005_rename_componentversioncontent_componentversionmedia.py @@ -0,0 +1,18 @@ +# Generated by Django 5.2.9 on 2025-12-23 19:15 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('oel_components', '0004_remove_componentversioncontent_uuid'), + ('oel_contents', '0003_rename_index'), + ] + + operations = [ + migrations.RenameModel( + old_name='ComponentVersionContent', + new_name='ComponentVersionMedia', + ), + ] diff --git a/openedx_learning/apps/authoring/components/migrations/0006_rename_fields_to_use_media.py b/openedx_learning/apps/authoring/components/migrations/0006_rename_fields_to_use_media.py new file mode 100644 index 000000000..f6b36d7ec --- /dev/null +++ b/openedx_learning/apps/authoring/components/migrations/0006_rename_fields_to_use_media.py @@ -0,0 +1,48 @@ +# Generated by Django 5.2.9 on 2025-12-23 19:25 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('oel_components', '0005_rename_componentversioncontent_componentversionmedia'), + ('oel_contents', '0003_rename_index'), + ] + + operations = [ + migrations.RemoveConstraint( + model_name='componentversionmedia', + name='oel_cvcontent_uniq_cv_key', + ), + migrations.RemoveIndex( + model_name='componentversionmedia', + name='oel_cvcontent_c_cv', + ), + migrations.RemoveIndex( + model_name='componentversionmedia', + name='oel_cvcontent_cv_d', + ), + migrations.RenameField( + model_name='componentversion', + old_name='contents', + new_name='media', + ), + migrations.RenameField( + model_name='componentversionmedia', + old_name='content', + new_name='media', + ), + migrations.AddIndex( + model_name='componentversionmedia', + index=models.Index(fields=['media', 'component_version'], name='oel_cvmediat_c_cv'), + ), + migrations.AddIndex( + model_name='componentversionmedia', + index=models.Index(fields=['component_version', 'media'], name='oel_cvmedia_cv_d'), + ), + migrations.AddConstraint( + model_name='componentversionmedia', + constraint=models.UniqueConstraint(fields=('component_version', 'key'), name='oel_cvmedia_uniq_cv_key'), + ), + ] diff --git a/openedx_learning/apps/authoring/contents/api.py b/openedx_learning/apps/authoring/contents/api.py index b494997b9..1baa1d7a1 100644 --- a/openedx_learning/apps/authoring/contents/api.py +++ b/openedx_learning/apps/authoring/contents/api.py @@ -1,5 +1,5 @@ """ -Low Level Contents API (warning: UNSTABLE, in progress API) +Low Level Media API (warning: UNSTABLE, in progress API) Please look at the models.py file for more information about the kinds of data are stored in this app. @@ -22,10 +22,10 @@ # to be callable only by other apps in the authoring package. __all__ = [ "get_or_create_media_type", - "get_content", - "get_content_info_headers", - "get_or_create_text_content", - "get_or_create_file_content", + "get_media", + "get_media_info_headers", + "get_or_create_text_media", + "get_or_create_file_media", ] @@ -66,20 +66,20 @@ def get_or_create_media_type(mime_type: str) -> MediaType: return media_type -def get_content(content_id: int, /) -> Media: +def get_media(media_id: int, /) -> Media: """ - Get a single Content object by its ID. + Get a single Media object by its ID. - Content is always attached to something when it's created, like to a - ComponentVersion. That means the "right" way to access a Content is almost + Media is always attached to something when it's created, like to a + ComponentVersion. That means the "right" way to access a Media is almost always going to be via those relations and not via this function. But I include this function anyway because it's tiny to write and it's better than someone using a get_or_create_* function when they really just want to get. """ - return Media.objects.get(id=content_id) + return Media.objects.get(id=media_id) -def get_or_create_text_content( +def get_or_create_text_media( learning_package_id: int, media_type_id: int, /, @@ -88,7 +88,7 @@ def get_or_create_text_content( create_file: bool = False, ) -> Media: """ - Get or create a Content entry with text data stored in the database. + Get or create a Media entry with text data stored in the database. Use this when you want to create relatively small chunks of text that need to be accessed quickly, especially if you're pulling back multiple rows at @@ -101,7 +101,7 @@ def get_or_create_text_content( that file be downloadable by browsers in the LMS at some point. If you want to create a large text file, or want to create a text file that - doesn't need to be stored in the database, call ``create_file_content`` + doesn't need to be stored in the database, call ``create_file_media`` instead of this function. """ text_as_bytes = text.encode('utf-8') @@ -109,13 +109,13 @@ def get_or_create_text_content( with atomic(): try: - content = Media.objects.get( + media = Media.objects.get( learning_package_id=learning_package_id, media_type_id=media_type_id, hash_digest=hash_digest, ) except Media.DoesNotExist: - content = Media( + media = Media( learning_package_id=learning_package_id, media_type_id=media_type_id, hash_digest=hash_digest, @@ -124,16 +124,16 @@ def get_or_create_text_content( text=text, has_file=create_file, ) - content.full_clean() - content.save() + media.full_clean() + media.save() if create_file: - content.write_file(ContentFile(text_as_bytes)) + media.write_file(ContentFile(text_as_bytes)) - return content + return media -def get_or_create_file_content( +def get_or_create_file_media( learning_package_id: int, media_type_id: int, /, @@ -141,24 +141,24 @@ def get_or_create_file_content( created: datetime, ) -> Media: """ - Get or create a Content with data stored in a file storage backend. + Get or create a Media with data stored in a file storage backend. Use this function to store non-text data, large data, or data where low latency access is not necessary. Also use this function (or - ``get_or_create_text_content`` with ``create_file=True``) to store any - Content that you want to be downloadable by browsers in the LMS, since the - static asset serving system will only work with file-backed Content. + ``get_or_create_text_media`` with ``create_file=True``) to store any + Media that you want to be downloadable by browsers in the LMS, since the + static asset serving system will only work with file-backed Media. """ hash_digest = create_hash_digest(data) with atomic(): try: - content = Media.objects.get( + media = Media.objects.get( learning_package_id=learning_package_id, media_type_id=media_type_id, hash_digest=hash_digest, ) except Media.DoesNotExist: - content = Media( + media = Media( learning_package_id=learning_package_id, media_type_id=media_type_id, hash_digest=hash_digest, @@ -167,24 +167,24 @@ def get_or_create_file_content( text=None, has_file=True, ) - content.full_clean() - content.save() + media.full_clean() + media.save() - content.write_file(ContentFile(data)) + media.write_file(ContentFile(data)) - return content + return media -def get_content_info_headers(content: Media) -> dict[str, str]: +def get_media_info_headers(media: Media) -> dict[str, str]: """ - Return HTTP headers that are specific to this Content. + Return HTTP headers that are specific to this Media. This currently only consists of the Content-Type and ETag. These values are safe to cache. """ return { - "Content-Type": str(content.media_type), - "Etag": content.hash_digest, + "Content-Type": str(media.media_type), + "Etag": media.hash_digest, } @@ -196,7 +196,7 @@ def get_redirect_headers( """ Return a dict of headers for file redirect and caching. - This is a separate function from get_content_info_headers because the URLs + This is a separate function from get_media_info_headers because the URLs returned in these headers produced by this function should never be put into the backend Django cache (redis/memcached). The `stored_file_path` location *is* cacheable though–that's the actual storage location for the resource, @@ -228,7 +228,7 @@ def get_redirect_headers( cache_directive = "private" # This only stays on the user's browser, so cache for a whole day. This - # is okay to do because Content data is typically immutable–i.e. if an + # is okay to do because Media data is typically immutable–i.e. if an # asset actually changes, the user should be directed to a different URL # for it. max_age = max_age or (60 * 60 * 24) diff --git a/tests/openedx_learning/apps/authoring/backup_restore/test_backup.py b/tests/openedx_learning/apps/authoring/backup_restore/test_backup.py index f3be1611b..75116b6a5 100644 --- a/tests/openedx_learning/apps/authoring/backup_restore/test_backup.py +++ b/tests/openedx_learning/apps/authoring/backup_restore/test_backup.py @@ -100,7 +100,7 @@ def setUpTestData(cls): created=cls.now, ) - new_txt_content = api.get_or_create_text_content( + new_txt_content = api.get_or_create_text_media( cls.learning_package.pk, text_media_type.id, text="This is some data", @@ -129,7 +129,7 @@ def setUpTestData(cls): created=cls.now, ) - cls.html_asset_content = api.get_or_create_file_content( + cls.html_asset_content = api.get_or_create_file_media( cls.learning_package.id, html_media_type.id, data=b"hello world!", diff --git a/tests/openedx_learning/apps/authoring/components/test_api.py b/tests/openedx_learning/apps/authoring/components/test_api.py index 7ffde191e..ec98786e9 100644 --- a/tests/openedx_learning/apps/authoring/components/test_api.py +++ b/tests/openedx_learning/apps/authoring/components/test_api.py @@ -400,7 +400,7 @@ def test_add(self): created=self.now, created_by=None, ) - new_content = contents_api.get_or_create_text_content( + new_content = contents_api.get_or_create_text_media( self.learning_package.pk, self.text_media_type.id, text="This is some data", @@ -460,19 +460,19 @@ def test_bytes_content(self): assert content_raw_txt.read_file().read() == bytes_content def test_multiple_versions(self): - hello_content = contents_api.get_or_create_text_content( + hello_content = contents_api.get_or_create_text_media( self.learning_package.id, self.text_media_type.id, text="Hello World!", created=self.now, ) - goodbye_content = contents_api.get_or_create_text_content( + goodbye_content = contents_api.get_or_create_text_media( self.learning_package.id, self.text_media_type.id, text="Goodbye World!", created=self.now, ) - blank_content = contents_api.get_or_create_text_content( + blank_content = contents_api.get_or_create_text_media( self.learning_package.id, self.text_media_type.id, text="", @@ -573,7 +573,7 @@ def test_create_multiple_next_versions_and_diff_content(self): python_source_media_type = contents_api.get_or_create_media_type( "text/x-python", ) - python_source_asset = contents_api.get_or_create_file_content( + python_source_asset = contents_api.get_or_create_file_media( self.learning_package.id, python_source_media_type.id, data=b"print('hello world!')", diff --git a/tests/openedx_learning/apps/authoring/components/test_assets.py b/tests/openedx_learning/apps/authoring/components/test_assets.py index 04605c861..275f7b88d 100644 --- a/tests/openedx_learning/apps/authoring/components/test_assets.py +++ b/tests/openedx_learning/apps/authoring/components/test_assets.py @@ -65,7 +65,7 @@ def setUpTestData(cls) -> None: ) # ProblemBlock content that is stored as text Content, not a file. - cls.problem_content = contents_api.get_or_create_text_content( + cls.problem_content = contents_api.get_or_create_text_media( cls.learning_package.id, cls.problem_block_media_type.id, text="(pretend problem OLX is here)", @@ -79,7 +79,7 @@ def setUpTestData(cls) -> None: # Python source file, stored as a file. This is hypothetical, as we # don't actually support bundling grader files like this today. - cls.python_source_asset = contents_api.get_or_create_file_content( + cls.python_source_asset = contents_api.get_or_create_file_media( cls.learning_package.id, cls.python_source_media_type.id, data=b"print('hello world!')", @@ -92,7 +92,7 @@ def setUpTestData(cls) -> None: ) # An HTML file that is student downloadable - cls.html_asset_content = contents_api.get_or_create_file_content( + cls.html_asset_content = contents_api.get_or_create_file_media( cls.learning_package.id, cls.html_media_type.id, data=b"hello world!", diff --git a/tests/openedx_learning/apps/authoring/contents/test_file_storage.py b/tests/openedx_learning/apps/authoring/contents/test_file_storage.py index a94b31571..5df00df65 100644 --- a/tests/openedx_learning/apps/authoring/contents/test_file_storage.py +++ b/tests/openedx_learning/apps/authoring/contents/test_file_storage.py @@ -34,7 +34,7 @@ def setUp(self) -> None: title="Content File Storage Test Case Learning Package", ) self.html_media_type = contents_api.get_or_create_media_type("text/html") - self.html_content = contents_api.get_or_create_file_content( + self.html_content = contents_api.get_or_create_file_media( learning_package.id, self.html_media_type.id, data=b"hello world!", From 37ceafa940413c49f1ffd04c4f7806e0c9c51d20 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Wed, 24 Dec 2025 15:05:12 -0500 Subject: [PATCH 5/6] refactor: move package name from contents -> media (label is still the same) --- .importlinter | 2 +- .../management/commands/load_components.py | 2 +- openedx_learning/api/authoring.py | 2 +- openedx_learning/api/authoring_models.py | 2 +- .../apps/authoring/backup_restore/zipper.py | 2 +- .../apps/authoring/components/api.py | 2 +- .../apps/authoring/components/models.py | 2 +- .../apps/authoring/contents/__init__.py | 0 .../apps/authoring/contents/admin.py | 71 --- .../apps/authoring/contents/api.py | 239 ---------- .../apps/authoring/contents/apps.py | 15 - .../contents/migrations/0001_initial.py | 66 --- .../migrations/0002_rename_content_media.py | 23 - .../contents/migrations/0003_rename_index.py | 27 -- .../authoring/contents/migrations/__init__.py | 0 .../apps/authoring/contents/models.py | 410 ------------------ projects/dev.py | 2 +- test_settings.py | 20 +- .../apps/authoring/components/test_api.py | 4 +- .../apps/authoring/components/test_assets.py | 2 +- .../apps/authoring/contents/__init__.py | 0 .../authoring/contents/test_file_storage.py | 79 ---- .../authoring/contents/test_media_types.py | 24 - 23 files changed, 21 insertions(+), 975 deletions(-) delete mode 100644 openedx_learning/apps/authoring/contents/__init__.py delete mode 100644 openedx_learning/apps/authoring/contents/admin.py delete mode 100644 openedx_learning/apps/authoring/contents/api.py delete mode 100644 openedx_learning/apps/authoring/contents/apps.py delete mode 100644 openedx_learning/apps/authoring/contents/migrations/0001_initial.py delete mode 100644 openedx_learning/apps/authoring/contents/migrations/0002_rename_content_media.py delete mode 100644 openedx_learning/apps/authoring/contents/migrations/0003_rename_index.py delete mode 100644 openedx_learning/apps/authoring/contents/migrations/__init__.py delete mode 100644 openedx_learning/apps/authoring/contents/models.py delete mode 100644 tests/openedx_learning/apps/authoring/contents/__init__.py delete mode 100644 tests/openedx_learning/apps/authoring/contents/test_file_storage.py delete mode 100644 tests/openedx_learning/apps/authoring/contents/test_media_types.py diff --git a/.importlinter b/.importlinter index 6c278d0dd..bc572f7b2 100644 --- a/.importlinter +++ b/.importlinter @@ -47,7 +47,7 @@ layers= # The "contents" app stores the simplest pieces of binary and text data, # without versioning information. These belong to a single Learning Package. - openedx_learning.apps.authoring.contents + openedx_learning.apps.authoring.media # The "collections" app stores arbitrary groupings of PublishableEntities. # Its only dependency should be the publishing app. diff --git a/olx_importer/management/commands/load_components.py b/olx_importer/management/commands/load_components.py index 48b64a3a1..513591c79 100644 --- a/olx_importer/management/commands/load_components.py +++ b/olx_importer/management/commands/load_components.py @@ -29,7 +29,7 @@ # Model references to remove from openedx_learning.apps.authoring.components import api as components_api -from openedx_learning.apps.authoring.contents import api as contents_api +from openedx_learning.apps.authoring.media import api as contents_api from openedx_learning.apps.authoring.publishing import api as publishing_api SUPPORTED_TYPES = ["problem", "video", "html"] diff --git a/openedx_learning/api/authoring.py b/openedx_learning/api/authoring.py index 9082b33fb..b0d98cff4 100644 --- a/openedx_learning/api/authoring.py +++ b/openedx_learning/api/authoring.py @@ -12,7 +12,7 @@ from ..apps.authoring.backup_restore.api import * from ..apps.authoring.collections.api import * from ..apps.authoring.components.api import * -from ..apps.authoring.contents.api import * +from ..apps.authoring.media.api import * from ..apps.authoring.publishing.api import * from ..apps.authoring.sections.api import * from ..apps.authoring.subsections.api import * diff --git a/openedx_learning/api/authoring_models.py b/openedx_learning/api/authoring_models.py index 617d85dc4..3dd9e86ad 100644 --- a/openedx_learning/api/authoring_models.py +++ b/openedx_learning/api/authoring_models.py @@ -9,7 +9,7 @@ # pylint: disable=wildcard-import from ..apps.authoring.collections.models import * from ..apps.authoring.components.models import * -from ..apps.authoring.contents.models import * +from ..apps.authoring.media.models import * from ..apps.authoring.publishing.models import * from ..apps.authoring.sections.models import * from ..apps.authoring.subsections.models import * diff --git a/openedx_learning/apps/authoring/backup_restore/zipper.py b/openedx_learning/apps/authoring/backup_restore/zipper.py index e6b4f0abe..3fbb64ca7 100644 --- a/openedx_learning/apps/authoring/backup_restore/zipper.py +++ b/openedx_learning/apps/authoring/backup_restore/zipper.py @@ -47,7 +47,7 @@ ) from openedx_learning.apps.authoring.collections import api as collections_api from openedx_learning.apps.authoring.components import api as components_api -from openedx_learning.apps.authoring.contents import api as contents_api +from openedx_learning.apps.authoring.media import api as contents_api from openedx_learning.apps.authoring.publishing import api as publishing_api from openedx_learning.apps.authoring.sections import api as sections_api from openedx_learning.apps.authoring.subsections import api as subsections_api diff --git a/openedx_learning/apps/authoring/components/api.py b/openedx_learning/apps/authoring/components/api.py index 4b3a8336e..a31ea2dee 100644 --- a/openedx_learning/apps/authoring/components/api.py +++ b/openedx_learning/apps/authoring/components/api.py @@ -23,7 +23,7 @@ from django.db.transaction import atomic from django.http.response import HttpResponse, HttpResponseNotFound -from ..contents import api as contents_api +from ..media import api as contents_api from ..publishing import api as publishing_api from .models import Component, ComponentType, ComponentVersion, ComponentVersionMedia diff --git a/openedx_learning/apps/authoring/components/models.py b/openedx_learning/apps/authoring/components/models.py index 234c2ba81..3d34cfebe 100644 --- a/openedx_learning/apps/authoring/components/models.py +++ b/openedx_learning/apps/authoring/components/models.py @@ -23,7 +23,7 @@ from ....lib.fields import case_sensitive_char_field, key_field from ....lib.managers import WithRelationsManager -from ..contents.models import Media +from ..media.models import Media from ..publishing.models import LearningPackage, PublishableEntityMixin, PublishableEntityVersionMixin __all__ = [ diff --git a/openedx_learning/apps/authoring/contents/__init__.py b/openedx_learning/apps/authoring/contents/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/openedx_learning/apps/authoring/contents/admin.py b/openedx_learning/apps/authoring/contents/admin.py deleted file mode 100644 index 57cf254cb..000000000 --- a/openedx_learning/apps/authoring/contents/admin.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -Django admin for contents models -""" -import base64 - -from django.contrib import admin -from django.utils.html import format_html - -from openedx_learning.lib.admin_utils import ReadOnlyModelAdmin - -from .models import Media - - -@admin.register(Media) -class MediaAdmin(ReadOnlyModelAdmin): - """ - Django admin for Content model - """ - list_display = [ - "hash_digest", - "learning_package", - "media_type", - "size", - "created", - "has_file", - ] - fields = [ - "learning_package", - "hash_digest", - "media_type", - "size", - "created", - "has_file", - "path", - "os_path", - "text_preview", - "image_preview", - ] - list_filter = ("media_type", "learning_package") - search_fields = ("hash_digest",) - - @admin.display(description="OS Path") - def os_path(self, content: Media): - return content.os_path() or "" - - def path(self, content: Media): - return content.path if content.has_file else "" - - def text_preview(self, content: Media): - if not content.text: - return "" - return format_html( - '
\n{}\n
', - content.text, - ) - - def image_preview(self, content: Media): - """ - Return HTML for an image, if that is the underlying Content. - - Otherwise, just return a blank string. - """ - if content.media_type.type != "image": - return "" - - data = content.read_file().read() - return format_html( - '', - content.mime_type, - base64.encodebytes(data).decode('utf8'), - ) diff --git a/openedx_learning/apps/authoring/contents/api.py b/openedx_learning/apps/authoring/contents/api.py deleted file mode 100644 index 1baa1d7a1..000000000 --- a/openedx_learning/apps/authoring/contents/api.py +++ /dev/null @@ -1,239 +0,0 @@ -""" -Low Level Media API (warning: UNSTABLE, in progress API) - -Please look at the models.py file for more information about the kinds of data -are stored in this app. -""" -from __future__ import annotations - -from datetime import datetime -from logging import getLogger - -from django.core.files.base import ContentFile -from django.db.transaction import atomic - -from ....lib.fields import create_hash_digest -from .models import Media, MediaType - -# The public API that will be re-exported by openedx_learning.apps.authoring.api -# is listed in the __all__ entries below. Internal helper functions that are -# private to this module should start with an underscore. If a function does not -# start with an underscore AND it is not in __all__, that function is considered -# to be callable only by other apps in the authoring package. -__all__ = [ - "get_or_create_media_type", - "get_media", - "get_media_info_headers", - "get_or_create_text_media", - "get_or_create_file_media", -] - - -log = getLogger() - - -def get_or_create_media_type(mime_type: str) -> MediaType: - """ - Return the MediaType.id for the desired mime_type string. - - If it is not found in the database, a new entry will be created for it. This - lazy-writing means that MediaType entry IDs will *not* be the same across - different server instances, and apps should not assume that will be the - case. Even if we were to preload a bunch of common ones, we can't anticipate - the different XBlocks that will be installed in different server instances, - each of which will use their own MediaType. - - Caching Warning: Be careful about putting any caching decorator around this - function (e.g. ``lru_cache``). It's possible that incorrect cache values - could leak out in the event of a rollback–e.g. new types are introduced in - a large import transaction which later fails. You can safely cache the - results that come back from this function with a local dict in your import - process instead. - """ - if "+" in mime_type: - base, suffix = mime_type.split("+") - else: - base = mime_type - suffix = "" - - main_type, sub_type = base.split("/") - media_type, _created = MediaType.objects.get_or_create( - type=main_type, - sub_type=sub_type, - suffix=suffix, - ) - - return media_type - - -def get_media(media_id: int, /) -> Media: - """ - Get a single Media object by its ID. - - Media is always attached to something when it's created, like to a - ComponentVersion. That means the "right" way to access a Media is almost - always going to be via those relations and not via this function. But I - include this function anyway because it's tiny to write and it's better than - someone using a get_or_create_* function when they really just want to get. - """ - return Media.objects.get(id=media_id) - - -def get_or_create_text_media( - learning_package_id: int, - media_type_id: int, - /, - text: str, - created: datetime, - create_file: bool = False, -) -> Media: - """ - Get or create a Media entry with text data stored in the database. - - Use this when you want to create relatively small chunks of text that need - to be accessed quickly, especially if you're pulling back multiple rows at - once. For example, this is the function to call when storing OLX for a - component XBlock like a ProblemBlock. - - This function will *always* create a text entry in the database. In addition - to this, if you specify ``create_file=True``, it will also save a copy of - that text data to the file storage backend. This is useful if we want to let - that file be downloadable by browsers in the LMS at some point. - - If you want to create a large text file, or want to create a text file that - doesn't need to be stored in the database, call ``create_file_media`` - instead of this function. - """ - text_as_bytes = text.encode('utf-8') - hash_digest = create_hash_digest(text_as_bytes) - - with atomic(): - try: - media = Media.objects.get( - learning_package_id=learning_package_id, - media_type_id=media_type_id, - hash_digest=hash_digest, - ) - except Media.DoesNotExist: - media = Media( - learning_package_id=learning_package_id, - media_type_id=media_type_id, - hash_digest=hash_digest, - created=created, - size=len(text_as_bytes), - text=text, - has_file=create_file, - ) - media.full_clean() - media.save() - - if create_file: - media.write_file(ContentFile(text_as_bytes)) - - return media - - -def get_or_create_file_media( - learning_package_id: int, - media_type_id: int, - /, - data: bytes, - created: datetime, -) -> Media: - """ - Get or create a Media with data stored in a file storage backend. - - Use this function to store non-text data, large data, or data where low - latency access is not necessary. Also use this function (or - ``get_or_create_text_media`` with ``create_file=True``) to store any - Media that you want to be downloadable by browsers in the LMS, since the - static asset serving system will only work with file-backed Media. - """ - hash_digest = create_hash_digest(data) - with atomic(): - try: - media = Media.objects.get( - learning_package_id=learning_package_id, - media_type_id=media_type_id, - hash_digest=hash_digest, - ) - except Media.DoesNotExist: - media = Media( - learning_package_id=learning_package_id, - media_type_id=media_type_id, - hash_digest=hash_digest, - created=created, - size=len(data), - text=None, - has_file=True, - ) - media.full_clean() - media.save() - - media.write_file(ContentFile(data)) - - return media - - -def get_media_info_headers(media: Media) -> dict[str, str]: - """ - Return HTTP headers that are specific to this Media. - - This currently only consists of the Content-Type and ETag. These values are - safe to cache. - """ - return { - "Content-Type": str(media.media_type), - "Etag": media.hash_digest, - } - - -def get_redirect_headers( - stored_file_path: str, - public: bool = False, - max_age: int | None = None, -) -> dict[str, str]: - """ - Return a dict of headers for file redirect and caching. - - This is a separate function from get_media_info_headers because the URLs - returned in these headers produced by this function should never be put into - the backend Django cache (redis/memcached). The `stored_file_path` location - *is* cacheable though–that's the actual storage location for the resource, - and not a link that could potentially expire. - - TODO: We need to add support for short-lived URL generation from the - stored_file_path. - """ - if public: - # If an asset is public, then let it be cached by the reverse-proxy and - # CDN, but do require that it be revalidated after the suggested max - # age. This would help us do things like take a URL that was mistakenly - # made public and make it require authentication. Fortunately, checking - # that the content is up to date is a cheap operation, since it just - # requires examining the Etag. - cache_directive = "must-revalidate" - - # Default to an hour of caching, to make it easier to tighten access - # later on. - max_age = max_age or (5 * 60) - else: - # If an asset is meant to be private, that means this response should - # not be cached by either the reverse-proxy or any CDN–it's only ever - # cached on the user's browser. This is what you'd use for very granular - # permissions checking, e.g. "only let them see this image if they have - # access to the Component it's associated with". Note that we're not - # doing ``Vary: Cookie`` because that would fill the reverse-proxy and - # CDN caches with a lot of redundant entries. - cache_directive = "private" - - # This only stays on the user's browser, so cache for a whole day. This - # is okay to do because Media data is typically immutable–i.e. if an - # asset actually changes, the user should be directed to a different URL - # for it. - max_age = max_age or (60 * 60 * 24) - - return { - "Cache-Control": f"max-age={max_age}, {cache_directive}", - "X-Accel-Redirect": stored_file_path, - } diff --git a/openedx_learning/apps/authoring/contents/apps.py b/openedx_learning/apps/authoring/contents/apps.py deleted file mode 100644 index 3c23bd4f5..000000000 --- a/openedx_learning/apps/authoring/contents/apps.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -Django metadata for the Contents Django application. -""" -from django.apps import AppConfig - - -class ContentsConfig(AppConfig): - """ - Configuration for the Contents Django application. - """ - - name = "openedx_learning.apps.authoring.contents" - verbose_name = "Learning Core > Authoring > Contents" - default_auto_field = "django.db.models.BigAutoField" - label = "oel_contents" diff --git a/openedx_learning/apps/authoring/contents/migrations/0001_initial.py b/openedx_learning/apps/authoring/contents/migrations/0001_initial.py deleted file mode 100644 index 1abf65adf..000000000 --- a/openedx_learning/apps/authoring/contents/migrations/0001_initial.py +++ /dev/null @@ -1,66 +0,0 @@ -# Generated by Django 3.2.23 on 2024-02-06 03:23 - -import django.core.validators -import django.db.models.deletion -from django.db import migrations, models - -import openedx_learning.lib.fields -import openedx_learning.lib.validators - - -class Migration(migrations.Migration): - - initial = True - - dependencies = [ - ('oel_publishing', '0001_initial'), - ] - - operations = [ - migrations.CreateModel( - name='Content', - fields=[ - ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('size', models.PositiveBigIntegerField(validators=[django.core.validators.MaxValueValidator(50000000)])), - ('hash_digest', models.CharField(editable=False, max_length=40)), - ('has_file', models.BooleanField()), - ('text', openedx_learning.lib.fields.MultiCollationTextField(blank=True, db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=50000, null=True)), - ('created', models.DateTimeField(validators=[openedx_learning.lib.validators.validate_utc_datetime])), - ], - options={ - 'verbose_name': 'Content', - 'verbose_name_plural': 'Contents', - }, - ), - migrations.CreateModel( - name='MediaType', - fields=[ - ('id', models.AutoField(primary_key=True, serialize=False)), - ('type', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=127)), - ('sub_type', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=127)), - ('suffix', openedx_learning.lib.fields.MultiCollationCharField(blank=True, db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=127)), - ], - ), - migrations.AddConstraint( - model_name='mediatype', - constraint=models.UniqueConstraint(fields=('type', 'sub_type', 'suffix'), name='oel_contents_uniq_t_st_sfx'), - ), - migrations.AddField( - model_name='content', - name='learning_package', - field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.learningpackage'), - ), - migrations.AddField( - model_name='content', - name='media_type', - field=models.ForeignKey(on_delete=django.db.models.deletion.PROTECT, to='oel_contents.mediatype'), - ), - migrations.AddIndex( - model_name='content', - index=models.Index(fields=['learning_package', '-size'], name='oel_content_idx_lp_rsize'), - ), - migrations.AddConstraint( - model_name='content', - constraint=models.UniqueConstraint(fields=('learning_package', 'media_type', 'hash_digest'), name='oel_content_uniq_lc_media_type_hash_digest'), - ), - ] diff --git a/openedx_learning/apps/authoring/contents/migrations/0002_rename_content_media.py b/openedx_learning/apps/authoring/contents/migrations/0002_rename_content_media.py deleted file mode 100644 index 809afb276..000000000 --- a/openedx_learning/apps/authoring/contents/migrations/0002_rename_content_media.py +++ /dev/null @@ -1,23 +0,0 @@ -# Generated by Django 5.2.9 on 2025-12-22 23:49 - -from django.db import migrations - - -class Migration(migrations.Migration): - - dependencies = [ - ('oel_components', '0004_remove_componentversioncontent_uuid'), - ('oel_contents', '0001_initial'), - ('oel_publishing', '0010_backfill_dependencies'), - ] - - operations = [ - migrations.RenameModel( - old_name='Content', - new_name='Media', - ), - migrations.AlterModelOptions( - name='media', - options={'verbose_name': 'Media', 'verbose_name_plural': 'Media'}, - ), - ] diff --git a/openedx_learning/apps/authoring/contents/migrations/0003_rename_index.py b/openedx_learning/apps/authoring/contents/migrations/0003_rename_index.py deleted file mode 100644 index 9612843db..000000000 --- a/openedx_learning/apps/authoring/contents/migrations/0003_rename_index.py +++ /dev/null @@ -1,27 +0,0 @@ -# Generated by Django 5.2.9 on 2025-12-23 19:09 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('oel_contents', '0002_rename_content_media'), - ('oel_publishing', '0010_backfill_dependencies'), - ] - - operations = [ - migrations.RemoveConstraint( - model_name='media', - name='oel_content_uniq_lc_media_type_hash_digest', - ), - migrations.RenameIndex( - model_name='media', - new_name='oel_media_idx_lp_rsize', - old_name='oel_content_idx_lp_rsize', - ), - migrations.AddConstraint( - model_name='media', - constraint=models.UniqueConstraint(fields=('learning_package', 'media_type', 'hash_digest'), name='oel_media_uniq_lc_media_type_hash_digest'), - ), - ] diff --git a/openedx_learning/apps/authoring/contents/migrations/__init__.py b/openedx_learning/apps/authoring/contents/migrations/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/openedx_learning/apps/authoring/contents/models.py b/openedx_learning/apps/authoring/contents/models.py deleted file mode 100644 index 4af53925b..000000000 --- a/openedx_learning/apps/authoring/contents/models.py +++ /dev/null @@ -1,410 +0,0 @@ -""" -These models are the most basic pieces of content we support. Think of them as -the simplest building blocks to store data with. They need to be composed into -more intelligent data models to be useful. -""" -from __future__ import annotations - -from functools import cache, cached_property -from logging import getLogger - -from django.conf import settings -from django.core.exceptions import ImproperlyConfigured, ValidationError -from django.core.files.base import File -from django.core.files.storage import Storage -from django.core.validators import MaxValueValidator -from django.db import models -from django.utils.module_loading import import_string - -from ....lib.fields import MultiCollationTextField, case_insensitive_char_field, hash_field, manual_date_time_field -from ....lib.managers import WithRelationsManager -from ..publishing.models import LearningPackage - -logger = getLogger() - -__all__ = [ - "MediaType", - "Media", -] - - -@cache -def get_storage() -> Storage: - """ - Return the Storage instance for our Content file persistence. - - This will first search for an OPENEDX_LEARNING config dictionary and return - a Storage subclass based on that configuration. - - If there is no value for the OPENEDX_LEARNING setting, we return the default - MEDIA storage class. TODO: Should we make it just error instead? - """ - config_dict = getattr(settings, 'OPENEDX_LEARNING', {}) - - if 'MEDIA' in config_dict: - storage_cls = import_string(config_dict['MEDIA']['BACKEND']) - options = config_dict['MEDIA'].get('OPTIONS', {}) - return storage_cls(**options) - - raise ImproperlyConfigured( - "Cannot access file storage: Missing the OPENEDX_LEARNING['MEDIA'] " - "setting, which should have a storage BACKEND and OPTIONS values for " - "a Storage subclass. These files should be stored in a location that " - "is NOT publicly accessible to browsers (so not in the MEDIA_ROOT)." - ) - - -class MediaType(models.Model): - """ - Stores Media types for use by Content models. - - This is the same as MIME types (the IANA renamed MIME Types to Media Types). - We don't pre-populate this table, so APIs that add Content must ensure that - the desired Media Type exists. - - Media types are written as {type}/{sub_type}+{suffix}, where suffixes are - seldom used. Examples: - - * application/json - * text/css - * image/svg+xml - * application/vnd.openedx.xblock.v1.problem+xml - - We have this as a separate model (instead of a field on Content) because: - - 1. We can save a lot on storage and indexing for Content if we're just - storing foreign key references there, rather than the entire content - string to be indexed. This is especially relevant for our (long) custom - types like "application/vnd.openedx.xblock.v1.problem+xml". - 2. These values can occasionally change. For instance, "text/javascript" vs. - "application/javascript". Also, we will be using a fair number of "vnd." - style of custom content types, and we may want the flexibility of - changing that without having to worry about migrating millions of rows of - Content. - """ - # We're going to have many foreign key references from Content into this - # model, and we don't need to store those as 8-byte BigAutoField, as is the - # default for this app. It's likely that a SmallAutoField would work, but I - # can just barely imagine using more than 32K Media types if we have a bunch - # of custom "vnd." entries, or start tracking suffixes and parameters. Which - # is how we end up at the 4-byte AutoField. - id = models.AutoField(primary_key=True) - - # Media types are denoted as {type}/{sub_type}+{suffix}. We currently do not - # support parameters. - - # Media type, e.g. "application", "text", "image". Per RFC 4288, this can be - # at most 127 chars long and is case insensitive. In practice, it's almost - # always written in lowercase. - type = case_insensitive_char_field(max_length=127, blank=False, null=False) - - # Media sub-type, e.g. "json", "css", "png". Per RFC 4288, this can be at - # most 127 chars long and is case insensitive. In practice, it's almost - # always written in lowercase. - sub_type = case_insensitive_char_field(max_length=127, blank=False, null=False) - - # Suffix, like "xml" (e.g. "image/svg+xml"). Usually blank. I couldn't find - # an RFC description of the length limit, and 127 is probably excessive. But - # this table should be small enough where it doesn't really matter. - suffix = case_insensitive_char_field(max_length=127, blank=True, null=False) - - class Meta: - constraints = [ - # Make sure all (type + sub_type + suffix) combinations are unique. - models.UniqueConstraint( - fields=[ - "type", - "sub_type", - "suffix", - ], - name="oel_contents_uniq_t_st_sfx", - ), - ] - - def __str__(self) -> str: - base = f"{self.type}/{self.sub_type}" - if self.suffix: - return f"{base}+{self.suffix}" - return base - - -class Media(models.Model): - """ - This is the most primitive piece of content data. - - This model serves to lookup, de-duplicate, and store text and files. A piece - of Content is identified purely by its data, the media type, and the - LearningPackage it is associated with. It has no version or file name - metadata associated with it. It exists to be a dumb blob of data that higher - level models like ComponentVersions can assemble together. - - # In-model Text vs. File - - That being said, the Content model does have some complexity to accomodate - different access patterns that we have in our app. In particular, it can - store data in two ways: the ``text`` field and a file (``has_file=True``) - A Content object must use at least one of these methods, but can use both if - it's appropriate. - - Use the ``text`` field when: - * the content is a relatively small (< 50K, usually much less) piece of text - * you want to do be able to query up update across many rows at once - * low, predictable latency is important - - Use file storage when: - * the content is large, or not text-based - * you want to be able to serve the file content directly to the browser - - The high level tradeoff is that ``text`` will give you faster access, and - file storage will give you a much more affordable and scalable backend. The - backend used for files will also eventually allow direct browser download - access, whereas the ``text`` field will not. But again, you can use both at - the same time if needed. - - # Association with a LearningPackage - - Content is associated with a specific LearningPackage. Doing so allows us to - more easily query for how much storge space a specific LearningPackage - (likely a library) is using, and to clean up unused data. - - When we get to borrowing Content across LearningPackages, it's likely that - we will want to copy them. That way, even if the originating LearningPackage - is deleted, it won't break other LearningPackages that are making use if it. - - # Media Types, and file duplication - - Content is almost 1:1 with the files that it pushes to a storage backend, - but not quite. The file locations are generated purely as a product of the - LearningPackage UUID and the Content's ``hash_digest``, but Content also - takes into account the ``media_type``. - - For example, say we had a Content with the following data: - - ["hello", "world"] - - That is legal syntax for both JSON and YAML. If you want to attach some - YAML-specific metadata in a new model, you could make it 1:1 with the - Content that matched the "application/yaml" media type. The YAML and JSON - versions of this data would be two separate Content rows that would share - the same ``hash_digest`` value. If they both stored a file, they would be - pointing to the same file location. If they only used the ``text`` field, - then that value would be duplicated across the two separate Content rows. - - The alternative would have been to associate media types at the level where - this data was being added to a ComponentVersion, but that would have added - more complexity. Right now, you could make an ImageContent 1:1 model that - analyzed images and created metatdata entries for them (dimensions, GPS) - without having to understand how ComponentVerisons work. - - This is definitely an edge case, and it's likely the only time collisions - like this will happen in practice is with blank files. It also means that - using this table to measure disk usage may be slightly inaccurate when used - in a LearningPackage with collisions–though we expect to use numbers like - that mostly to get a broad sense of usage and look for major outliers, - rather than for byte-level accuracy (it wouldn't account for the non-trivial - indexing storage costs either). - - # Immutability - - From the outside, Content should appear immutable. Since the Content is - looked up by a hash of its data, a change in the data means that we should - look up the hash value of that new data and create a new Content if we don't - find a match. - - That being said, the Content model has different ways of storing that data, - and that is mutable. We could decide that a certain type of Content should - be optimized to store its text in the table. Or that a content type that we - had previously only stored as text now also needs to be stored on in the - file storage backend so that it can be made available to be downloaded. - These operations would be done as data migrations. - - # Extensibility - - Third-party apps are encouraged to create models that have a OneToOneField - relationship with Content. For instance, an ImageContent model might join - 1:1 with all Content that has image/* media types, and provide additional - metadata for that data. - """ - # Max size of the file. - MAX_FILE_SIZE = 50_000_000 - - # 50K is our limit for text data, like OLX. This means 50K *characters*, - # not bytes. Since UTF-8 encodes characters using as many as 4 bytes, this - # could be as much as 200K of data if we had nothing but emojis. - MAX_TEXT_LENGTH = 50_000 - - objects: models.Manager[Media] = WithRelationsManager('media_type') - - learning_package = models.ForeignKey(LearningPackage, on_delete=models.CASCADE) - - # What is the Media type (a.k.a. MIME type) of this data? - media_type = models.ForeignKey(MediaType, on_delete=models.PROTECT) - - # This is the size of the file in bytes. This can be different than the - # character length of a text file, since UTF-8 encoding can use anywhere - # between 1-4 bytes to represent any given character. - size = models.PositiveBigIntegerField( - validators=[MaxValueValidator(MAX_FILE_SIZE)], - ) - - # This hash value may be calculated using create_hash_digest from the - # openedx.lib.fields module. When storing text, we hash the UTF-8 - # encoding of that text value, regardless of whether we also write it to a - # file or not. When storing just a file, we hash the bytes in the file. - hash_digest = hash_field() - - # Do we have file data stored for this Content in our file storage backend? - # We use has_file instead of a FileField because it's more space efficient. - # The location of a Content's file data is derivable from the Learning - # Package's UUID and the hash of the Content. There's no need to waste that - # space to encode it in every row. - has_file = models.BooleanField() - - # The ``text`` field contains the text representation of the Content, if - # it is available. A blank value means means that we are storing text for - # this Content, and that text happens to be an empty string. A null value - # here means that we are not storing any text here, and the Content exists - # only in file form. It is an error for ``text`` to be None and ``has_file`` - # to be False, since that would mean we haven't stored data anywhere at all. - # - text = MultiCollationTextField( - blank=True, - null=True, - max_length=MAX_TEXT_LENGTH, - # We don't really expect to ever sort by the text column, but we may - # want to do case-insensitive searches, so it's useful to have a case - # and accent insensitive collation. - db_collations={ - "sqlite": "NOCASE", - "mysql": "utf8mb4_unicode_ci", - } - ) - - # This should be manually set so that multiple Content rows being set in - # the same transaction are created with the same timestamp. The timestamp - # should be UTC. - created = manual_date_time_field() - - @cached_property - def mime_type(self) -> str: - """ - The IANA media type (a.k.a. MIME type) of the Content, in string form. - - MIME types reference: - https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types - """ - return str(self.media_type) - - @cached_property - def path(self): - """ - Logical path at which this content is stored (or would be stored). - - This path is relative to OPENEDX_LEARNING['MEDIA'] configured storage - root. This file may not exist because has_file=False, or because we - haven't written the file yet (this is the method we call when trying to - figure out where the file *should* go). - """ - return f"content/{self.learning_package.uuid}/{self.hash_digest}" - - def os_path(self): - """ - The full OS path for the underlying file for this Content. - - This will not be supported by all Storage class types. - - This will return ``None`` if there is no backing file (has_file=False). - """ - try: - if self.has_file: - return get_storage().path(self.path) - except NotImplementedError: - logger.warning("Storage backend does not support path()") - return None - - def read_file(self) -> File: - """ - Get a File object that has been open for reading. - - We intentionally don't expose an `open()` call where callers can open - this file in write mode. Writing a Content file should happen at most - once, and the logic is not obvious (see ``write_file``). - - At the end of the day, the caller can close the returned File and reopen - it in whatever mode they want, but we're trying to gently discourage - that kind of usage. - """ - return get_storage().open(self.path, 'rb') - - def write_file(self, file: File) -> None: - """ - Write file contents to the file storage backend. - - This function does nothing if the file already exists. Note that Content - is supposed to be immutable, so this should normally only be called once - for a given Content row. - """ - storage = get_storage() - - # There are two reasons why a file might already exist even if the the - # Content row is new: - # - # 1. We tried adding the file earlier, but an error rolled back the - # state of the database. The file storage system isn't covered by any - # sort of transaction semantics, so it won't get rolled back. - # - # 2. The Content is of a different MediaType. The same exact bytes can - # be two logically separate Content entries if they are different file - # types. This lets other models add data to Content via 1:1 relations by - # ContentType (e.g. all SRT files). This is definitely an edge case. - # - # 3. Similar to (2), but only part of the file was written before an - # error occurred. This seems unlikely, but possible if the underlying - # storage engine writes in chunks. - if storage.exists(self.path) and storage.size(self.path) == file.size: - return - storage.save(self.path, file) - - def file_url(self) -> str: - """ - This will sometimes be a time-limited signed URL. - """ - return get_storage().url(self.path) - - def clean(self): - """ - Make sure we're actually storing *something*. - - If this Content has neither a file or text data associated with it, - it's in a broken/useless state and shouldn't be saved. - """ - if (not self.has_file) and (self.text is None): - raise ValidationError( - f"Content {self.pk} with hash {self.hash_digest} must either " - "set a string value for 'text', or it must set has_file=True " - "(or both)." - ) - - class Meta: - constraints = [ - # Make sure we don't store duplicates of this raw data within the - # same LearningPackage, unless they're of different MIME types. - models.UniqueConstraint( - fields=[ - "learning_package", - "media_type", - "hash_digest", - ], - name="oel_media_uniq_lc_media_type_hash_digest", - ), - ] - indexes = [ - # LearningPackage (reverse) Size Index: - # * Find the largest Content entries. - models.Index( - fields=["learning_package", "-size"], - name="oel_media_idx_lp_rsize", - ), - ] - verbose_name = "Media" - verbose_name_plural = "Media" diff --git a/projects/dev.py b/projects/dev.py index 1b3b42f47..f233b9b84 100644 --- a/projects/dev.py +++ b/projects/dev.py @@ -33,7 +33,7 @@ # Learning Core Apps "openedx_learning.apps.authoring.collections.apps.CollectionsConfig", "openedx_learning.apps.authoring.components.apps.ComponentsConfig", - "openedx_learning.apps.authoring.contents.apps.ContentsConfig", + "openedx_learning.apps.authoring.media.apps.ContentsConfig", "openedx_learning.apps.authoring.publishing.apps.PublishingConfig", "openedx_learning.apps.authoring.sections.apps.SectionsConfig", "openedx_learning.apps.authoring.subsections.apps.SubsectionsConfig", diff --git a/test_settings.py b/test_settings.py index c4b22926d..24129e2de 100644 --- a/test_settings.py +++ b/test_settings.py @@ -29,7 +29,7 @@ def root(*args): # If you provision the 'oel'@'%' with broad permissions on your MySQL instance, # running the tests will auto-generate a database for running tests. This is # slower than the default sqlite3 setup above, but it's sometimes helpful for -# finding things that only break in CI. +# finding things that only break in CI. # # DATABASES = { # "default": { @@ -55,15 +55,15 @@ def root(*args): # django-rules based authorization 'rules.apps.AutodiscoverRulesConfig', # Our own apps - "openedx_learning.apps.authoring.collections.apps.CollectionsConfig", - "openedx_learning.apps.authoring.components.apps.ComponentsConfig", - "openedx_learning.apps.authoring.contents.apps.ContentsConfig", - "openedx_learning.apps.authoring.publishing.apps.PublishingConfig", - "openedx_tagging.core.tagging.apps.TaggingConfig", - "openedx_learning.apps.authoring.sections.apps.SectionsConfig", - "openedx_learning.apps.authoring.subsections.apps.SubsectionsConfig", - "openedx_learning.apps.authoring.units.apps.UnitsConfig", - "openedx_learning.apps.authoring.backup_restore.apps.BackupRestoreConfig", + "openedx_learning.apps.authoring.collections", + "openedx_learning.apps.authoring.components", + "openedx_learning.apps.authoring.media", + "openedx_learning.apps.authoring.publishing", + "openedx_tagging.core.tagging", + "openedx_learning.apps.authoring.sections", + "openedx_learning.apps.authoring.subsections", + "openedx_learning.apps.authoring.units", + "openedx_learning.apps.authoring.backup_restore", ] AUTHENTICATION_BACKENDS = [ diff --git a/tests/openedx_learning/apps/authoring/components/test_api.py b/tests/openedx_learning/apps/authoring/components/test_api.py index ec98786e9..fd5780ded 100644 --- a/tests/openedx_learning/apps/authoring/components/test_api.py +++ b/tests/openedx_learning/apps/authoring/components/test_api.py @@ -11,8 +11,8 @@ from openedx_learning.apps.authoring.collections.models import Collection from openedx_learning.apps.authoring.components import api as components_api from openedx_learning.apps.authoring.components.models import Component, ComponentType -from openedx_learning.apps.authoring.contents import api as contents_api -from openedx_learning.apps.authoring.contents.models import MediaType +from openedx_learning.apps.authoring.media import api as contents_api +from openedx_learning.apps.authoring.media.models import MediaType from openedx_learning.apps.authoring.publishing import api as publishing_api from openedx_learning.apps.authoring.publishing.models import LearningPackage from openedx_learning.lib.test_utils import TestCase diff --git a/tests/openedx_learning/apps/authoring/components/test_assets.py b/tests/openedx_learning/apps/authoring/components/test_assets.py index 275f7b88d..611653048 100644 --- a/tests/openedx_learning/apps/authoring/components/test_assets.py +++ b/tests/openedx_learning/apps/authoring/components/test_assets.py @@ -7,7 +7,7 @@ from openedx_learning.apps.authoring.components import api as components_api from openedx_learning.apps.authoring.components.api import AssetError -from openedx_learning.apps.authoring.contents import api as contents_api +from openedx_learning.apps.authoring.media import api as contents_api from openedx_learning.apps.authoring.publishing import api as publishing_api from openedx_learning.apps.authoring.publishing.models import LearningPackage from openedx_learning.lib.test_utils import TestCase diff --git a/tests/openedx_learning/apps/authoring/contents/__init__.py b/tests/openedx_learning/apps/authoring/contents/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/openedx_learning/apps/authoring/contents/test_file_storage.py b/tests/openedx_learning/apps/authoring/contents/test_file_storage.py deleted file mode 100644 index 5df00df65..000000000 --- a/tests/openedx_learning/apps/authoring/contents/test_file_storage.py +++ /dev/null @@ -1,79 +0,0 @@ -""" -Tests for file-backed Content -""" -from datetime import datetime, timezone - -from django.conf import settings -from django.core.exceptions import ImproperlyConfigured -from django.test import override_settings - -from openedx_learning.apps.authoring.contents import api as contents_api -from openedx_learning.apps.authoring.contents.models import get_storage -from openedx_learning.apps.authoring.publishing import api as publishing_api -from openedx_learning.lib.test_utils import TestCase - - -class ContentFileStorageTestCase(TestCase): - """ - Test the storage of files backing Content. - """ - def setUp(self) -> None: - """ - It's actually important that we use setUp and not setUpTestData here, - because at least one of our tests will clear the get_storage cache, - meaning that subsequent tests will get a new instance of the - InMemoryStorage backend–and consequently wouldn't see any data loaded - by setUpTestData. - - Recreating the test data for every test lets individual tests change the - storage configuration without creating side-effects for other tests. - """ - super().setUp() - learning_package = publishing_api.create_learning_package( - key="ContentFileStorageTestCase-test-key", - title="Content File Storage Test Case Learning Package", - ) - self.html_media_type = contents_api.get_or_create_media_type("text/html") - self.html_content = contents_api.get_or_create_file_media( - learning_package.id, - self.html_media_type.id, - data=b"hello world!", - created=datetime.now(tz=timezone.utc), - ) - - def test_file_path(self): - """ - Test that the file path doesn't change. - - If this test breaks, it means that we've changed the convention for - where we're storing the backing files for Content, which means we'll be - breaking backwards compatibility for everyone. Please be very careful if - you're updating this test. - """ - content = self.html_content - assert content.path == f"content/{content.learning_package.uuid}/{content.hash_digest}" - - storage_root = settings.OPENEDX_LEARNING['MEDIA']['OPTIONS']['location'] - assert content.os_path() == f"{storage_root}/{content.path}" - - def test_read(self): - """Make sure we can read the file data back.""" - assert b"hello world!" == self.html_content.read_file().read() - - @override_settings() - def test_misconfiguration(self): - """ - Require the OPENEDX_LEARNING setting for file operations. - - The main goal of this is that we don't want to store our files in the - default MEDIA_ROOT, because that would make them publicly accessible. - - We set our OPENEDX_LEARNING value in test_settings.py. We're going to - delete this setting in our test and make sure we raise the correct - exception (The @override_settings decorator will set everything back to - normal after the test completes.) - """ - get_storage.cache_clear() - del settings.OPENEDX_LEARNING - with self.assertRaises(ImproperlyConfigured): - self.html_content.read_file() diff --git a/tests/openedx_learning/apps/authoring/contents/test_media_types.py b/tests/openedx_learning/apps/authoring/contents/test_media_types.py deleted file mode 100644 index 6f9b16b30..000000000 --- a/tests/openedx_learning/apps/authoring/contents/test_media_types.py +++ /dev/null @@ -1,24 +0,0 @@ -""" -A few tests to make sure our MediaType lookups are working as expected. -""" -from openedx_learning.apps.authoring.contents import api as contents_api -from openedx_learning.lib.test_utils import TestCase - - -class MediaTypeTest(TestCase): - """Basic testing of our Media Types for Content""" - - def test_get_or_create_dedupe(self): - """ - Make sure we're not creating redundant rows for the same media type. - """ - # The first time, a row is created for "text/html" - text_media_type_1 = contents_api.get_or_create_media_type("text/plain") - - # This should return the previously created row. - text_media_type_2 = contents_api.get_or_create_media_type("text/plain") - assert text_media_type_1 == text_media_type_2 - - # This is a different type though... - svg_media_type = contents_api.get_or_create_media_type("image/svg+xml") - assert text_media_type_1 != svg_media_type From 0102255e0c194874c9a6f3b3fd267dd31b2296e5 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Thu, 25 Dec 2025 00:04:04 -0500 Subject: [PATCH 6/6] refactor: the other half the media refactor --- .gitignore | 4 +- .../apps/authoring/media/__init__.py | 0 .../apps/authoring/media/admin.py | 71 +++ openedx_learning/apps/authoring/media/api.py | 239 ++++++++++ openedx_learning/apps/authoring/media/apps.py | 15 + .../media/migrations/0001_initial.py | 66 +++ .../migrations/0002_rename_content_media.py | 23 + .../media/migrations/0003_rename_index.py | 27 ++ .../authoring/media/migrations/__init__.py | 0 .../apps/authoring/media/models.py | 410 ++++++++++++++++++ .../apps/authoring/media/__init__.py | 0 .../apps/authoring/media/test_file_storage.py | 79 ++++ .../apps/authoring/media/test_media_types.py | 24 + 13 files changed, 956 insertions(+), 2 deletions(-) create mode 100644 openedx_learning/apps/authoring/media/__init__.py create mode 100644 openedx_learning/apps/authoring/media/admin.py create mode 100644 openedx_learning/apps/authoring/media/api.py create mode 100644 openedx_learning/apps/authoring/media/apps.py create mode 100644 openedx_learning/apps/authoring/media/migrations/0001_initial.py create mode 100644 openedx_learning/apps/authoring/media/migrations/0002_rename_content_media.py create mode 100644 openedx_learning/apps/authoring/media/migrations/0003_rename_index.py create mode 100644 openedx_learning/apps/authoring/media/migrations/__init__.py create mode 100644 openedx_learning/apps/authoring/media/models.py create mode 100644 tests/openedx_learning/apps/authoring/media/__init__.py create mode 100644 tests/openedx_learning/apps/authoring/media/test_file_storage.py create mode 100644 tests/openedx_learning/apps/authoring/media/test_media_types.py diff --git a/.gitignore b/.gitignore index 6123dd4cf..e7d722454 100644 --- a/.gitignore +++ b/.gitignore @@ -75,7 +75,7 @@ venv/ !.vscode/settings.json.example # Media files (for uploads) -media/ +/media/ # Media files generated during test runs -test_media/ +/test_media/ diff --git a/openedx_learning/apps/authoring/media/__init__.py b/openedx_learning/apps/authoring/media/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/openedx_learning/apps/authoring/media/admin.py b/openedx_learning/apps/authoring/media/admin.py new file mode 100644 index 000000000..57cf254cb --- /dev/null +++ b/openedx_learning/apps/authoring/media/admin.py @@ -0,0 +1,71 @@ +""" +Django admin for contents models +""" +import base64 + +from django.contrib import admin +from django.utils.html import format_html + +from openedx_learning.lib.admin_utils import ReadOnlyModelAdmin + +from .models import Media + + +@admin.register(Media) +class MediaAdmin(ReadOnlyModelAdmin): + """ + Django admin for Content model + """ + list_display = [ + "hash_digest", + "learning_package", + "media_type", + "size", + "created", + "has_file", + ] + fields = [ + "learning_package", + "hash_digest", + "media_type", + "size", + "created", + "has_file", + "path", + "os_path", + "text_preview", + "image_preview", + ] + list_filter = ("media_type", "learning_package") + search_fields = ("hash_digest",) + + @admin.display(description="OS Path") + def os_path(self, content: Media): + return content.os_path() or "" + + def path(self, content: Media): + return content.path if content.has_file else "" + + def text_preview(self, content: Media): + if not content.text: + return "" + return format_html( + '
\n{}\n
', + content.text, + ) + + def image_preview(self, content: Media): + """ + Return HTML for an image, if that is the underlying Content. + + Otherwise, just return a blank string. + """ + if content.media_type.type != "image": + return "" + + data = content.read_file().read() + return format_html( + '', + content.mime_type, + base64.encodebytes(data).decode('utf8'), + ) diff --git a/openedx_learning/apps/authoring/media/api.py b/openedx_learning/apps/authoring/media/api.py new file mode 100644 index 000000000..1baa1d7a1 --- /dev/null +++ b/openedx_learning/apps/authoring/media/api.py @@ -0,0 +1,239 @@ +""" +Low Level Media API (warning: UNSTABLE, in progress API) + +Please look at the models.py file for more information about the kinds of data +are stored in this app. +""" +from __future__ import annotations + +from datetime import datetime +from logging import getLogger + +from django.core.files.base import ContentFile +from django.db.transaction import atomic + +from ....lib.fields import create_hash_digest +from .models import Media, MediaType + +# The public API that will be re-exported by openedx_learning.apps.authoring.api +# is listed in the __all__ entries below. Internal helper functions that are +# private to this module should start with an underscore. If a function does not +# start with an underscore AND it is not in __all__, that function is considered +# to be callable only by other apps in the authoring package. +__all__ = [ + "get_or_create_media_type", + "get_media", + "get_media_info_headers", + "get_or_create_text_media", + "get_or_create_file_media", +] + + +log = getLogger() + + +def get_or_create_media_type(mime_type: str) -> MediaType: + """ + Return the MediaType.id for the desired mime_type string. + + If it is not found in the database, a new entry will be created for it. This + lazy-writing means that MediaType entry IDs will *not* be the same across + different server instances, and apps should not assume that will be the + case. Even if we were to preload a bunch of common ones, we can't anticipate + the different XBlocks that will be installed in different server instances, + each of which will use their own MediaType. + + Caching Warning: Be careful about putting any caching decorator around this + function (e.g. ``lru_cache``). It's possible that incorrect cache values + could leak out in the event of a rollback–e.g. new types are introduced in + a large import transaction which later fails. You can safely cache the + results that come back from this function with a local dict in your import + process instead. + """ + if "+" in mime_type: + base, suffix = mime_type.split("+") + else: + base = mime_type + suffix = "" + + main_type, sub_type = base.split("/") + media_type, _created = MediaType.objects.get_or_create( + type=main_type, + sub_type=sub_type, + suffix=suffix, + ) + + return media_type + + +def get_media(media_id: int, /) -> Media: + """ + Get a single Media object by its ID. + + Media is always attached to something when it's created, like to a + ComponentVersion. That means the "right" way to access a Media is almost + always going to be via those relations and not via this function. But I + include this function anyway because it's tiny to write and it's better than + someone using a get_or_create_* function when they really just want to get. + """ + return Media.objects.get(id=media_id) + + +def get_or_create_text_media( + learning_package_id: int, + media_type_id: int, + /, + text: str, + created: datetime, + create_file: bool = False, +) -> Media: + """ + Get or create a Media entry with text data stored in the database. + + Use this when you want to create relatively small chunks of text that need + to be accessed quickly, especially if you're pulling back multiple rows at + once. For example, this is the function to call when storing OLX for a + component XBlock like a ProblemBlock. + + This function will *always* create a text entry in the database. In addition + to this, if you specify ``create_file=True``, it will also save a copy of + that text data to the file storage backend. This is useful if we want to let + that file be downloadable by browsers in the LMS at some point. + + If you want to create a large text file, or want to create a text file that + doesn't need to be stored in the database, call ``create_file_media`` + instead of this function. + """ + text_as_bytes = text.encode('utf-8') + hash_digest = create_hash_digest(text_as_bytes) + + with atomic(): + try: + media = Media.objects.get( + learning_package_id=learning_package_id, + media_type_id=media_type_id, + hash_digest=hash_digest, + ) + except Media.DoesNotExist: + media = Media( + learning_package_id=learning_package_id, + media_type_id=media_type_id, + hash_digest=hash_digest, + created=created, + size=len(text_as_bytes), + text=text, + has_file=create_file, + ) + media.full_clean() + media.save() + + if create_file: + media.write_file(ContentFile(text_as_bytes)) + + return media + + +def get_or_create_file_media( + learning_package_id: int, + media_type_id: int, + /, + data: bytes, + created: datetime, +) -> Media: + """ + Get or create a Media with data stored in a file storage backend. + + Use this function to store non-text data, large data, or data where low + latency access is not necessary. Also use this function (or + ``get_or_create_text_media`` with ``create_file=True``) to store any + Media that you want to be downloadable by browsers in the LMS, since the + static asset serving system will only work with file-backed Media. + """ + hash_digest = create_hash_digest(data) + with atomic(): + try: + media = Media.objects.get( + learning_package_id=learning_package_id, + media_type_id=media_type_id, + hash_digest=hash_digest, + ) + except Media.DoesNotExist: + media = Media( + learning_package_id=learning_package_id, + media_type_id=media_type_id, + hash_digest=hash_digest, + created=created, + size=len(data), + text=None, + has_file=True, + ) + media.full_clean() + media.save() + + media.write_file(ContentFile(data)) + + return media + + +def get_media_info_headers(media: Media) -> dict[str, str]: + """ + Return HTTP headers that are specific to this Media. + + This currently only consists of the Content-Type and ETag. These values are + safe to cache. + """ + return { + "Content-Type": str(media.media_type), + "Etag": media.hash_digest, + } + + +def get_redirect_headers( + stored_file_path: str, + public: bool = False, + max_age: int | None = None, +) -> dict[str, str]: + """ + Return a dict of headers for file redirect and caching. + + This is a separate function from get_media_info_headers because the URLs + returned in these headers produced by this function should never be put into + the backend Django cache (redis/memcached). The `stored_file_path` location + *is* cacheable though–that's the actual storage location for the resource, + and not a link that could potentially expire. + + TODO: We need to add support for short-lived URL generation from the + stored_file_path. + """ + if public: + # If an asset is public, then let it be cached by the reverse-proxy and + # CDN, but do require that it be revalidated after the suggested max + # age. This would help us do things like take a URL that was mistakenly + # made public and make it require authentication. Fortunately, checking + # that the content is up to date is a cheap operation, since it just + # requires examining the Etag. + cache_directive = "must-revalidate" + + # Default to an hour of caching, to make it easier to tighten access + # later on. + max_age = max_age or (5 * 60) + else: + # If an asset is meant to be private, that means this response should + # not be cached by either the reverse-proxy or any CDN–it's only ever + # cached on the user's browser. This is what you'd use for very granular + # permissions checking, e.g. "only let them see this image if they have + # access to the Component it's associated with". Note that we're not + # doing ``Vary: Cookie`` because that would fill the reverse-proxy and + # CDN caches with a lot of redundant entries. + cache_directive = "private" + + # This only stays on the user's browser, so cache for a whole day. This + # is okay to do because Media data is typically immutable–i.e. if an + # asset actually changes, the user should be directed to a different URL + # for it. + max_age = max_age or (60 * 60 * 24) + + return { + "Cache-Control": f"max-age={max_age}, {cache_directive}", + "X-Accel-Redirect": stored_file_path, + } diff --git a/openedx_learning/apps/authoring/media/apps.py b/openedx_learning/apps/authoring/media/apps.py new file mode 100644 index 000000000..e0168b663 --- /dev/null +++ b/openedx_learning/apps/authoring/media/apps.py @@ -0,0 +1,15 @@ +""" +Django metadata for the Contents Django application. +""" +from django.apps import AppConfig + + +class ContentsConfig(AppConfig): + """ + Configuration for the Contents Django application. + """ + + name = "openedx_learning.apps.authoring.media" + verbose_name = "Learning Core > Authoring > Media" + default_auto_field = "django.db.models.BigAutoField" + label = "oel_contents" diff --git a/openedx_learning/apps/authoring/media/migrations/0001_initial.py b/openedx_learning/apps/authoring/media/migrations/0001_initial.py new file mode 100644 index 000000000..1abf65adf --- /dev/null +++ b/openedx_learning/apps/authoring/media/migrations/0001_initial.py @@ -0,0 +1,66 @@ +# Generated by Django 3.2.23 on 2024-02-06 03:23 + +import django.core.validators +import django.db.models.deletion +from django.db import migrations, models + +import openedx_learning.lib.fields +import openedx_learning.lib.validators + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ('oel_publishing', '0001_initial'), + ] + + operations = [ + migrations.CreateModel( + name='Content', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('size', models.PositiveBigIntegerField(validators=[django.core.validators.MaxValueValidator(50000000)])), + ('hash_digest', models.CharField(editable=False, max_length=40)), + ('has_file', models.BooleanField()), + ('text', openedx_learning.lib.fields.MultiCollationTextField(blank=True, db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=50000, null=True)), + ('created', models.DateTimeField(validators=[openedx_learning.lib.validators.validate_utc_datetime])), + ], + options={ + 'verbose_name': 'Content', + 'verbose_name_plural': 'Contents', + }, + ), + migrations.CreateModel( + name='MediaType', + fields=[ + ('id', models.AutoField(primary_key=True, serialize=False)), + ('type', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=127)), + ('sub_type', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=127)), + ('suffix', openedx_learning.lib.fields.MultiCollationCharField(blank=True, db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=127)), + ], + ), + migrations.AddConstraint( + model_name='mediatype', + constraint=models.UniqueConstraint(fields=('type', 'sub_type', 'suffix'), name='oel_contents_uniq_t_st_sfx'), + ), + migrations.AddField( + model_name='content', + name='learning_package', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.learningpackage'), + ), + migrations.AddField( + model_name='content', + name='media_type', + field=models.ForeignKey(on_delete=django.db.models.deletion.PROTECT, to='oel_contents.mediatype'), + ), + migrations.AddIndex( + model_name='content', + index=models.Index(fields=['learning_package', '-size'], name='oel_content_idx_lp_rsize'), + ), + migrations.AddConstraint( + model_name='content', + constraint=models.UniqueConstraint(fields=('learning_package', 'media_type', 'hash_digest'), name='oel_content_uniq_lc_media_type_hash_digest'), + ), + ] diff --git a/openedx_learning/apps/authoring/media/migrations/0002_rename_content_media.py b/openedx_learning/apps/authoring/media/migrations/0002_rename_content_media.py new file mode 100644 index 000000000..809afb276 --- /dev/null +++ b/openedx_learning/apps/authoring/media/migrations/0002_rename_content_media.py @@ -0,0 +1,23 @@ +# Generated by Django 5.2.9 on 2025-12-22 23:49 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('oel_components', '0004_remove_componentversioncontent_uuid'), + ('oel_contents', '0001_initial'), + ('oel_publishing', '0010_backfill_dependencies'), + ] + + operations = [ + migrations.RenameModel( + old_name='Content', + new_name='Media', + ), + migrations.AlterModelOptions( + name='media', + options={'verbose_name': 'Media', 'verbose_name_plural': 'Media'}, + ), + ] diff --git a/openedx_learning/apps/authoring/media/migrations/0003_rename_index.py b/openedx_learning/apps/authoring/media/migrations/0003_rename_index.py new file mode 100644 index 000000000..9612843db --- /dev/null +++ b/openedx_learning/apps/authoring/media/migrations/0003_rename_index.py @@ -0,0 +1,27 @@ +# Generated by Django 5.2.9 on 2025-12-23 19:09 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('oel_contents', '0002_rename_content_media'), + ('oel_publishing', '0010_backfill_dependencies'), + ] + + operations = [ + migrations.RemoveConstraint( + model_name='media', + name='oel_content_uniq_lc_media_type_hash_digest', + ), + migrations.RenameIndex( + model_name='media', + new_name='oel_media_idx_lp_rsize', + old_name='oel_content_idx_lp_rsize', + ), + migrations.AddConstraint( + model_name='media', + constraint=models.UniqueConstraint(fields=('learning_package', 'media_type', 'hash_digest'), name='oel_media_uniq_lc_media_type_hash_digest'), + ), + ] diff --git a/openedx_learning/apps/authoring/media/migrations/__init__.py b/openedx_learning/apps/authoring/media/migrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/openedx_learning/apps/authoring/media/models.py b/openedx_learning/apps/authoring/media/models.py new file mode 100644 index 000000000..4af53925b --- /dev/null +++ b/openedx_learning/apps/authoring/media/models.py @@ -0,0 +1,410 @@ +""" +These models are the most basic pieces of content we support. Think of them as +the simplest building blocks to store data with. They need to be composed into +more intelligent data models to be useful. +""" +from __future__ import annotations + +from functools import cache, cached_property +from logging import getLogger + +from django.conf import settings +from django.core.exceptions import ImproperlyConfigured, ValidationError +from django.core.files.base import File +from django.core.files.storage import Storage +from django.core.validators import MaxValueValidator +from django.db import models +from django.utils.module_loading import import_string + +from ....lib.fields import MultiCollationTextField, case_insensitive_char_field, hash_field, manual_date_time_field +from ....lib.managers import WithRelationsManager +from ..publishing.models import LearningPackage + +logger = getLogger() + +__all__ = [ + "MediaType", + "Media", +] + + +@cache +def get_storage() -> Storage: + """ + Return the Storage instance for our Content file persistence. + + This will first search for an OPENEDX_LEARNING config dictionary and return + a Storage subclass based on that configuration. + + If there is no value for the OPENEDX_LEARNING setting, we return the default + MEDIA storage class. TODO: Should we make it just error instead? + """ + config_dict = getattr(settings, 'OPENEDX_LEARNING', {}) + + if 'MEDIA' in config_dict: + storage_cls = import_string(config_dict['MEDIA']['BACKEND']) + options = config_dict['MEDIA'].get('OPTIONS', {}) + return storage_cls(**options) + + raise ImproperlyConfigured( + "Cannot access file storage: Missing the OPENEDX_LEARNING['MEDIA'] " + "setting, which should have a storage BACKEND and OPTIONS values for " + "a Storage subclass. These files should be stored in a location that " + "is NOT publicly accessible to browsers (so not in the MEDIA_ROOT)." + ) + + +class MediaType(models.Model): + """ + Stores Media types for use by Content models. + + This is the same as MIME types (the IANA renamed MIME Types to Media Types). + We don't pre-populate this table, so APIs that add Content must ensure that + the desired Media Type exists. + + Media types are written as {type}/{sub_type}+{suffix}, where suffixes are + seldom used. Examples: + + * application/json + * text/css + * image/svg+xml + * application/vnd.openedx.xblock.v1.problem+xml + + We have this as a separate model (instead of a field on Content) because: + + 1. We can save a lot on storage and indexing for Content if we're just + storing foreign key references there, rather than the entire content + string to be indexed. This is especially relevant for our (long) custom + types like "application/vnd.openedx.xblock.v1.problem+xml". + 2. These values can occasionally change. For instance, "text/javascript" vs. + "application/javascript". Also, we will be using a fair number of "vnd." + style of custom content types, and we may want the flexibility of + changing that without having to worry about migrating millions of rows of + Content. + """ + # We're going to have many foreign key references from Content into this + # model, and we don't need to store those as 8-byte BigAutoField, as is the + # default for this app. It's likely that a SmallAutoField would work, but I + # can just barely imagine using more than 32K Media types if we have a bunch + # of custom "vnd." entries, or start tracking suffixes and parameters. Which + # is how we end up at the 4-byte AutoField. + id = models.AutoField(primary_key=True) + + # Media types are denoted as {type}/{sub_type}+{suffix}. We currently do not + # support parameters. + + # Media type, e.g. "application", "text", "image". Per RFC 4288, this can be + # at most 127 chars long and is case insensitive. In practice, it's almost + # always written in lowercase. + type = case_insensitive_char_field(max_length=127, blank=False, null=False) + + # Media sub-type, e.g. "json", "css", "png". Per RFC 4288, this can be at + # most 127 chars long and is case insensitive. In practice, it's almost + # always written in lowercase. + sub_type = case_insensitive_char_field(max_length=127, blank=False, null=False) + + # Suffix, like "xml" (e.g. "image/svg+xml"). Usually blank. I couldn't find + # an RFC description of the length limit, and 127 is probably excessive. But + # this table should be small enough where it doesn't really matter. + suffix = case_insensitive_char_field(max_length=127, blank=True, null=False) + + class Meta: + constraints = [ + # Make sure all (type + sub_type + suffix) combinations are unique. + models.UniqueConstraint( + fields=[ + "type", + "sub_type", + "suffix", + ], + name="oel_contents_uniq_t_st_sfx", + ), + ] + + def __str__(self) -> str: + base = f"{self.type}/{self.sub_type}" + if self.suffix: + return f"{base}+{self.suffix}" + return base + + +class Media(models.Model): + """ + This is the most primitive piece of content data. + + This model serves to lookup, de-duplicate, and store text and files. A piece + of Content is identified purely by its data, the media type, and the + LearningPackage it is associated with. It has no version or file name + metadata associated with it. It exists to be a dumb blob of data that higher + level models like ComponentVersions can assemble together. + + # In-model Text vs. File + + That being said, the Content model does have some complexity to accomodate + different access patterns that we have in our app. In particular, it can + store data in two ways: the ``text`` field and a file (``has_file=True``) + A Content object must use at least one of these methods, but can use both if + it's appropriate. + + Use the ``text`` field when: + * the content is a relatively small (< 50K, usually much less) piece of text + * you want to do be able to query up update across many rows at once + * low, predictable latency is important + + Use file storage when: + * the content is large, or not text-based + * you want to be able to serve the file content directly to the browser + + The high level tradeoff is that ``text`` will give you faster access, and + file storage will give you a much more affordable and scalable backend. The + backend used for files will also eventually allow direct browser download + access, whereas the ``text`` field will not. But again, you can use both at + the same time if needed. + + # Association with a LearningPackage + + Content is associated with a specific LearningPackage. Doing so allows us to + more easily query for how much storge space a specific LearningPackage + (likely a library) is using, and to clean up unused data. + + When we get to borrowing Content across LearningPackages, it's likely that + we will want to copy them. That way, even if the originating LearningPackage + is deleted, it won't break other LearningPackages that are making use if it. + + # Media Types, and file duplication + + Content is almost 1:1 with the files that it pushes to a storage backend, + but not quite. The file locations are generated purely as a product of the + LearningPackage UUID and the Content's ``hash_digest``, but Content also + takes into account the ``media_type``. + + For example, say we had a Content with the following data: + + ["hello", "world"] + + That is legal syntax for both JSON and YAML. If you want to attach some + YAML-specific metadata in a new model, you could make it 1:1 with the + Content that matched the "application/yaml" media type. The YAML and JSON + versions of this data would be two separate Content rows that would share + the same ``hash_digest`` value. If they both stored a file, they would be + pointing to the same file location. If they only used the ``text`` field, + then that value would be duplicated across the two separate Content rows. + + The alternative would have been to associate media types at the level where + this data was being added to a ComponentVersion, but that would have added + more complexity. Right now, you could make an ImageContent 1:1 model that + analyzed images and created metatdata entries for them (dimensions, GPS) + without having to understand how ComponentVerisons work. + + This is definitely an edge case, and it's likely the only time collisions + like this will happen in practice is with blank files. It also means that + using this table to measure disk usage may be slightly inaccurate when used + in a LearningPackage with collisions–though we expect to use numbers like + that mostly to get a broad sense of usage and look for major outliers, + rather than for byte-level accuracy (it wouldn't account for the non-trivial + indexing storage costs either). + + # Immutability + + From the outside, Content should appear immutable. Since the Content is + looked up by a hash of its data, a change in the data means that we should + look up the hash value of that new data and create a new Content if we don't + find a match. + + That being said, the Content model has different ways of storing that data, + and that is mutable. We could decide that a certain type of Content should + be optimized to store its text in the table. Or that a content type that we + had previously only stored as text now also needs to be stored on in the + file storage backend so that it can be made available to be downloaded. + These operations would be done as data migrations. + + # Extensibility + + Third-party apps are encouraged to create models that have a OneToOneField + relationship with Content. For instance, an ImageContent model might join + 1:1 with all Content that has image/* media types, and provide additional + metadata for that data. + """ + # Max size of the file. + MAX_FILE_SIZE = 50_000_000 + + # 50K is our limit for text data, like OLX. This means 50K *characters*, + # not bytes. Since UTF-8 encodes characters using as many as 4 bytes, this + # could be as much as 200K of data if we had nothing but emojis. + MAX_TEXT_LENGTH = 50_000 + + objects: models.Manager[Media] = WithRelationsManager('media_type') + + learning_package = models.ForeignKey(LearningPackage, on_delete=models.CASCADE) + + # What is the Media type (a.k.a. MIME type) of this data? + media_type = models.ForeignKey(MediaType, on_delete=models.PROTECT) + + # This is the size of the file in bytes. This can be different than the + # character length of a text file, since UTF-8 encoding can use anywhere + # between 1-4 bytes to represent any given character. + size = models.PositiveBigIntegerField( + validators=[MaxValueValidator(MAX_FILE_SIZE)], + ) + + # This hash value may be calculated using create_hash_digest from the + # openedx.lib.fields module. When storing text, we hash the UTF-8 + # encoding of that text value, regardless of whether we also write it to a + # file or not. When storing just a file, we hash the bytes in the file. + hash_digest = hash_field() + + # Do we have file data stored for this Content in our file storage backend? + # We use has_file instead of a FileField because it's more space efficient. + # The location of a Content's file data is derivable from the Learning + # Package's UUID and the hash of the Content. There's no need to waste that + # space to encode it in every row. + has_file = models.BooleanField() + + # The ``text`` field contains the text representation of the Content, if + # it is available. A blank value means means that we are storing text for + # this Content, and that text happens to be an empty string. A null value + # here means that we are not storing any text here, and the Content exists + # only in file form. It is an error for ``text`` to be None and ``has_file`` + # to be False, since that would mean we haven't stored data anywhere at all. + # + text = MultiCollationTextField( + blank=True, + null=True, + max_length=MAX_TEXT_LENGTH, + # We don't really expect to ever sort by the text column, but we may + # want to do case-insensitive searches, so it's useful to have a case + # and accent insensitive collation. + db_collations={ + "sqlite": "NOCASE", + "mysql": "utf8mb4_unicode_ci", + } + ) + + # This should be manually set so that multiple Content rows being set in + # the same transaction are created with the same timestamp. The timestamp + # should be UTC. + created = manual_date_time_field() + + @cached_property + def mime_type(self) -> str: + """ + The IANA media type (a.k.a. MIME type) of the Content, in string form. + + MIME types reference: + https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types + """ + return str(self.media_type) + + @cached_property + def path(self): + """ + Logical path at which this content is stored (or would be stored). + + This path is relative to OPENEDX_LEARNING['MEDIA'] configured storage + root. This file may not exist because has_file=False, or because we + haven't written the file yet (this is the method we call when trying to + figure out where the file *should* go). + """ + return f"content/{self.learning_package.uuid}/{self.hash_digest}" + + def os_path(self): + """ + The full OS path for the underlying file for this Content. + + This will not be supported by all Storage class types. + + This will return ``None`` if there is no backing file (has_file=False). + """ + try: + if self.has_file: + return get_storage().path(self.path) + except NotImplementedError: + logger.warning("Storage backend does not support path()") + return None + + def read_file(self) -> File: + """ + Get a File object that has been open for reading. + + We intentionally don't expose an `open()` call where callers can open + this file in write mode. Writing a Content file should happen at most + once, and the logic is not obvious (see ``write_file``). + + At the end of the day, the caller can close the returned File and reopen + it in whatever mode they want, but we're trying to gently discourage + that kind of usage. + """ + return get_storage().open(self.path, 'rb') + + def write_file(self, file: File) -> None: + """ + Write file contents to the file storage backend. + + This function does nothing if the file already exists. Note that Content + is supposed to be immutable, so this should normally only be called once + for a given Content row. + """ + storage = get_storage() + + # There are two reasons why a file might already exist even if the the + # Content row is new: + # + # 1. We tried adding the file earlier, but an error rolled back the + # state of the database. The file storage system isn't covered by any + # sort of transaction semantics, so it won't get rolled back. + # + # 2. The Content is of a different MediaType. The same exact bytes can + # be two logically separate Content entries if they are different file + # types. This lets other models add data to Content via 1:1 relations by + # ContentType (e.g. all SRT files). This is definitely an edge case. + # + # 3. Similar to (2), but only part of the file was written before an + # error occurred. This seems unlikely, but possible if the underlying + # storage engine writes in chunks. + if storage.exists(self.path) and storage.size(self.path) == file.size: + return + storage.save(self.path, file) + + def file_url(self) -> str: + """ + This will sometimes be a time-limited signed URL. + """ + return get_storage().url(self.path) + + def clean(self): + """ + Make sure we're actually storing *something*. + + If this Content has neither a file or text data associated with it, + it's in a broken/useless state and shouldn't be saved. + """ + if (not self.has_file) and (self.text is None): + raise ValidationError( + f"Content {self.pk} with hash {self.hash_digest} must either " + "set a string value for 'text', or it must set has_file=True " + "(or both)." + ) + + class Meta: + constraints = [ + # Make sure we don't store duplicates of this raw data within the + # same LearningPackage, unless they're of different MIME types. + models.UniqueConstraint( + fields=[ + "learning_package", + "media_type", + "hash_digest", + ], + name="oel_media_uniq_lc_media_type_hash_digest", + ), + ] + indexes = [ + # LearningPackage (reverse) Size Index: + # * Find the largest Content entries. + models.Index( + fields=["learning_package", "-size"], + name="oel_media_idx_lp_rsize", + ), + ] + verbose_name = "Media" + verbose_name_plural = "Media" diff --git a/tests/openedx_learning/apps/authoring/media/__init__.py b/tests/openedx_learning/apps/authoring/media/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/openedx_learning/apps/authoring/media/test_file_storage.py b/tests/openedx_learning/apps/authoring/media/test_file_storage.py new file mode 100644 index 000000000..ec40a9bd1 --- /dev/null +++ b/tests/openedx_learning/apps/authoring/media/test_file_storage.py @@ -0,0 +1,79 @@ +""" +Tests for file-backed Content +""" +from datetime import datetime, timezone + +from django.conf import settings +from django.core.exceptions import ImproperlyConfigured +from django.test import override_settings + +from openedx_learning.apps.authoring.media import api as contents_api +from openedx_learning.apps.authoring.media.models import get_storage +from openedx_learning.apps.authoring.publishing import api as publishing_api +from openedx_learning.lib.test_utils import TestCase + + +class ContentFileStorageTestCase(TestCase): + """ + Test the storage of files backing Content. + """ + def setUp(self) -> None: + """ + It's actually important that we use setUp and not setUpTestData here, + because at least one of our tests will clear the get_storage cache, + meaning that subsequent tests will get a new instance of the + InMemoryStorage backend–and consequently wouldn't see any data loaded + by setUpTestData. + + Recreating the test data for every test lets individual tests change the + storage configuration without creating side-effects for other tests. + """ + super().setUp() + learning_package = publishing_api.create_learning_package( + key="ContentFileStorageTestCase-test-key", + title="Content File Storage Test Case Learning Package", + ) + self.html_media_type = contents_api.get_or_create_media_type("text/html") + self.html_content = contents_api.get_or_create_file_media( + learning_package.id, + self.html_media_type.id, + data=b"hello world!", + created=datetime.now(tz=timezone.utc), + ) + + def test_file_path(self): + """ + Test that the file path doesn't change. + + If this test breaks, it means that we've changed the convention for + where we're storing the backing files for Content, which means we'll be + breaking backwards compatibility for everyone. Please be very careful if + you're updating this test. + """ + content = self.html_content + assert content.path == f"content/{content.learning_package.uuid}/{content.hash_digest}" + + storage_root = settings.OPENEDX_LEARNING['MEDIA']['OPTIONS']['location'] + assert content.os_path() == f"{storage_root}/{content.path}" + + def test_read(self): + """Make sure we can read the file data back.""" + assert b"hello world!" == self.html_content.read_file().read() + + @override_settings() + def test_misconfiguration(self): + """ + Require the OPENEDX_LEARNING setting for file operations. + + The main goal of this is that we don't want to store our files in the + default MEDIA_ROOT, because that would make them publicly accessible. + + We set our OPENEDX_LEARNING value in test_settings.py. We're going to + delete this setting in our test and make sure we raise the correct + exception (The @override_settings decorator will set everything back to + normal after the test completes.) + """ + get_storage.cache_clear() + del settings.OPENEDX_LEARNING + with self.assertRaises(ImproperlyConfigured): + self.html_content.read_file() diff --git a/tests/openedx_learning/apps/authoring/media/test_media_types.py b/tests/openedx_learning/apps/authoring/media/test_media_types.py new file mode 100644 index 000000000..f24be1880 --- /dev/null +++ b/tests/openedx_learning/apps/authoring/media/test_media_types.py @@ -0,0 +1,24 @@ +""" +A few tests to make sure our MediaType lookups are working as expected. +""" +from openedx_learning.apps.authoring.media import api as contents_api +from openedx_learning.lib.test_utils import TestCase + + +class MediaTypeTest(TestCase): + """Basic testing of our Media Types for Content""" + + def test_get_or_create_dedupe(self): + """ + Make sure we're not creating redundant rows for the same media type. + """ + # The first time, a row is created for "text/html" + text_media_type_1 = contents_api.get_or_create_media_type("text/plain") + + # This should return the previously created row. + text_media_type_2 = contents_api.get_or_create_media_type("text/plain") + assert text_media_type_1 == text_media_type_2 + + # This is a different type though... + svg_media_type = contents_api.get_or_create_media_type("image/svg+xml") + assert text_media_type_1 != svg_media_type