From 7fd67fec95cf1af35643787f5fe0674ef70aae63 Mon Sep 17 00:00:00 2001
From: amercader <amercadero@gmail.com>
Date: Fri, 17 Jan 2025 15:54:36 +0100
Subject: [PATCH] Add stub for croissant validation tests

---
 .../tests/profiles/croissant/test_validate.py | 23 ++++++
 .../ckan/ckan_full_dataset_croissant.json     | 76 +++++++++++++++++++
 2 files changed, 99 insertions(+)
 create mode 100644 ckanext/dcat/tests/profiles/croissant/test_validate.py
 create mode 100644 examples/ckan/ckan_full_dataset_croissant.json

diff --git a/ckanext/dcat/tests/profiles/croissant/test_validate.py b/ckanext/dcat/tests/profiles/croissant/test_validate.py
new file mode 100644
index 00000000..d98b32a2
--- /dev/null
+++ b/ckanext/dcat/tests/profiles/croissant/test_validate.py
@@ -0,0 +1,23 @@
+import json
+
+import mlcroissant as mlc
+
+from ckan.tests.helpers import call_action
+from ckanext.dcat.processors import RDFSerializer
+from ckanext.dcat.tests.utils import get_file_contents
+
+
+def test_valid_output():
+    """Serialize the example dataset with the croissant profile and validate it."""
+    dataset_dict = json.loads(get_file_contents("ckan/ckan_full_dataset_croissant.json"))
+
+    s = RDFSerializer(profiles=["croissant"])
+    s.graph_from_dataset(dataset_dict)
+
+    # serialize() returns JSON-LD text, but mlc.Dataset reads a str as a
+    # file path/URL, so decode it to a dict first. auto_compact=True makes
+    # rdflib emit a single JSON object (with @context) instead of a list.
+    croissant_dict = json.loads(s.g.serialize(format="json-ld", auto_compact=True))
+
+    # mlc.ValidationError from invalid metadata propagates and fails the test.
+    mlc.Dataset(croissant_dict)
diff --git a/examples/ckan/ckan_full_dataset_croissant.json b/examples/ckan/ckan_full_dataset_croissant.json
new file mode 100644
index 00000000..d387fe2f
--- /dev/null
+++ b/examples/ckan/ckan_full_dataset_croissant.json
@@ -0,0 +1,76 @@
+{
+    "id": "05469327-3d64-4e54-a23b-e97e0a0b9c81",
+    "name": "test-dataset-croissant",
+    "title": "Test Croissant dataset",
+    "notes": "Lorem ipsum",
+    "id_given": "my-custom-id",
+    "is_live_dataset": true,
+    "url": "http://example.org/ds1",
+    "version": "1.0b",
+    "tags": [
+        {
+            "name": "Tag 1"
+        },
+        {
+            "name": "Tag 2"
+        }
+    ],
+    "created": "2024-05-01",
+    "issued": "2024-05-02",
+    "modified": "2024-05-05",
+    "license": "http://creativecommons.org/licenses/by/3.0/",
+    "structured_data_license": "http://creativecommons.org/licenses/by/3.0/",
+    "language": [
+        "en",
+        "ca",
+        "es"
+    ],
+    "creator": [
+        {
+            "name": "Test Creator",
+            "email": "creator@example.org",
+            "url": "https://example.org",
+            "identifier": "http://example.org/creator-id",
+            "type": "person",
+            "id_given": "custom-creator-id"
+        }
+    ],
+    "publisher": [
+        {
+            "name": "Test Publisher",
+            "email": "publisher@example.org",
+            "url": "https://example.org",
+            "identifier": "http://example.org/publisher-id",
+            "type": "person",
+            "id_given": "custom-publisher-id"
+        }
+    ],
+    "same_as": [
+        "https://some.other.catalog/dataset/123",
+        "https://yet.another.catalog/dataset/xyz"
+    ],
+    "cite_as": "@Article{bloggs24data, author=\"Joe Bloggs and Sally Biggs\"}",
+    "resources": [
+        {
+            "id": "568b8ac9-8c69-4475-b35e-d7f812a63c32",
+            "name": "Resource 1",
+            "description": "Some description",
+            "url": "https://example.com/data.csv",
+            "format": "CSV",
+            "type": "fileSet",
+            "id_given": "my-custom-resource-id",
+            "size": 12323,
+            "hash": "4304cf2e751e6053c90b1804c89c0ebb758f395a",
+            "subresources": [
+                {
+                    "type": "fileObject",
+                    "id_given": "my-custom-subresource-id",
+                    "description": "Test subresource 1",
+                    "format": "CSV",
+                    "includes": "**.csv",
+                    "excludes": "**.txt"
+                }
+            ]
+        }
+    ]
+}