Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,13 @@ jobs:
pip install -r ckanext-harvest/requirements.txt
git clone https://github.com/ckan/ckanext-scheming
pip install -e ckanext-scheming
pip install git+https://github.com/ckan/ckanext-fluent.git@4e9340a#egg=ckanext-fluent
git clone https://github.com/ckan/ckanext-fluent
pip install -e ckanext-fluent
git clone https://github.com/ckan/ckanext-dataset-series
pip install -e ckanext-dataset-series
- name: Setup extension
run: |
ckan -c test.ini db init
ckan -c test.ini db pending-migrations --apply
- name: Run tests
run: pytest --ckan-ini=test.ini --cov=ckanext.dcat --cov-report=term-missing --cov-append --disable-warnings ckanext/dcat/tests
run: pytest --ckan-ini=test.ini --cov=ckanext.dcat --cov-report=term-missing --cov-append --disable-warnings ckanext/dcat/tests
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,9 @@ build/*
tmp/*
package/DEBIAN/control
*.swp
.idea/.gitignore
.idea/ckanext-dcat.iml
.idea/misc.xml
.idea/modules.xml
.idea/vcs.xml
.idea/inspectionProfiles/profiles_settings.xml
14 changes: 13 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
# Changelog

## [Unreleased](https://github.com/ckan/ckanext-dcat/compare/v2.4.0...HEAD)
## [Unreleased](https://github.com/ckan/ckanext-dcat/compare/v2.4.1...HEAD)

## [v2.4.1](https://github.com/ckan/ckanext-dcat/compare/v2.4.0...v2.4.1) - 2025-09-25

* Fix regression redirect from /dataset to /dataset_series ([#362](https://github.com/ckan/ckanext-dcat/pull/362))
* Provide default language in Croissant JSON-LD context ([#361](https://github.com/ckan/ckanext-dcat/pull/361))
* Added [`IDCATURIGenerator`](https://docs.ckan.org/projects/ckanext-dcat/en/latest/uri-customization/)
plugin interface to allow customization of the URIs generation ([#351](https://github.com/ckan/ckanext-dcat/pull/351))
* Added support for new fields to DCAT classes: `dcat:Dataset` (`prov:wasGeneratedBy`, `prov:qualifiedAttribution`,
`dcat:hasVersion`), `dcat:Catalog` (`foaf:homepage`), `dcat:DataService` (`dct:conformsTo`, `dct:format`,
`dct:identifier`, `dct:language`, `dct:rights`, `dcat:landingPage`, `dcat:keyword`) ([#352](https://github.com/ckan/ckanext-dcat/pull/352))
* Docs: Add HealthDCAT-AP mapping to CKAN field mapping table ([#347](https://github.com/ckan/ckanext-dcat/pull/347))


## [v2.4.0](https://github.com/ckan/ckanext-dcat/compare/v2.3.0...v2.4.0) - 2025-05-20
Expand Down
55 changes: 30 additions & 25 deletions ckanext/dcat/blueprints.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
from flask import Blueprint, jsonify, make_response

import ckantoolkit as toolkit
Expand All @@ -12,11 +11,7 @@
config = toolkit.config


dcat = Blueprint(
'dcat',
__name__,
url_defaults={u'package_type': u'dataset'}
)
dcat = Blueprint("dcat", __name__, url_defaults={"package_type": "dataset"})


def read_catalog(_format=None, package_type=None):
Expand All @@ -30,36 +25,44 @@ def read_dataset(_id, _format=None, package_type=None):
if endpoints_enabled():

# requirements={'_format': 'xml|rdf|n3|ttl|jsonld'}
dcat.add_url_rule(config.get('ckanext.dcat.catalog_endpoint',
utils.DEFAULT_CATALOG_ENDPOINT).replace(
'{_format}', '<_format>'),
view_func=read_catalog)
dcat.add_url_rule(
config.get(
"ckanext.dcat.catalog_endpoint", utils.DEFAULT_CATALOG_ENDPOINT
).replace("{_format}", "<_format>"),
view_func=read_catalog,
)

# TODO: Generalize for all dataset types
dcat.add_url_rule('/dataset_series/<_id>.<_format>', view_func=read_dataset)
dcat.add_url_rule('/dataset/<_id>.<_format>', view_func=read_dataset)
dcat.add_url_rule(
"/dataset_series/<_id>.<_format>",
view_func=read_dataset,
endpoint="read_dataset_series",
)
dcat.add_url_rule(
"/dataset/<_id>.<_format>", view_func=read_dataset, endpoint="read_dataset"
)


if toolkit.asbool(config.get(utils.ENABLE_CONTENT_NEGOTIATION_CONFIG)):
dcat.add_url_rule('/', view_func=read_catalog)
dcat.add_url_rule("/", view_func=read_catalog)

dcat.add_url_rule('/dataset/new', view_func=CreateView.as_view(str(u'new')))
dcat.add_url_rule('/dataset/<_id>', view_func=read_dataset)
dcat.add_url_rule("/dataset/new", view_func=CreateView.as_view(str("new")))
dcat.add_url_rule("/dataset/<_id>", view_func=read_dataset)

dcat_json_interface = Blueprint('dcat_json_interface', __name__)
dcat_json_interface = Blueprint("dcat_json_interface", __name__)


def dcat_json():
datasets = utils.dcat_json_page()
return jsonify(datasets)


dcat_json_interface.add_url_rule(config.get('ckanext.dcat.json_endpoint',
'/dcat.json'),
view_func=dcat_json)
dcat_json_interface.add_url_rule(
config.get("ckanext.dcat.json_endpoint", "/dcat.json"), view_func=dcat_json
)


croissant = Blueprint('croissant', __name__)
croissant = Blueprint("croissant", __name__)


def read_dataset_croissant(_id):
Expand All @@ -72,20 +75,22 @@ def read_dataset_croissant(_id):
)

context = {
'user': user_name,
"user": user_name,
}
data_dict = {'id': _id}
data_dict = {"id": _id}

dataset_dict = toolkit.get_action("package_show")(context, data_dict)
except (toolkit.ObjectNotFound, toolkit.NotAuthorized):
return toolkit.abort(
404,
toolkit._("Dataset not found or you have no permission to view it")
404, toolkit._("Dataset not found or you have no permission to view it")
)

response = make_response(croissant_serialization(dataset_dict))
response.headers["Content-type"] = "application/ld+json"

return response

croissant.add_url_rule('/dataset/<_id>/croissant.jsonld', view_func=read_dataset_croissant)

croissant.add_url_rule(
"/dataset/<_id>/croissant.jsonld", view_func=read_dataset_croissant
)
112 changes: 33 additions & 79 deletions ckanext/dcat/harvesters/rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,18 +210,39 @@ def gather_stage(self, harvest_job):
return []

try:
source_dataset = model.Package.get(harvest_job.source.id)

series_ids, series_mapping = self._parse_and_collect(
parser.dataset_series(),
source_dataset,
harvest_job,
guids_in_source,
is_series=True,
collect_series_mapping=True
)
object_ids += series_ids
object_ids += self._parse_and_collect(parser.datasets(series_mapping), source_dataset, harvest_job, guids_in_source, is_series=False)

source_dataset = model.Package.get(harvest_job.source.id)

for dataset in parser.datasets():
if not dataset.get('name'):
dataset['name'] = self._gen_new_name(dataset['title'])
if dataset['name'] in self._names_taken:
suffix = len([i for i in self._names_taken if i.startswith(dataset['name'] + '-')]) + 1
dataset['name'] = '{}-{}'.format(dataset['name'], suffix)
self._names_taken.append(dataset['name'])

# Unless already set by the parser, get the owner organization (if any)
# from the harvest source dataset
if not dataset.get('owner_org'):
if source_dataset.owner_org:
dataset['owner_org'] = source_dataset.owner_org

# Try to get a unique identifier for the harvested dataset
guid = self._get_guid(dataset, source_url=source_dataset.url)

if not guid:
self._save_gather_error('Could not get a unique identifier for dataset: {0}'.format(dataset),
harvest_job)
continue

dataset['extras'].append({'key': 'guid', 'value': guid})
guids_in_source.append(guid)

obj = HarvestObject(guid=guid, job=harvest_job,
content=json.dumps(dataset))

obj.save()
object_ids.append(obj.id)
except Exception as e:
self._save_gather_error('Error when processsing dataset: %r / %s' % (e, traceback.format_exc()),
harvest_job)
Expand Down Expand Up @@ -401,70 +422,3 @@ def import_stage(self, harvest_object):
model.Session.commit()

return True

def _parse_and_collect(
self,
items,
source_dataset,
harvest_job,
guids_in_source,
is_series=False,
collect_series_mapping=False
):
object_ids = []
label = "dataset series" if is_series else "dataset"
series_mapping = {} if collect_series_mapping else None

for item in items:
original_title = item.get("title", label)
if not item.get("name"):
item["name"] = self._gen_new_name(original_title)

if item["name"] in self._names_taken:
suffix = len([i for i in self._names_taken if i.startswith(item["name"] + "-")]) + 1
item["name"] = f"{item['name']}-{suffix}"

self._names_taken.append(item["name"])

if not item.get("owner_org") and source_dataset.owner_org:
item["owner_org"] = source_dataset.owner_org

guid = self._get_guid(item, source_url=source_dataset.url)
if not guid:
self._save_gather_error(f"Could not get a unique identifier for {label}: {item}", harvest_job)
continue

item.setdefault("extras", []).append({"key": "guid", "value": guid})
guids_in_source.append(guid)

obj = HarvestObject(guid=guid, job=harvest_job, content=json.dumps(item))
obj.save()
object_ids.append(obj.id)

# Store mapping of RDF URI to dataset name if requested
if collect_series_mapping:
series_uri = item.get("uri") or item.get("identifier")
if series_uri:
# Try to find an existing active dataset series by 'guid' match
existing = model.Session.query(model.Package).\
join(model.PackageExtra).\
filter(model.PackageExtra.key == 'guid').\
filter(model.PackageExtra.value == series_uri).\
filter(model.Package.type == 'dataset_series').\
filter(model.Package.state == 'active').\
first()

if existing:
item["name"] = existing.name

series_mapping[str(series_uri)] = {
"id": existing.id if existing else item.get("id"),
"name": item["name"]
}


if collect_series_mapping:
return object_ids, series_mapping

return object_ids

8 changes: 5 additions & 3 deletions ckanext/dcat/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def structured_data(dataset_dict, profiles=None):
return _get_serialization(dataset_dict, profiles, "jsonld")


def croissant(dataset_dict, profiles=None):
def croissant(dataset_dict, profiles=None, jsonld_context=None):
"""
Returns a string containing the Croissant ML representation of the given
dataset using the `croissant` profile.
Expand All @@ -82,8 +82,10 @@ def croissant(dataset_dict, profiles=None):
if not profiles:
profiles = config.get("ckanext.dcat.croissant.profiles", ["croissant"])

frame = {"@context": JSONLD_CONTEXT, "@type": "sc:Dataset"}
context = jsonld_context or JSONLD_CONTEXT

frame = {"@context": context, "@type": "sc:Dataset"}

return _get_serialization(
dataset_dict, profiles, "jsonld", context=JSONLD_CONTEXT, frame=frame
dataset_dict, profiles, "jsonld", context=context, frame=frame
)
45 changes: 1 addition & 44 deletions ckanext/dcat/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,16 +119,6 @@ def _datasets(self):
for dataset in self.g.subjects(RDF.type, DCAT.Dataset):
yield dataset

def _dataset_series(self):
'''
Generator that returns all DCAT dataset series on the graph

Yields rdflib.term.URIRef objects that can be used on graph lookups
and queries
'''
for dataset_series in self.g.subjects(RDF.type, DCAT.DatasetSeries):
yield dataset_series

def next_page(self):
'''
Returns the URL of the next page or None if there is no next page
Expand Down Expand Up @@ -183,7 +173,7 @@ def supported_formats(self):
for plugin
in rdflib.plugin.plugins(kind=rdflib.parser.Parser)])

def datasets(self, series_mapping=None):
def datasets(self):
'''
Generator that returns CKAN datasets parsed from the RDF graph

Expand All @@ -203,39 +193,6 @@ def datasets(self, series_mapping=None):
)
profile.parse_dataset(dataset_dict, dataset_ref)

# Add in_series if present in RDF and mapped
in_series = []
for series_ref in self.g.objects(dataset_ref, DCAT.inSeries):
key = str(series_ref)
if series_mapping and key in series_mapping:
in_series.append(series_mapping[key]["id"])

if in_series:
dataset_dict["in_series"] = in_series

yield dataset_dict


def dataset_series(self):
'''
Generator that returns CKAN dataset series parsed from the RDF graph

Each dataset series is passed to all the loaded profiles before being
yielded, so it can be further modified by each one of them.

Returns a dataset series dict that can be passed to eg `package_create`
or `package_update`
'''
for dataset_ref in self._dataset_series():
dataset_dict = {}
for profile_class in self._profiles:
profile = profile_class(
self.g,
dataset_type=self.dataset_type,
compatibility_mode=self.compatibility_mode
)
profile.parse_dataset(dataset_dict, dataset_ref)

yield dataset_dict


Expand Down
Loading