Skip to content

Commit 44bd646

Browse files
Merge pull request #80 from GenomicDataInfrastructure/Upgrade-DCAT-AP3
Upgrade dcat ap3
2 parents bdd98f2 + 7fba24a commit 44bd646

File tree

11 files changed

+289
-976
lines changed

11 files changed

+289
-976
lines changed

.github/workflows/test.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ jobs:
4444
pip install --upgrade pytest-rerunfailures
4545
pip install -e 'git+https://github.com/CivityNL/ckanext-scheming.git@<tag>#egg=ckanext-scheming[requirements]'
4646
pip install -e 'git+https://github.com/ckan/ckanext-harvest.git@<tag>#egg=ckanext-harvest[requirements]'
47-
pip install -e 'git+https://github.com/ckan/ckanext-dcat.git@v1.5.1#egg=ckanext-dcat[requirements]'
48-
pip install -r https://raw.githubusercontent.com/ckan/ckanext-dcat/v1.5.1/requirements.txt
47+
pip install -e 'git+https://github.com/ckan/ckanext-dcat.git@v2.0.0#egg=ckanext-dcat[requirements]'
48+
pip install -r https://raw.githubusercontent.com/ckan/ckanext-dcat/v2.0.0/requirements.txt
4949
python3 setup.py develop
5050
- name: Setup extension
5151
run: |

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ otherwise run:
108108
```commandline
109109
pip install -e 'git+https://github.com/ckan/ckanext-scheming.git@<tag>#egg=ckanext-scheming[requirements]'
110110
pip install -e 'git+https://github.com/ckan/ckanext-harvest.git@<tag>#egg=ckanext-harvest[requirements]'
111-
pip install -e 'git+https://github.com/ckan/ckanext-dcat.git@v1.5.1#egg=ckanext-dcat'
112-
pip install -r https://raw.githubusercontent.com/ckan/ckanext-dcat/v1.5.1/requirements.txt
111+
pip install -e 'git+https://github.com/ckan/ckanext-dcat.git@v2.0.0#egg=ckanext-dcat'
112+
pip install -r https://raw.githubusercontent.com/ckan/ckanext-dcat/v2.0.0/requirements.txt
113113
```
114114

115115
## Tests

ckanext/fairdatapoint/processors.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def catalogs(self) -> Iterable[Dict]:
3535
for catalog_ref in self._catalogs():
3636
catalog_dict = {}
3737
for profile_class in self._profiles:
38-
profile = profile_class(self.g, self.compatibility_mode)
38+
profile = profile_class(graph= self.g,compatibility_mode = self.compatibility_mode)
3939
profile.parse_dataset(catalog_dict, catalog_ref)
4040

4141
yield catalog_dict

ckanext/fairdatapoint/profiles.py

Lines changed: 12 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import json
99
import logging
1010

11-
from ckanext.dcat.profiles import EuropeanDCATAP2Profile
11+
from ckanext.dcat.profiles import EuropeanDCATAP3Profile
1212
from ckan.plugins import toolkit
1313
from ckan import model
1414
import dateutil.parser as dateparser
@@ -21,46 +21,6 @@
2121

2222
VCARD = Namespace("http://www.w3.org/2006/vcard/ns#")
2323

24-
25-
def _convert_extras_to_declared_schema_fields(dataset_dict: Dict) -> Dict:
26-
"""
27-
Compares the extras dictionary with the declared schema.
28-
Updates the declared schema fields with the values that match from the extras.
29-
Remove the extras that are present on the declared schema.
30-
:param dataset_dict:
31-
:return: dataset_dict - Updated dataset_dict
32-
"""
33-
# Use the correct dataset type, Defaults to 'dataset'
34-
dataset_type = dataset_dict.get('type', 'dataset')
35-
# Gets the full Schema definition of the correct dataset type
36-
context = {'model': model, 'session': model.Session}
37-
data_dict = {'type': dataset_type}
38-
full_schema_dict = toolkit.get_action('scheming_dataset_schema_show')(context, data_dict)
39-
40-
dataset_fields = {x.get('field_name'): x.get('preset') for x in full_schema_dict.get('dataset_fields', [])}
41-
42-
# Populate the declared schema fields, if they are present in the extras
43-
for extra_dict in dataset_dict.get('extras', []):
44-
field_key = extra_dict.get('key')
45-
field_value = extra_dict.get('value')
46-
if field_key in dataset_fields:
47-
preset = dataset_fields[field_key]
48-
if preset == 'multiple_text' and field_value:
49-
try:
50-
dataset_dict[field_key] = json.loads(field_value)
51-
except JSONDecodeError:
52-
dataset_dict[field_key] = field_value
53-
elif preset == 'date' and field_value:
54-
dataset_dict[field_key] = convert_datetime_string(field_value)
55-
else:
56-
dataset_dict[field_key] = field_value
57-
58-
# Remove the extras that have been populated into the declared schema fields
59-
dataset_dict['extras'] = [d for d in dataset_dict['extras'] if d.get('key') not in dataset_fields]
60-
61-
return dataset_dict
62-
63-
6424
def validate_tags(values_list: List[Dict]) -> List:
6525
"""
6626
Validates tags strings to contain allowed characters, replaces others with spaces
@@ -85,31 +45,18 @@ def validate_tags(values_list: List[Dict]) -> List:
8545
return tags
8646

8747

88-
def convert_datetime_string(date_value: str) -> datetime:
89-
"""
90-
Converts datestrings (e.g. '2023-10-06T10:12:55.614000+00:00') to datetime class instance
91-
"""
92-
try:
93-
date_value = dateparser.parse(date_value, yearfirst=True)
94-
if date_value.tzinfo is not None:
95-
date_value = date_value.astimezone(timezone.utc)
96-
except ParserError:
97-
log.error(f'A date field string value {date_value} can not be parsed to a date')
98-
return date_value
99-
100-
101-
class FAIRDataPointDCATAPProfile(EuropeanDCATAP2Profile):
48+
class FAIRDataPointDCATAPProfile(EuropeanDCATAP3Profile):
10249
"""
10350
An RDF profile for FAIR data points
10451
"""
10552

10653
def parse_dataset(self, dataset_dict: Dict, dataset_ref: URIRef) -> Dict:
10754
super(FAIRDataPointDCATAPProfile, self).parse_dataset(dataset_dict, dataset_ref)
108-
dataset_dict = self._parse_contact_point(dataset_dict, dataset_ref)
10955

56+
#dataset_dict = self._parse_contact_point(dataset_dict, dataset_ref)
11057
dataset_dict = self._parse_creator(dataset_dict, dataset_ref)
11158

112-
dataset_dict = _convert_extras_to_declared_schema_fields(dataset_dict)
59+
## dataset_dict = _convert_extras_to_declared_schema_fields(dataset_dict)
11360

11461
dataset_dict['tags'] = validate_tags(dataset_dict['tags'])
11562

@@ -123,10 +70,10 @@ def _contact_point_details(self, subject, predicate) -> List:
12370

12471
for agent in self.g.objects(subject, predicate):
12572
contact = {
126-
'contact_uri': (str(agent) if isinstance(agent, URIRef)
73+
'uri': (str(agent) if isinstance(agent, URIRef)
12774
else self._get_vcard_property_value(agent, VCARD.hasUID)),
128-
'contact_name': self._get_vcard_property_value(agent, VCARD.hasFN, VCARD.fn),
129-
'contact_email': self._without_mailto(self._get_vcard_property_value(agent, VCARD.hasEmail))}
75+
'name': self._get_vcard_property_value(agent, VCARD.hasFN, VCARD.fn),
76+
'email': self._without_mailto(self._get_vcard_property_value(agent, VCARD.hasEmail))}
13077

13178
contact_list.append(contact)
13279

@@ -156,16 +103,16 @@ def _parse_creator(self, dataset_dict: Dict, dataset_ref: URIRef) -> Dict:
156103
creator_name = graph.value(creator_ref, FOAF.name)
157104

158105
if creator_identifier:
159-
creator['creator_identifier'] = str(creator_identifier)
106+
creator['identifier'] = str(creator_identifier)
160107
if creator_name:
161-
creator['creator_name'] = str(creator_name)
108+
creator['name'] = str(creator_name)
162109
else:
163110
# If the creator is a URI, use it as the identifier
164111
if isinstance(creator_ref, URIRef):
165-
creator['creator_identifier'] = str(creator_ref)
166-
creator['creator_name'] = str(creator_ref)
112+
creator['identifier'] = str(creator_ref)
113+
creator['name'] = str(creator_ref)
167114
else:
168-
creator['creator_name'] = str(creator_ref)
115+
creator['name'] = str(creator_ref)
169116

170117
creators.append(creator)
171118

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#SPDX-FileCopyrightText: 2024 Stichting Health-RI
2+
#
3+
#SPDX-License-Identifier: Apache-2.0
4+
5+
scheming_version: 2
6+
about: GDI scheming field presets
7+
about_url: https://github.com/GenomicDataInfrastructure/gdi-userportal-ckanext-gdi-userportal/
8+
presets:
9+
- preset_name: datetime_flex
10+
values:
11+
form_snippet: datetime.html
12+
display_snippet: datetime.html
13+
validators: scheming_isodatetime_flex convert_to_json_if_datetime
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#SPDX-FileCopyrightText: 2024 PNED G.I.E.
2+
#
3+
#SPDX-License-Identifier: Apache-2.0
4+
5+
scheming_version: 2
6+
dataset_type: dataset
7+
about: DCAT-AP 3 compatible schema
8+
about_url: http://github.com/ckan/ckanext-dcat
9+
10+
dataset_fields:
11+
- field_name: issued
12+
label:
13+
en: Issued Date
14+
nl: Uitgegeven Datum
15+
preset: datetime_flex
16+
help_text:
17+
en: "[dct:issued] This property contains the date of formal issuance (e.g., publication) of the Dataset."
18+
nl: "[dct:issued] Deze eigenschap bevat de datum van formele uitgave (bijv. publicatie) van de Dataset."
19+
20+
- field_name: modified
21+
label:
22+
en: Modification Date
23+
nl: Datum Wijziging
24+
preset: datetime_flex
25+
help_text:
26+
en: "[dct:modified] This property contains the most recent date on which the Dataset was changed or modified."
27+
nl: "[dct:modified] Deze eigenschap bevat de meest recente datum waarop de Dataset is gewijzigd of aangepast."
28+
29+
- field_name: has_version
30+
label:
31+
en: Has Version
32+
nl: Bevat Versie
33+
preset: multiple_text
34+
help_inline: true
35+
help_text:
36+
en: "[dct:hasVersion] This property refers to a related Dataset that is a version, edition, or adaptation of the described Dataset."
37+
nl: "[dct:hasVersion] Deze eigenschap verwijst naar een gerelateerde Dataset die een versie, editie of aanpassing is van de beschreven Dataset."
38+
39+
- field_name: temporal_start
40+
label:
41+
en: Temporal Start Date
42+
nl: Begindatum tijdsperiode
43+
help_inline: true
44+
help_text:
45+
en: "[dct:temporal] This property refers to a temporal period that the Dataset covers."
46+
nl: "[dct:temporal] Deze eigenschap verwijst naar een tijdsperiode die door de Dataset wordt gedekt."
47+
preset: datetime_flex
48+
49+
- field_name: temporal_end
50+
label:
51+
en: Temporal End Date
52+
nl: Einddatum tijdsperiode
53+
help_inline: true
54+
help_text:
55+
en: "[dct:temporal] This property refers to a temporal period that the Dataset covers."
56+
nl: "[dct:temporal] Deze eigenschap verwijst naar een tijdsperiode die door de Dataset wordt gedekt."
57+
preset: datetime_flex
58+
59+
- field_name: creator
60+
label: Creator
61+
repeating_subfields:
62+
63+
- field_name: uri
64+
label: Creator URI
65+
66+
- field_name: name
67+
label: Creator Name
68+
69+
- field_name: email
70+
label: Creator Email
71+
display_snippet: email.html
72+
73+
- field_name: url
74+
label: Creator URL
75+
display_snippet: link.html
76+
77+
- field_name: type
78+
label: Creator Type
79+
80+
- field_name: identifier
81+
label: Creator Identifier
82+
help_text:
83+
en: Unique identifier for the creator, such as a ROR ID.
84+
nl: Unieke identificatie voor de maker, zoals een ROR-ID.
85+
help_text:
86+
en: Entity responsible for producing the dataset.
87+
nl: Entiteit die verantwoordelijk is voor het produceren van de dataset.
88+
89+
resource_fields:
90+
- field_name: issued
91+
label:
92+
en: Issued Date
93+
nl: Uitgegeven Datum
94+
preset: datetime_flex
95+
help_text:
96+
en: "[dct:issued] This property contains the date of formal issuance (e.g., publication) of the Resource."
97+
nl: "[dct:issued] Deze eigenschap bevat de datum van formele uitgave (bijv. publicatie) van de Resource."
98+
99+
- field_name: modified
100+
label:
101+
en: Modification Date
102+
nl: Datum Wijziging
103+
preset: datetime_flex
104+
help_text:
105+
en: "[dct:modified] This property contains the most recent date on which the Resource was changed or modified."
106+
nl: "[dct:modified] Deze eigenschap bevat de meest recente datum waarop de Resource is gewijzigd of aangepast."

0 commit comments

Comments
 (0)