3
3
# check for multiple-text fields in the schema
4
4
# All changes are © Stichting Health-RI and are licensed under the AGPLv3 license
5
5
6
- from datetime import datetime
6
+ from datetime import datetime , timezone
7
7
import re
8
8
import json
9
9
import logging
15
15
from dateutil .parser import ParserError
16
16
from json import JSONDecodeError
17
17
from typing import Dict , List
18
- from rdflib import URIRef , Namespace
18
+ from rdflib import URIRef , Namespace , DCAT
19
19
20
20
log = logging .getLogger (__name__ )
21
21
@@ -85,9 +85,11 @@ def convert_datetime_string(date_value: str) -> datetime:
85
85
Converts datestrings (e.g. '2023-10-06T10:12:55.614000+00:00') to datetime class instance
86
86
"""
87
87
try :
88
- date_value = dateparser .parse (date_value )
88
+ date_value = dateparser .parse (date_value , yearfirst = True )
89
+ if date_value .tzinfo is not None :
90
+ date_value = date_value .astimezone (timezone .utc )
89
91
except ParserError :
90
- log .error ('A date field string value can not be parsed to a date' )
92
+ log .error (f 'A date field string value { date_value } can not be parsed to a date' )
91
93
return date_value
92
94
93
95
@@ -98,43 +100,42 @@ class FAIRDataPointDCATAPProfile(EuropeanDCATAP2Profile):
98
100
99
101
def parse_dataset (self , dataset_dict : Dict , dataset_ref : URIRef ) -> Dict :
100
102
super (FAIRDataPointDCATAPProfile , self ).parse_dataset (dataset_dict , dataset_ref )
103
+ dataset_dict = self ._parse_contact_point (dataset_dict , dataset_ref )
101
104
102
105
dataset_dict = _convert_extras_to_declared_schema_fields (dataset_dict )
103
106
104
107
dataset_dict ['tags' ] = validate_tags (dataset_dict ['tags' ])
105
108
106
109
return dataset_dict
107
110
108
- def _contact_details (self , subject , predicate ):
111
+ def _contact_point_details (self , subject , predicate ) -> List :
109
112
"""
110
113
Overrides RDFProfile._contact_details so uri is taken from hasUID for VCard
111
114
"""
112
- contact = {}
113
- # todo fix for multiple
115
+ contact_list = []
114
116
115
117
for agent in self .g .objects (subject , predicate ):
118
+ contact = {
119
+ 'contact_uri' : (str (agent ) if isinstance (agent , URIRef )
120
+ else self ._get_vcard_property_value (agent , VCARD .hasUID )),
121
+ 'contact_name' : self ._get_vcard_property_value (agent , VCARD .hasFN , VCARD .fn ),
122
+ 'contact_email' : self ._without_mailto (self ._get_vcard_property_value (agent , VCARD .hasEmail ))}
116
123
117
- contact ['uri' ] = (str (agent ) if isinstance (agent , URIRef )
118
- else self ._get_vcard_property_value (agent , VCARD .hasUID ))
119
-
120
- contact ['name' ] = self ._get_vcard_property_value (agent , VCARD .hasFN , VCARD .fn )
121
-
122
- contact ['email' ] = self ._without_mailto (self ._get_vcard_property_value (agent , VCARD .hasEmail ))
123
-
124
- return contact
125
-
126
- # def graph_from_dataset(self, dataset_dict, dataset_ref):
127
- #
128
- # g = self.g
129
- #
130
- # spatial_text = self._get_dataset_value(dataset_dict, 'hello')
131
- #
132
- # if spatial_uri:
133
- # spatial_ref = URIRef(spatial_uri)
134
- # else:
135
- # spatial_ref = BNode()
136
- #
137
- # if spatial_text:
138
- # g.add((dataset_ref, DCT.spatial, spatial_ref))
139
- # g.add((spatial_ref, RDF.type, DCT.Location))
140
- # g.add((spatial_ref, RDFS.label, Literal(spatial_text)))
124
+ contact_list .append (contact )
125
+
126
+ return contact_list
127
+
128
+ def _parse_contact_point (self , dataset_dict : Dict , dataset_ref : URIRef ) -> Dict :
129
+ """
130
+ ckan-dcat extension implies there can be just one contact point and in case a list is provided by source only
131
+ the last value is taken. Besides, it never resolves the uri from a VCard object. This function parses DCAT.contactPoint
132
+ information to a list of `contact_point` dictionaries and replaces ckan-dcat values
133
+ """
134
+ contact_point = self ._contact_point_details (subject = dataset_ref , predicate = DCAT .contactPoint )
135
+ dcat_profile_contact_fields = ['contact_name' , 'contact_email' , 'contact_uri' ]
136
+ if contact_point :
137
+ dataset_dict ['extras' ].append ({'key' : 'contact_point' , 'value' : contact_point })
138
+ # Remove the extras contact_ fields if they were parsed by dcat extension
139
+ dataset_dict ['extras' ] = \
140
+ [item for item in dataset_dict ['extras' ] if item .get ('key' ) not in dcat_profile_contact_fields ]
141
+ return dataset_dict
0 commit comments