18
18
from typing import Dict , Iterable , Union
19
19
20
20
21
# RDF namespaces for the Linked Data Platform and vCard vocabularies.
# The IRIs must not carry leading/trailing whitespace: terms are built by
# appending a local name to these strings.
LDP = Namespace("http://www.w3.org/ns/ldp#")
VCARD = Namespace("http://www.w3.org/2006/vcard/ns#")

# Module-level logger named after this module.
log = logging.getLogger(__name__)
25
25
26
26
27
27
class FairDataPointRecordProvider :
28
28
29
def __init__(self, fdp_end_point: str, harvest_catalogs: bool = False):
    """
    Create a record provider bound to one FAIR Data Point.

    :param fdp_end_point: end point passed on to ``FairDataPoint``
    :param harvest_catalogs: when true, catalog subjects are themselves
        registered as records while catalogs are processed; defaults to
        ``False``, in which case only datasets are registered
    """
    # Stored as-is; consulted when catalogs are processed.
    self.harvest_catalogs = harvest_catalogs
    # Client used for every graph lookup made by this provider.
    self.fair_data_point = FairDataPoint(fdp_end_point)
31
32
32
33
def get_record_ids (self ) -> Dict .keys :
33
34
"""
34
35
Returns all the FDP records which should end up as packages in CKAN to populate the "guids_in_harvest" list
35
36
https://rdflib.readthedocs.io/en/stable/intro_to_parsing.html
36
37
"""
37
- log .debug ('FAIR Data Point get_records from {}' .format (self .fair_data_point .fdp_end_point ))
38
+ log .debug (
39
+ "FAIR Data Point get_records from {}" .format (
40
+ self .fair_data_point .fdp_end_point
41
+ )
42
+ )
38
43
39
44
result = dict ()
40
45
@@ -52,20 +57,21 @@ def _process_catalog(self, path: Union[str, URIRef]) -> Dict:
52
57
catalogs_graph = self .fair_data_point .get_graph (path )
53
58
54
59
for catalog_subject in catalogs_graph .subjects (RDF .type , DCAT .Catalog ):
55
- identifier = Identifier ('' )
60
+ identifier = Identifier ("" )
56
61
57
- identifier .add (' catalog' , str (catalog_subject ))
62
+ identifier .add (" catalog" , str (catalog_subject ))
58
63
59
- result [identifier .guid ] = catalog_subject
64
+ if self .harvest_catalogs :
65
+ result [identifier .guid ] = catalog_subject
60
66
61
67
catalog_graph = self .fair_data_point .get_graph (catalog_subject )
62
68
63
69
for dataset_subject in catalog_graph .objects (predicate = DCAT .dataset ):
64
- identifier = Identifier ('' )
70
+ identifier = Identifier ("" )
65
71
66
- identifier .add (' catalog' , str (catalog_subject ))
72
+ identifier .add (" catalog" , str (catalog_subject ))
67
73
68
- identifier .add (' dataset' , str (dataset_subject ))
74
+ identifier .add (" dataset" , str (dataset_subject ))
69
75
70
76
result [identifier .guid ] = dataset_subject
71
77
@@ -76,7 +82,10 @@ def get_record_by_id(self, guid: str) -> str:
76
82
Get additional information for FDP record.
77
83
"""
78
84
log .debug (
79
- 'FAIR data point get_record_by_id from {} for {}' .format (self .fair_data_point .fdp_end_point , guid ))
85
+ "FAIR data point get_record_by_id from {} for {}" .format (
86
+ self .fair_data_point .fdp_end_point , guid
87
+ )
88
+ )
80
89
81
90
identifier = Identifier (guid )
82
91
@@ -89,7 +98,9 @@ def get_record_by_id(self, guid: str) -> str:
89
98
self ._remove_fdp_defaults (g , subject_uri )
90
99
91
100
# Add information from distribution to graph
92
- for distribution_uri in g .objects (subject = subject_uri , predicate = DCAT .distribution ):
101
+ for distribution_uri in g .objects (
102
+ subject = subject_uri , predicate = DCAT .distribution
103
+ ):
93
104
distribution_g = self .fair_data_point .get_graph (distribution_uri )
94
105
95
106
self ._remove_fdp_defaults (g , distribution_uri )
@@ -99,17 +110,21 @@ def get_record_by_id(self, guid: str) -> str:
99
110
DCTERMS .format ,
100
111
DCTERMS .license ,
101
112
DCTERMS .title ,
102
- DCAT .accessURL
113
+ DCAT .accessURL ,
103
114
]:
104
- for distr_attribute_value in self .get_values (distribution_g , distribution_uri , predicate ):
115
+ for distr_attribute_value in self .get_values (
116
+ distribution_g , distribution_uri , predicate
117
+ ):
105
118
g .add ((distribution_uri , predicate , distr_attribute_value ))
106
119
107
120
# Look-up contact information
108
121
for contact_point_uri in self .get_values (g , subject_uri , DCAT .contactPoint ):
109
122
if isinstance (contact_point_uri , URIRef ):
110
- self ._parse_contact_point (g = g , subject_uri = subject_uri , contact_point_uri = contact_point_uri )
123
+ self ._parse_contact_point (
124
+ g = g , subject_uri = subject_uri , contact_point_uri = contact_point_uri
125
+ )
111
126
112
- result = g .serialize (format = ' ttl' )
127
+ result = g .serialize (format = " ttl" )
113
128
114
129
return result
115
130
@@ -123,19 +138,23 @@ def _parse_contact_point(g: Graph, subject_uri: URIRef, contact_point_uri: URIRe
123
138
g .add ((subject_uri , DCAT .contactPoint , vcard_node ))
124
139
g .add ((vcard_node , RDF .type , VCARD .Kind ))
125
140
g .add ((vcard_node , VCARD .hasUID , contact_point_uri ))
126
- if ' orcid' in str (contact_point_uri ):
141
+ if " orcid" in str (contact_point_uri ):
127
142
try :
128
- orcid_response = requests .get (str (contact_point_uri ).rstrip ('/' ) + '/public-record.json' )
143
+ orcid_response = requests .get (
144
+ str (contact_point_uri ).rstrip ("/" ) + "/public-record.json"
145
+ )
129
146
json_orcid_response = orcid_response .json ()
130
- name = json_orcid_response [' displayName' ]
147
+ name = json_orcid_response [" displayName" ]
131
148
g .add ((vcard_node , VCARD .fn , Literal (name )))
132
149
except (JSONDecodeError , HTTPError ) as e :
133
- log .error (f' Failed to get data from ORCID for { contact_point_uri } : { e } ' )
150
+ log .error (f" Failed to get data from ORCID for { contact_point_uri } : { e } " )
134
151
135
152
@staticmethod
136
- def get_values (graph : Graph ,
137
- subject : Union [str , URIRef , Node ],
138
- predicate : Union [str , URIRef , Node ]) -> Iterable [Node ]:
153
+ def get_values (
154
+ graph : Graph ,
155
+ subject : Union [str , URIRef , Node ],
156
+ predicate : Union [str , URIRef , Node ],
157
+ ) -> Iterable [Node ]:
139
158
subject_uri = URIRef (subject )
140
159
predicate_uri = URIRef (predicate )
141
160
@@ -144,8 +163,8 @@ def get_values(graph: Graph,
144
163
145
164
@staticmethod
def _remove_fdp_defaults(g, subject_uri):
    """
    Remove the default access-rights node of a subject from the graph.

    A ``dcterms:accessRights`` value of ``subject_uri`` is treated as the
    default when it equals ``<subject_uri>#accessRights``. For such a value
    both the link from the subject and every triple that has the default
    node as its subject are removed from ``g``. Other access-rights values
    are left untouched.

    :param g: graph to clean up; modified in place
    :param subject_uri: subject whose access rights are inspected
    """
    # Depends only on the subject, so build it once instead of per triple.
    access_rights_default = URIRef(f"{subject_uri}#accessRights")

    # Snapshot the matches before removing anything, so the graph is never
    # mutated while one of its own iterators is still being consumed.
    matches = list(g.triples((subject_uri, DCTERMS.accessRights, None)))

    for _, _, access_rights in matches:
        if access_rights == access_rights_default:
            g.remove((subject_uri, DCTERMS.accessRights, access_rights))
            g.remove((access_rights_default, None, None))
0 commit comments