1
1
import logging
2
2
import yaml
3
- import csv
4
3
import deprecation
5
4
import requests
6
5
from functools import lru_cache , reduce
7
6
8
- from typing import List , Union , TextIO , Optional , Dict , Set
9
- from linkml_runtime .utils .schemaview import SchemaView , Namespaces
7
+ from typing import List , Union , TextIO , Optional , Dict
8
+ from linkml_runtime .utils .schemaview import SchemaView
10
9
from linkml_runtime .linkml_model .meta import (
11
10
SchemaDefinition ,
12
11
Element ,
15
14
ClassDefinition ,
16
15
SlotDefinition ,
17
16
)
18
- from pprint import pprint
19
17
from bmt .utils import format_element , parse_name
20
18
21
19
Url = str
@@ -202,7 +200,7 @@ def filter_values_on_slot(
202
200
slot_values : List [str ],
203
201
definition : SlotDefinition ,
204
202
field : str ,
205
- formatted : bool = True
203
+ formatted : bool = True
206
204
) -> bool :
207
205
"""
208
206
@@ -216,7 +214,7 @@ def filter_values_on_slot(
216
214
Name of embedded (slot) field rooting the tree of elements
217
215
against which the slot_values are to be matched.
218
216
formatted: bool = True
219
- Use of Biolink CURIE identifiers assumed when true
217
+ Use of Biolink CURIE identifiers assumed when True (default: True)
220
218
221
219
Returns
222
220
-------
@@ -230,9 +228,94 @@ def filter_values_on_slot(
230
228
if value :
231
229
value_set = self .get_descendants (value , formatted = formatted )
232
230
return any ([entry in slot_values for entry in value_set ])
231
+ if "description" in definition and definition ["description" ] is not None :
232
+ # In the case where the target 'field' is missing target details but the definition
233
+ # still has a 'description' field, we deflect responsibility for vetting the slot_values
234
+ # to the caller of the function (this is effectively saying 'all slot values are acceptable'
235
+ # in this position, although the description itself may informally constrain them otherwise).
236
+ return True
233
237
return False
234
238
235
- def match_slot_usage (self , element , slot : str , slot_values : List [str ]) -> bool :
239
+ @staticmethod
240
+ def get_local_slot_usage (element : Element , slot : str ) -> Optional [SlotDefinition ]:
241
+ """
242
+ Retrieve the definition of a specified slot from the 'slot_usage'
243
+ defined locally within the class model of the specified Element.
244
+
245
+ Parameters
246
+ ----------
247
+ element:
248
+ Element defining the specified 'slot' in its local 'slot_usage'.
249
+ slot: str
250
+ Name of the slot whose definition is to be retrieved.
251
+
252
+ Returns
253
+ -------
254
+ Optional[SlotDefinition]
255
+ None, if not available.
256
+
257
+ """
258
+ slot_definition : Optional [SlotDefinition ] = None
259
+ if "slot_usage" in element :
260
+ slot_usage = element ["slot_usage" ]
261
+ if slot_usage and slot in slot_usage :
262
+ slot_definition = slot_usage [slot ]
263
+ return slot_definition
264
+
265
+ def get_slot_usage (self , element : Element , slot : str ) -> Optional [SlotDefinition ]:
266
+ """
267
+ Get the definition of a specified slot from the 'slot_usage' of a specified Element.
268
+ A relatively deep search is made within the Element context - local class, parent class
269
+ and associated mixins - to discover an available 'slot_usage' definition for the slot.
270
+
271
+ Parameters
272
+ ----------
273
+ element:
274
+ Element defining the specified 'slot' in 'slot_usage'.
275
+ slot: str
276
+ Name of the slot whose definition is to be retrieved.
277
+
278
+ Returns
279
+ -------
280
+ Optional[SlotDefinition]
281
+ None, if not available.
282
+
283
+ """
284
+ # Check first for local referencing of the slot
285
+ slot_definition : Optional [SlotDefinition ] = self .get_local_slot_usage (element , slot )
286
+
287
+ # shallow (immediate parent) check up the class hierarchy...
288
+ # Note: we don't attempt a recursive search through ancestors for now
289
+ if slot_definition is None and "is_a" in element and element ["is_a" ]:
290
+ parent_name : str = element ["is_a" ]
291
+ parent : Element = self .get_element (parent_name )
292
+ slot_definition = self .get_local_slot_usage (parent , slot )
293
+
294
+ # if still empty-handed at this point follow the mixins
295
+ if slot_definition is None and "mixins" in element and element ["mixins" ]:
296
+ # 'slot_usage' for some fields may be inherited from the association mixins. For example:
297
+ #
298
+ # druggable gene to disease association
299
+ # mixins:
300
+ # - entity to disease association mixin
301
+ # - gene to entity association mixin
302
+ #
303
+ # the mixins would have a 'subject' slot_usage for Gene and 'object' usage for Disease
304
+ for mixin_name in element ["mixins" ]:
305
+ mixin : Element = self .get_element (mixin_name )
306
+ slot_definition = self .get_local_slot_usage (mixin , slot )
307
+ if slot_definition :
308
+ break
309
+
310
+ return slot_definition
311
+
312
+ def match_slot_usage (
313
+ self ,
314
+ element ,
315
+ slot : str ,
316
+ slot_values : List [str ],
317
+ formatted : bool = True
318
+ ) -> bool :
236
319
"""
237
320
Match slot_values against expected slot_usage for
238
321
specified slot in specified (association) element.
@@ -245,55 +328,78 @@ def match_slot_usage(self, element, slot: str, slot_values: List[str]) -> bool:
245
328
Name of target slot in given element, against which slot_usage is being assessed.
246
329
slot_values: List[str]
247
330
List of slot value (strings) e.g. categories, predicates, etc. - being assessed against slot_usage
331
+ formatted: bool = True
332
+ Use of Biolink CURIE identifiers in slot_values assumed when True (default: True)
248
333
249
334
Returns
250
335
-------
251
336
bool
252
- Returns 'True' if slot_values are compatible with slot usage within given element
337
+ Returns 'True' if slot exists and slot_values are compatible with slot usage
338
+ within the given element (or its immediate parent or mixins); False otherwise
253
339
254
340
"""
255
341
# sanity check: for now, this method only handles the slots listed below
256
342
assert slot in ["subject" , "object" , "predicate" ]
257
343
258
- slot_definition : Optional [SlotDefinition ] = None
259
-
260
- if "slot_usage" in element :
261
- slot_usage = element ["slot_usage" ]
262
- if slot_usage and slot in slot_usage :
263
- slot_definition = slot_usage [slot ]
264
- elif "mixins" in element and element ["mixins" ]:
265
- # 'slot_usage' for some fields may be inherited
266
- # from the association mixins. For example:
267
- #
268
- # druggable gene to disease association
269
- # mixins:
270
- # - entity to disease association mixin
271
- # - gene to entity association mixin
272
- #
273
- # the mixins would have a 'subject' slot_usage
274
- # for Gene and 'object' usage for Disease
275
- #
276
- for mixin in element ["mixins" ]:
277
- mixin_element : Element = self .get_element (mixin )
278
- if "slot_usage" in mixin_element :
279
- slot_usage = mixin_element ["slot_usage" ]
280
- if slot_usage and slot in slot_usage :
281
- slot_definition = slot_usage [slot ]
282
- if slot_definition :
283
- break
344
+ slot_definition : Optional [SlotDefinition ] = self .get_slot_usage (element , slot )
284
345
285
346
# assess "slot_values" for "subject", "object"
286
347
# or "predicate" against stipulated constraints
287
348
if slot_definition :
288
349
if slot == "predicate" :
289
350
# check for a non-null "subproperty_of" constraint on a "predicate" slot_value
290
- return self .filter_values_on_slot (slot_values , slot_definition , "subproperty_of" )
351
+ return self .filter_values_on_slot (slot_values , slot_definition , "subproperty_of" , formatted = formatted )
291
352
else :
292
353
# check for a non-null "range" constraint on a "subject" or "object" slot_value
293
- return self .filter_values_on_slot (slot_values , slot_definition , "range" )
354
+ return self .filter_values_on_slot (slot_values , slot_definition , "range" , formatted = formatted )
355
+ elif slot == "predicate" :
356
+ # the default here if no 'predicate' slot_usage constraint is defined in the model,
357
+ # is to assume that any and all predicates are allowed for this specified subclass
358
+ # of biolink:Association. This is functionally identical to a slot definition that has
359
+ # only a 'description' property (which doesn't computationally restrict things either).
360
+ return True
294
361
295
362
return False
296
363
364
+ def match_association (
365
+ self ,
366
+ assoc : Element ,
367
+ subj_cats : List [str ],
368
+ predicates : List [str ],
369
+ obj_cats : List [str ],
370
+ formatted : bool = True
371
+ ) -> bool :
372
+ """
373
+ Match a specified element (assumed to be a child of biolink:Association) to a given set of
374
+ Subject category -- Predicate -> Object category name constraints.
375
+
376
+ Parameters
377
+ ----------
378
+ assoc: Element
379
+ Subclass of biolink:Association to be matched.
380
+ subj_cats: List[str]
381
+ List of Biolink CURIEs of subject categories.
382
+ predicates: List[str]
383
+ List of Biolink CURIEs of predicates.
384
+ obj_cats: List[str]
385
+ List of Biolink CURIEs of object categories.
386
+ formatted: bool = True
387
+ Use of Biolink CURIE identifiers in 'subj_cats', 'predicates' and 'obj_cats' assumed when True (default: True)
388
+
389
+ Returns
390
+ -------
391
+ bool:
392
+ True if all constraints match the slot_usage of the Association components.
393
+
394
+ """
395
+ if subj_cats and not self .match_slot_usage (assoc , "subject" , subj_cats , formatted = formatted ):
396
+ return False
397
+ if predicates and not self .match_slot_usage (assoc , "predicate" , predicates , formatted = formatted ):
398
+ return False
399
+ if obj_cats and not self .match_slot_usage (assoc , "object" , obj_cats , formatted = formatted ):
400
+ return False
401
+ return True
402
+
297
403
def get_associations (
298
404
self ,
299
405
subject_categories : Optional [List [str ]] = None ,
@@ -346,41 +452,27 @@ def get_associations(
346
452
inverse_predicates .append (inverse_p )
347
453
inverse_predicates = self ._format_all_elements (elements = inverse_predicates , formatted = True )
348
454
349
- def match_association (
350
- assoc : Element ,
351
- subj_cats : List [str ],
352
- preds : List [str ],
353
- obj_cats : List [str ],
354
-
355
- ) -> bool :
356
- if subj_cats and not self .match_slot_usage (assoc , "subject" , subj_cats ):
357
- return False
358
- if preds and not self .match_slot_usage (assoc , "predicate" , preds ):
359
- return False
360
- if obj_cats and not self .match_slot_usage (assoc , "object" , obj_cats ):
361
- return False
362
- return True
363
-
364
455
if subject_categories or predicates or object_categories :
365
456
# This feels like a bit of a brute force approach as an implementation,
366
457
# but we just use the list of all association names to retrieve each
367
458
# association record for filtering against the constraints.
368
459
for name in association_elements :
369
460
370
- association : Optional [Element ] = self .get_element (name )
371
- if not association :
372
- continue
461
+ # although get_element() returns Optional[Element],
462
+ # the association_elements all come from
463
+ # get_descendants(), hence are assumed to be extant
464
+ association : Element = self .get_element (name )
373
465
374
466
# sanity checks, probably not necessary
375
467
# assert association, f"'{name}' not a Biolink Element?"
376
468
# assert isinstance(association, ClassDefinition), f"'{name}' not a ClassDefinition?"
377
469
378
470
# Try to match associations in the forward direction
379
471
if not (
380
- match_association (association , subject_categories , predicates , object_categories ) or
472
+ self . match_association (association , subject_categories , predicates , object_categories ) or
381
473
(
382
474
match_inverses and
383
- match_association (association , object_categories , inverse_predicates , subject_categories )
475
+ self . match_association (association , object_categories , inverse_predicates , subject_categories )
384
476
)
385
477
):
386
478
continue
0 commit comments