Skip to content

Commit 2e7ac9b

Browse files
Merge pull request #150 from biolink/tweak-pr-148-per-suggestions
tweak-pr-148-per-suggestions
2 parents 83b8cfd + bf8a748 commit 2e7ac9b

File tree

2 files changed

+266
-56
lines changed

2 files changed

+266
-56
lines changed

bmt/toolkit.py

Lines changed: 148 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
import logging
22
import yaml
3-
import csv
43
import deprecation
54
import requests
65
from functools import lru_cache, reduce
76

8-
from typing import List, Union, TextIO, Optional, Dict, Set
9-
from linkml_runtime.utils.schemaview import SchemaView, Namespaces
7+
from typing import List, Union, TextIO, Optional, Dict
8+
from linkml_runtime.utils.schemaview import SchemaView
109
from linkml_runtime.linkml_model.meta import (
1110
SchemaDefinition,
1211
Element,
@@ -15,7 +14,6 @@
1514
ClassDefinition,
1615
SlotDefinition,
1716
)
18-
from pprint import pprint
1917
from bmt.utils import format_element, parse_name
2018

2119
Url = str
@@ -202,7 +200,7 @@ def filter_values_on_slot(
202200
slot_values: List[str],
203201
definition: SlotDefinition,
204202
field: str,
205-
formatted: bool = True
203+
formatted: bool = True
206204
) -> bool:
207205
"""
208206
@@ -216,7 +214,7 @@ def filter_values_on_slot(
216214
Name of embedded (slot) field rooting the tree of elements
217215
against which the slot_values are to be matched.
218216
formatted: bool = True
219-
Use of Biolink CURIE identifiers assumed when true
217+
Use of Biolink CURIE identifiers assumed when True (default: True)
220218
221219
Returns
222220
-------
@@ -230,9 +228,94 @@ def filter_values_on_slot(
230228
if value:
231229
value_set = self.get_descendants(value, formatted=formatted)
232230
return any([entry in slot_values for entry in value_set])
231+
if "description" in definition and definition["description"] is not None:
232+
# In the case where the target 'field' is missing target details but the definition
233+
# still has a 'description' field, we deflect responsibility for vetting the slot_values
234+
# to the caller of the function (this is effectively saying 'all slot values are acceptable'
235+
# in this position, although the description itself may informally constrain them otherwise)
236+
return True
233237
return False
234238

235-
def match_slot_usage(self, element, slot: str, slot_values: List[str]) -> bool:
239+
@staticmethod
def get_local_slot_usage(element: Element, slot: str) -> Optional[SlotDefinition]:
    """
    Retrieve the definition of a specified slot from the 'slot_usage'
    defined locally within the class model of the specified Element.

    Only the element's own 'slot_usage' is consulted here; parents
    and mixins of the element are not inspected by this method.

    Parameters
    ----------
    element:
        Element defining the specified 'slot' in its local 'slot_usage'.
        A missing element (None) is tolerated and simply yields None.
    slot: str
        Name of the slot whose definition is to be retrieved.

    Returns
    -------
    Optional[SlotDefinition]
        The local 'slot_usage' entry for the slot; None, if not available.

    """
    # Element lookups are Optional, so a caller may hand over None here;
    # treat that the same as an element without any 'slot_usage'.
    if element is None or "slot_usage" not in element:
        return None

    slot_usage = element["slot_usage"]
    # 'slot_usage' may be present but empty/None
    if slot_usage and slot in slot_usage:
        return slot_usage[slot]
    return None
264+
265+
def get_slot_usage(self, element: Element, slot: str) -> Optional[SlotDefinition]:
    """
    Get the definition of a specified slot from the 'slot_usage' of a specified Element.
    A relatively deep search is made within the Element context - local class, parent class
    and associated mixins - to discover an available 'slot_usage' definition for the slot.

    The search order is: the element itself, then its immediate 'is_a' parent,
    then each of its mixins in declaration order. Parent or mixin names that
    cannot be resolved to an element are skipped rather than raising an error.

    Parameters
    ----------
    element:
        Element defining the specified 'slot' in 'slot_usage'.
    slot: str
        Name of the slot whose definition is to be retrieved.

    Returns
    -------
    Optional[SlotDefinition]
        None, if not available.

    """
    # Check first for local referencing of the slot
    slot_definition: Optional[SlotDefinition] = self.get_local_slot_usage(element, slot)

    # shallow (immediate parent) check up the class hierarchy...
    # Note: we don't attempt a recursive search through ancestors for now
    if slot_definition is None and "is_a" in element and element["is_a"]:
        parent_name: str = element["is_a"]
        parent: Optional[Element] = self.get_element(parent_name)
        # get_element() is Optional: an unresolvable parent contributes nothing
        if parent is not None:
            slot_definition = self.get_local_slot_usage(parent, slot)

    # if still empty-handed at this point follow the mixins
    if slot_definition is None and "mixins" in element and element["mixins"]:
        # 'slot_usage' for some fields may be inherited from the association mixins. For example:
        #
        # druggable gene to disease association
        #   mixins:
        #     - entity to disease association mixin
        #     - gene to entity association mixin
        #
        # the mixins would have a 'subject' slot_usage for Gene and 'object' usage for Disease
        for mixin_name in element["mixins"]:
            mixin: Optional[Element] = self.get_element(mixin_name)
            if mixin is None:
                # unresolvable mixin name: nothing to inspect, try the next one
                continue
            slot_definition = self.get_local_slot_usage(mixin, slot)
            if slot_definition:
                break

    return slot_definition
311+
312+
def match_slot_usage(
313+
self,
314+
element,
315+
slot: str,
316+
slot_values: List[str],
317+
formatted: bool = True
318+
) -> bool:
236319
"""
237320
Match slot_values against expected slot_usage for
238321
specified slot in specified (association) element.
@@ -245,55 +328,78 @@ def match_slot_usage(self, element, slot: str, slot_values: List[str]) -> bool:
245328
Name of target slot in given element, against which slot_usage is being assessed.
246329
slot_values: List[str]
247330
List of slot value (strings) e.g. categories, predicates, etc. - being assessed against slot_usage
331+
formatted: bool = True
332+
Use of Biolink CURIE identifiers in slot_values assumed when True (default: True)
248333
249334
Returns
250335
-------
251336
bool
252-
Returns 'True' if slot_values are compatible with slot usage within given element
337+
Returns 'True' if slot exists and slot_values are compatible with slot usage
338+
within the given element (or its immediate parent or mixins); False otherwise
253339
254340
"""
255341
# scope of method sanity check for now
256342
assert slot in ["subject", "object", "predicate"]
257343

258-
slot_definition: Optional[SlotDefinition] = None
259-
260-
if "slot_usage" in element:
261-
slot_usage = element["slot_usage"]
262-
if slot_usage and slot in slot_usage:
263-
slot_definition = slot_usage[slot]
264-
elif "mixins" in element and element["mixins"]:
265-
# 'slot_usage' for some fields may be inherited
266-
# from the association mixins. For example:
267-
#
268-
# druggable gene to disease association
269-
# mixins:
270-
# - entity to disease association mixin
271-
# - gene to entity association mixin
272-
#
273-
# the mixins would have a 'subject' slot_usage
274-
# for Gene and 'object' usage for Disease
275-
#
276-
for mixin in element["mixins"]:
277-
mixin_element: Element = self.get_element(mixin)
278-
if "slot_usage" in mixin_element:
279-
slot_usage = mixin_element["slot_usage"]
280-
if slot_usage and slot in slot_usage:
281-
slot_definition = slot_usage[slot]
282-
if slot_definition:
283-
break
344+
slot_definition: Optional[SlotDefinition] = self.get_slot_usage(element, slot)
284345

285346
# assess "slot_values" for "subject", "object"
286347
# or "predicate" against stipulated constraints
287348
if slot_definition:
288349
if slot == "predicate":
289350
# check for a non-null "subproperty_of" constraint on a "predicate" slot_value
290-
return self.filter_values_on_slot(slot_values, slot_definition, "subproperty_of")
351+
return self.filter_values_on_slot(slot_values, slot_definition, "subproperty_of", formatted=formatted)
291352
else:
292353
# check for a non-null "range" constraint on a "subject" or "object" slot_value
293-
return self.filter_values_on_slot(slot_values, slot_definition, "range")
354+
return self.filter_values_on_slot(slot_values, slot_definition, "range", formatted=formatted)
355+
elif slot == "predicate":
356+
# the default here if no 'predicate' slot_usage constraint is defined in the model,
357+
# is to assume that any and all predicates are allowed for this specified subclass
358+
# of biolink:Association. This is functionally identical to the 'description' property
359+
# only slot definition (which doesn't computationally restrict things either).
360+
return True
294361

295362
return False
296363

364+
def match_association(
    self,
    assoc: Element,
    subj_cats: List[str],
    predicates: List[str],
    obj_cats: List[str],
    formatted: bool = True
) -> bool:
    """
    Check a specified element (assumed to be a child of biolink:Association) against
    a given set of Subject category -- Predicate -> Object category constraints.

    Each non-empty constraint list is tested against the 'slot_usage' of the
    corresponding slot ("subject", "predicate", "object" - in that order);
    an empty or missing list places no constraint on its slot.

    Parameters
    ----------
    assoc: Element
        Subclass of biolink:Association to be matched.
    subj_cats: List[str]
        List of Biolink CURIEs of subject categories.
    predicates: List[str]
        List of Biolink CURIEs of predicates.
    obj_cats: List[str]
        List of Biolink CURIEs of object categories.
    formatted: bool = True
        Use of Biolink CURIE identifiers in 'subj_cats', 'predicates' and 'obj_cats'
        assumed when True (default: True)

    Returns
    -------
    bool:
        True if all constraints match the slot_usage of the Association components.

    """
    # Evaluation is lazy and ordered: the first failing slot ends the check
    constraints = (
        ("subject", subj_cats),
        ("predicate", predicates),
        ("object", obj_cats),
    )
    return all(
        not values or self.match_slot_usage(assoc, slot_name, values, formatted=formatted)
        for slot_name, values in constraints
    )
402+
297403
def get_associations(
298404
self,
299405
subject_categories: Optional[List[str]] = None,
@@ -346,41 +452,27 @@ def get_associations(
346452
inverse_predicates.append(inverse_p)
347453
inverse_predicates = self._format_all_elements(elements=inverse_predicates, formatted=True)
348454

349-
def match_association(
350-
assoc: Element,
351-
subj_cats: List[str],
352-
preds: List[str],
353-
obj_cats: List[str],
354-
355-
) -> bool:
356-
if subj_cats and not self.match_slot_usage(assoc, "subject", subj_cats):
357-
return False
358-
if preds and not self.match_slot_usage(assoc, "predicate", preds):
359-
return False
360-
if obj_cats and not self.match_slot_usage(assoc, "object", obj_cats):
361-
return False
362-
return True
363-
364455
if subject_categories or predicates or object_categories:
365456
# This feels like a bit of a brute force approach as an implementation,
366457
# but we just use the list of all association names to retrieve each
367458
# association record for filtering against the constraints?
368459
for name in association_elements:
369460

370-
association: Optional[Element] = self.get_element(name)
371-
if not association:
372-
continue
461+
# although get_element() is Optional[Element],
462+
# the association_elements all come from
463+
# get_descendants(), hence are assumed to be extant
464+
association: Element = self.get_element(name)
373465

374466
# sanity checks, probably not necessary
375467
# assert association, f"'{name}' not a Biolink Element?"
376468
# assert isinstance(association, ClassDefinition), f"'{name}' not a ClassDefinition?"
377469

378470
# Try to match associations in the forward direction
379471
if not(
380-
match_association(association, subject_categories, predicates, object_categories) or
472+
self.match_association(association, subject_categories, predicates, object_categories) or
381473
(
382474
match_inverses and
383-
match_association(association, object_categories, inverse_predicates, subject_categories)
475+
self.match_association(association, object_categories, inverse_predicates, subject_categories)
384476
)
385477
):
386478
continue

0 commit comments

Comments
 (0)