Skip to content

Commit 06049ff

Browse files
authored
Merge pull request #4 from lareinahu-2023/feature/database/adapter_provider
Feature/database/adapter provider
2 parents ec1ac7c + a31069b commit 06049ff

File tree

6 files changed

+227898
-52
lines changed

6 files changed

+227898
-52
lines changed

models/Annotated_Models/RECON1_SBOannotated.xml

Lines changed: 51 additions & 51 deletions
Large diffs are not rendered by default.

models/Annotated_Models/RECON1_SBOannotated_enhanced.xml

Lines changed: 227456 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
__author__ = 'Nantia Leonidou & Elisabeth Fritze & Enhanced with Unified Provider'
2+
3+
""" SBOannotatorEnhanced Class: Same as original but with unified EC annotation """
4+
5+
import sqlite3
6+
from libsbml import writeSBMLToFile
7+
from collections import Counter
8+
import requests
9+
import json
10+
from tqdm import tqdm
11+
from adapter import callForECAnnotRxnUnified
12+
13+
# Import all original functions
14+
from SBOannotator import *
15+
16+
class SBOannotatorEnhanced:
    """SBOannotator pipeline that resolves missing EC numbers via the unified provider.

    The workflow follows the original ``sbo_annotator`` function; reactions
    without an ``ec-code`` annotation are handed to
    ``callForECAnnotRxnUnified`` instead of the original lookup.
    """

    def __init__(self, database_name):
        """Remember the SQLite database name (also used as ``<name>.sql`` schema file)."""
        self.database_name = database_name

    def _load_schema(self, cur):
        """Best-effort load of the SQL schema; return True when a script was executed.

        ``<database_name>.sql`` is tried first and ``create_dbs.sql`` second.
        A missing/unreadable file or a failing script is tolerated: a warning
        is printed and annotation continues with whatever the database holds.
        """
        for schema_path in (self.database_name + '.sql', 'create_dbs.sql'):
            try:
                with open(schema_path) as schema:
                    cur.executescript(schema.read())
            except (OSError, UnicodeError, sqlite3.Error):
                # only I/O and database problems are expected here; anything
                # else is a programming error and should surface
                continue
            return True
        print("Warning: Could not load database schema")
        return False

    @staticmethod
    def _annotate_reaction_by_rules(reaction, cur):
        """Assign an SBO term to one reaction using the database and the rule checks."""
        if addSBOfromDB(reaction, cur):
            return

        reaction.unsetSBOTerm()

        # needs to be checked first
        splitTransportBiochem(reaction)

        checkBiomass(reaction)
        checkSink(reaction)
        checkExchange(reaction)
        checkDemand(reaction)

        # if transporter
        if reaction.getSBOTermID() == 'SBO:0000655':
            checkPassiveTransport(reaction)
            checkActiveTransport(reaction)
            if reaction.getSBOTermID() != 'SBO:0000657':  # if not active
                checkCoTransport(reaction)
            if reaction.getSBOTermID() == 'SBO:0000654':  # if not co-transport
                splitSymAntiPorter(reaction)

        # if metabolic reaction
        if reaction.getSBOTermID() == 'SBO:0000176':
            addSBOviaEC(reaction, cur)  # use create_dbs.sql

        # if no hit found in db and still annotated as generic biochemical reaction
        if reaction.getSBOTermID() == 'SBO:0000176':
            checkRedox(reaction)
            # NOTE(review): the committed code had a bare ``(reaction)``
            # statement here, i.e. a no-op whose callee name seems to have been
            # lost (presumably a glycosylation check -- confirm against the
            # original SBOannotator and restore the call).
            checkDecarbonylation(reaction)
            checkDecarboxylation(reaction)
            checkDeamination(reaction)
            checkPhosphorylation(reaction)

    @staticmethod
    def _annotate_reaction_by_ec(reaction):
        """Refine a still-generic reaction (SBO:0000176) through its EC numbers."""
        if reaction.getSBOTermID() != 'SBO:0000176':
            return
        if 'ec-code' in reaction.getAnnotationString():
            # EC number already annotated: derive the SBO term from it
            ECNums = getECNums(reaction)
            multipleECs(reaction, ECNums)
        else:
            # Enhanced: no EC number annotated, ask the unified provider
            callForECAnnotRxnUnified(reaction)

    def sbo_annotator_enhanced(self, doc, model_libsbml, modelType, new_filename):
        """
        Enhanced main function - same steps as the original, but uses callForECAnnotRxnUnified

        Inputs:
            doc: SBML document
            model_libsbml (libsbml-model): input model (unannotated)
            modelType (str): type of modelling framework
            new_filename (str): file name for output model
        Output:
            Annotated libsbml model
        """
        from contextlib import closing

        # closing() guarantees cursor and connection are released even when an
        # annotation step or the file write raises
        with closing(sqlite3.connect(self.database_name)) as con, \
                closing(con.cursor()) as cur:
            self._load_schema(cur)

            for reaction in model_libsbml.reactions:
                self._annotate_reaction_by_rules(reaction, cur)

            # If rxns still have general SBO term, assign more specific terms via EC numbers
            print('\nAssign SBO terms via E.C. numbers (Enhanced with Unified Provider)... \n')
            for reaction in tqdm(model_libsbml.reactions):
                self._annotate_reaction_by_ec(reaction)

            addSBOforMetabolites(model_libsbml)
            addSBOforGenes(model_libsbml)
            addSBOforModel(doc, modelType)
            addSBOforGroups(model_libsbml)
            addSBOforParameters(model_libsbml)
            addSBOforCompartments(model_libsbml)
            addSBOforRateLaw(model_libsbml)
            addSBOforEvents(model_libsbml)

            write_to_file(model_libsbml, new_filename)
            print(f'\nEnhanced model with SBO annotations written to {new_filename} ...\n')

        return model_libsbml
120+
121+
122+
# Provide the same function interface as the original SBOannotator.py
123+
def sbo_annotator_enhanced(doc, model_libsbml, modelType, database_name, new_filename):
    """
    Function-style entry point for the enhanced annotator (unified provider).

    Takes the same arguments as the original sbo_annotator: an
    SBOannotatorEnhanced is created for ``database_name`` and the remaining
    arguments are forwarded to its ``sbo_annotator_enhanced`` method, whose
    result is returned.
    """
    return SBOannotatorEnhanced(database_name).sbo_annotator_enhanced(
        doc, model_libsbml, modelType, new_filename
    )

src/sboannotator/__main__.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
start = time.time()
99

10-
doc = readSBML('../../models/BiGG_Models/iYO844.xml')
10+
doc = readSBML('../../models/BiGG_Models/RECON1.xml')
1111
model = doc.getModel()
1212

1313
print('--------------------------------------------------------------------------------------------------------')
@@ -39,3 +39,47 @@
3939

4040
end = time.time()
4141
print(f'\n🕑\033[32;40m SBOannotator done after: {end - start} sec \033[0m')
42+
43+
44+
45+
46+
47+
48+
#from sboannotator import *
from SBOannotatorEnhancedClass import *

# Horizontal rule used around every report section.
_RULE = '--------------------------------------------------------------------------------------------------------'


def _print_sbo_counts(banner, model, tail=''):
    """Print the SBO term counts of ``model`` framed by rules under ``banner``.

    ``printCounts`` is evaluated once per report and indexed, instead of being
    recomputed for every printed line (assumes it only reads the model).
    ``tail`` is appended to the last (compartments) line.
    """
    print(_RULE)
    print(banner)
    print(_RULE)
    counts = printCounts(model)
    print(f'Reactions: {counts[0]}')
    print(f'\nMetabolites: {counts[1]}')
    print(f'\nGenes: {counts[2]}')
    print(f'\nCompartments: {counts[3]}{tail}')
    print(_RULE)


start_enhanced = time.time()

# Load a fresh model for enhanced annotator to ensure fair comparison
doc2 = readSBML('../../models/BiGG_Models/RECON1.xml')
model2 = doc2.getModel()

_print_sbo_counts("➡️ \033[32;40m SBO Terms – Before (Enhanced):\033[0m", model2)

sbo_annotator_enhanced(
    doc2,
    model2,
    'constraint-based',
    'create_dbs',
    '../../models/Annotated_Models/' + model2.getId() + '_SBOannotated_enhanced.xml',
)

_print_sbo_counts("➡️ \033[32;40m SBO Terms – After (Enhanced):\033[0m", model2, tail='\n')

# counter-check which reactions remained without SBO annotation
for r in model2.reactions:
    if not r.isSetSBOTerm():
        print('\n*********************')
        print(f'No SBO set for reactions: {r.getId()}')
        print('\n*********************')

end_enhanced = time.time()
print(f'\n🕑\033[32;40m Enhanced SBOannotator done after: {end_enhanced - start_enhanced} sec \033[0m')

src/sboannotator/adapter.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
# 1. Abstract interface definition: All database adapters need to implement these two methods
2+
from abc import ABC, abstractmethod
3+
from typing import List
4+
5+
from sboannotator.SBOannotator import multipleECs
6+
7+
8+
class EnzymeDataAdapter(ABC):
    """Contract shared by all enzyme-database adapters.

    A concrete adapter must provide both methods below before it can be
    instantiated.
    """

    @abstractmethod
    def extract_ids_from_annotation(self, annotation_string: str) -> List[str]:
        """Return the database IDs found in ``annotation_string``."""
        return None

    @abstractmethod
    def query_ec_numbers(self, database_id: str) -> List[str]:
        """Return the EC numbers the database API reports for ``database_id``."""
        return None
18+
19+
20+
# 2. KEGG database adapter implementation
21+
class KEGGAdapter(EnzymeDataAdapter):
    """Adapter that maps KEGG reaction IDs to EC numbers via the KEGG REST API."""

    # Seconds to wait for KEGG; without a timeout a stalled connection would
    # block the whole annotation run indefinitely.
    REQUEST_TIMEOUT = 30

    def extract_ids_from_annotation(self, annotation_string: str) -> List[str]:
        """Extract KEGG reaction IDs from annotation, e.g. R10747

        Both ``kegg.reaction/R10747`` and ``kegg.reaction:R10747`` spellings
        are recognised. Each ID is returned once, in order of first
        appearance, so the same reaction is not queried repeatedly.
        """
        import re

        if not annotation_string:
            return []
        hits = re.findall(r'kegg\.reaction[/:](R\d+)', annotation_string)
        return list(dict.fromkeys(hits))

    def query_ec_numbers(self, kegg_reaction_id: str) -> List[str]:
        """Use KEGG REST API to query EC numbers

        Returns an empty list when the request fails, times out or does not
        answer with HTTP 200.
        """
        import requests

        url = f"http://rest.kegg.jp/get/rn:{kegg_reaction_id}"
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException:
            # network problems are expected on long runs; treat as "no data"
            return []
        if response.status_code != 200:
            return []
        return self._parse_enzyme_field(response.text)

    @staticmethod
    def _parse_enzyme_field(flat_file: str) -> List[str]:
        """Collect every EC number of the ENZYME field of a KEGG flat-file entry.

        Example line: ``ENZYME      1.1.1.1         1.1.1.71``. All values on
        the ENZYME line are taken, as are values on the indented lines that
        directly follow it (treated as continuation lines of the same field).
        """
        ec_numbers: List[str] = []
        inside_enzyme = False
        for line in flat_file.splitlines():
            if line.startswith('ENZYME'):
                inside_enzyme = True
                ec_numbers.extend(line.split()[1:])
            elif inside_enzyme and line[:1].isspace():
                ec_numbers.extend(line.split())
            else:
                # any other field keyword (or blank line) ends the ENZYME field
                inside_enzyme = False
        return ec_numbers
46+
47+
48+
# 3. BiGG database adapter implementation
49+
class BiGGAdapter(EnzymeDataAdapter):
    """Adapter that looks up EC numbers for a reaction ID via the BiGG API."""

    # Seconds to wait for BiGG; without a timeout a stalled connection would
    # block the whole annotation run indefinitely.
    REQUEST_TIMEOUT = 30

    def extract_ids_from_annotation(self, annotation_string: str) -> List[str]:
        """Return no IDs: the BiGG lookup uses the reaction ID directly, not the annotation."""
        return []

    def query_ec_numbers(self, reaction_id: str) -> List[str]:
        """Use BiGG API to query EC numbers

        Returns an empty list when the ID is empty, the request fails or times
        out, the answer is not HTTP 200, or the body is not valid JSON.
        """
        import requests

        if not reaction_id:
            return []

        # Remove reaction ID prefix 'R_' if exists
        clean_id = reaction_id[2:] if reaction_id.startswith('R_') else reaction_id
        url = f"http://bigg.ucsd.edu/api/v2/universal/reactions/{clean_id}"
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            if response.status_code != 200:
                return []
            info = response.json()
        except (requests.RequestException, ValueError):
            # ValueError covers an undecodable JSON body
            return []
        return self._ec_numbers_from_payload(info)

    @staticmethod
    def _ec_numbers_from_payload(info) -> List[str]:
        """Pull the ``id`` of every ``database_links['EC Number']`` entry out of ``info``.

        Entries without an ``id`` are skipped instead of discarding the whole
        result.
        """
        if not isinstance(info, dict):
            return []
        links = info.get('database_links')
        if not isinstance(links, dict):
            return []
        entries = links.get('EC Number') or []
        return [
            entry['id']
            for entry in entries
            if isinstance(entry, dict) and 'id' in entry
        ]
76+
77+
# 4. Reactome database adapter implementation (return parsing not yet implemented)
78+
# class ReactomeAdapter(EnzymeDataAdapter):
79+
# def extract_ids_from_annotation(self, annotation_string: str) -> List[str]:
80+
# """Extract Reactome reaction ID from annotation, e.g. R-ATH-71850"""
81+
# import re
82+
# pattern = r'reactome\.reaction/(R-\w+-\d+)'
83+
# return re.findall(pattern, annotation_string)
84+
#
85+
# def query_ec_numbers(self, reactome_id: str) -> List[str]:
86+
# """Use Reactome REST API to query EC numbers (to be completed)"""
87+
# import requests
88+
# try:
89+
# url = f"https://reactome.org/ContentService/data/query/{reactome_id}"
90+
# response = requests.get(url)
91+
#
92+
# if response.status_code == 200:
93+
# data = response.json()
94+
# # TODO: Parse JSON data, extract EC numbers (depending on specific structure)
95+
# return []
96+
# except:
97+
# pass
98+
# return []
99+
100+
101+
# 5. Unified provider: Integrate all Adapters, unified access to EC numbers
102+
class UnifiedEnzymeDataProvider:
    """Single access point that gathers EC numbers from all wired-in adapters."""

    def __init__(self):
        """Create the BiGG and KEGG adapters."""
        self.bigg_adapter = BiGGAdapter()
        self.kegg_adapter = KEGGAdapter()
        # TODO: add a Reactome adapter once its response parsing is implemented

    def get_ec_numbers_from_reaction(self, reaction) -> List[str]:
        """Get EC numbers from all data sources in the reaction object

        BiGG is queried with the reaction ID, then KEGG with every KEGG
        reaction ID found in the annotation string. The result contains each
        EC number once, in order of first appearance (BiGG hits first), so the
        output is stable across runs.
        """
        annotation_string = reaction.getAnnotationString()

        # 1. First try BiGG API
        collected = list(self.bigg_adapter.query_ec_numbers(reaction.getId()))

        # 2. Then try KEGG
        for kegg_id in self.kegg_adapter.extract_ids_from_annotation(annotation_string):
            collected.extend(self.kegg_adapter.query_ec_numbers(kegg_id))

        # dict.fromkeys de-duplicates while keeping insertion order; a set
        # would hand back the EC numbers in an arbitrary, run-dependent order
        return list(dict.fromkeys(collected))
130+
131+
132+
# 6. New unified EC query function
133+
def callForECAnnotRxnUnified(rxn):
    """
    Look up EC numbers for ``rxn`` through a fresh UnifiedEnzymeDataProvider
    (data sources like BiGG and KEGG) and pass them to multipleECs.

    When no EC number is found, the reaction's SBO term is set to SBO:0000176.
    """
    ec_numbers = UnifiedEnzymeDataProvider().get_ec_numbers_from_reaction(rxn)

    if not ec_numbers:
        # If no EC number found, still annotate as metabolic reaction
        rxn.setSBOTerm('SBO:0000176')
        return

    from sboannotator.SBOannotator import multipleECs
    multipleECs(rxn, ec_numbers)
146+
147+
# doc = readSBML('../../models/BiGG_Models/iYO844.xml')
148+
# model = doc.getModel()

0 commit comments

Comments
 (0)