1
+ # 1. Abstract interface definition: All database adapters need to implement these two methods
2
+ from abc import ABC , abstractmethod
3
+ from typing import List
4
+
5
+ from sboannotator .SBOannotator import multipleECs
6
+
7
+
8
class EnzymeDataAdapter(ABC):
    """Contract shared by every enzyme-database adapter.

    A concrete adapter must supply both methods below: one that pulls
    database identifiers out of an annotation string, and one that turns a
    single identifier into a list of EC numbers.
    """

    @abstractmethod
    def extract_ids_from_annotation(self, annotation_string: str) -> List[str]:
        """Return the database IDs found in ``annotation_string``."""

    @abstractmethod
    def query_ec_numbers(self, database_id: str) -> List[str]:
        """Return the EC numbers the database reports for ``database_id``."""
18
+
19
+
20
+ # 2. KEGG database adapter implementation
21
class KEGGAdapter(EnzymeDataAdapter):
    """Adapter that resolves EC numbers through the KEGG REST API."""

    # Seconds to wait for KEGG before giving up, so an unresponsive server
    # cannot block the caller indefinitely.
    REQUEST_TIMEOUT = 10

    def extract_ids_from_annotation(self, annotation_string: str) -> List[str]:
        """Extract KEGG reaction IDs (e.g. ``R10747``) from an annotation.

        Both the path-style (``kegg.reaction/R10747``) and the compact
        (``kegg.reaction:R10747``) spellings are recognised.

        Args:
            annotation_string: Raw annotation text of a reaction.

        Returns:
            All matching reaction IDs in order of appearance (duplicates are
            kept); an empty list when nothing matches.
        """
        import re
        return re.findall(r'kegg\.reaction[/:](R\d+)', annotation_string)

    def query_ec_numbers(self, kegg_reaction_id: str) -> List[str]:
        """Query the KEGG REST API for the EC numbers of one reaction.

        This is a best-effort lookup: network failures and non-200 responses
        yield an empty list instead of an exception.

        Args:
            kegg_reaction_id: KEGG reaction ID such as ``R10747``.

        Returns:
            EC numbers listed in the ENZYME field of the KEGG entry, or an
            empty list when the entry is unavailable or has no such field.
        """
        import logging
        import requests

        url = f"http://rest.kegg.jp/get/rn:{kegg_reaction_id}"
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Only network-level problems are absorbed; programming errors
            # and KeyboardInterrupt are no longer hidden by a bare except.
            logging.getLogger(__name__).warning(
                "KEGG lookup failed for %s: %s", kegg_reaction_id, exc
            )
            return []

        if response.status_code != 200:
            return []
        return self._parse_enzyme_field(response.text)

    @staticmethod
    def _parse_enzyme_field(flat_text: str) -> List[str]:
        """Collect every EC number from the ENZYME field of a KEGG flat file.

        The field may hold several EC numbers on the ``ENZYME`` line itself
        and on the indented continuation lines that follow it.
        """
        ec_numbers: List[str] = []
        inside_field = False
        for line in flat_text.splitlines():
            if line.startswith('ENZYME'):
                inside_field = True
                # Drop the field keyword, keep every EC number after it.
                ec_numbers.extend(line.split()[1:])
            elif inside_field and line[:1].isspace():
                # Indented line: continuation of the ENZYME field.
                ec_numbers.extend(line.split())
            else:
                inside_field = False
        return ec_numbers
46
+
47
+
48
+ # 3. BiGG database adapter implementation
49
class BiGGAdapter(EnzymeDataAdapter):
    """Adapter that resolves EC numbers through the BiGG Models web API."""

    # Seconds to wait for BiGG before giving up, so an unresponsive server
    # cannot block the caller indefinitely.
    REQUEST_TIMEOUT = 10

    def extract_ids_from_annotation(self, annotation_string: str) -> List[str]:
        """Return an empty list; BiGG lookups do not use the annotation.

        The BiGG adapter is queried with the reaction's own ID instead of an
        ID parsed from the annotation, so there is nothing to extract here.
        """
        return []

    def query_ec_numbers(self, reaction_id: str) -> List[str]:
        """Query the BiGG API for the EC numbers of one reaction.

        This is a best-effort lookup: network failures, non-200 responses and
        undecodable bodies yield an empty list instead of an exception.

        Args:
            reaction_id: Reaction ID, with or without a leading ``R_``.

        Returns:
            The ``id`` of every entry under ``database_links`` ->
            ``EC Number`` in the response, or an empty list.
        """
        import logging
        import requests

        # Remove the reaction ID prefix 'R_' if present.
        clean_id = reaction_id[2:] if reaction_id.startswith('R_') else reaction_id
        url = f"http://bigg.ucsd.edu/api/v2/universal/reactions/{clean_id}"
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            if response.status_code != 200:
                return []
            info = response.json()
        except (requests.RequestException, ValueError) as exc:
            # ValueError covers a body that is not valid JSON. Anything else
            # (programming errors, KeyboardInterrupt) is allowed to surface.
            logging.getLogger(__name__).warning(
                "BiGG lookup failed for %s: %s", reaction_id, exc
            )
            return []
        return self._ec_ids_from_payload(info)

    @staticmethod
    def _ec_ids_from_payload(info) -> List[str]:
        """Pull the EC number IDs out of a decoded BiGG reaction payload."""
        if not isinstance(info, dict):
            return []
        links = info.get('database_links')
        if not isinstance(links, dict):
            return []
        entries = links.get('EC Number')
        if not isinstance(entries, list):
            return []
        # Skip malformed entries instead of discarding the whole result.
        return [
            entry['id']
            for entry in entries
            if isinstance(entry, dict) and 'id' in entry
        ]
76
+
77
+ # 4. Reactome database adapter implementation (return parsing not yet implemented)
78
+ # class ReactomeAdapter(EnzymeDataAdapter):
79
+ # def extract_ids_from_annotation(self, annotation_string: str) -> List[str]:
80
+ # """Extract Reactome reaction ID from annotation, e.g. R-ATH-71850"""
81
+ # import re
82
+ # pattern = r'reactome\.reaction/(R-\w+-\d+)'
83
+ # return re.findall(pattern, annotation_string)
84
+ #
85
+ # def query_ec_numbers(self, reactome_id: str) -> List[str]:
86
+ # """Use Reactome REST API to query EC numbers (to be completed)"""
87
+ # import requests
88
+ # try:
89
+ # url = f"https://reactome.org/ContentService/data/query/{reactome_id}"
90
+ # response = requests.get(url)
91
+ #
92
+ # if response.status_code == 200:
93
+ # data = response.json()
94
+ # # TODO: Parse JSON data, extract EC numbers (depending on specific structure)
95
+ # return []
96
+ # except:
97
+ # pass
98
+ # return []
99
+
100
+
101
+ # 5. Unified provider: Integrate all Adapters, unified access to EC numbers
102
class UnifiedEnzymeDataProvider:
    """Single entry point that gathers EC numbers from every adapter."""

    def __init__(self):
        self.bigg_adapter = BiGGAdapter()
        self.kegg_adapter = KEGGAdapter()
        # NOTE: a Reactome adapter is not wired in; its response parsing is
        # not implemented yet.

    def get_ec_numbers_from_reaction(self, reaction) -> List[str]:
        """Get EC numbers for a reaction from all configured data sources.

        BiGG is queried first with the reaction's own ID, then KEGG is
        queried once for each distinct KEGG reaction ID found in the
        reaction's annotation string.

        Args:
            reaction: Object exposing ``getAnnotationString()`` and
                ``getId()``.

        Returns:
            De-duplicated EC numbers in first-seen order (BiGG results before
            KEGG results), so the output is reproducible between runs.
        """
        annotation_string = reaction.getAnnotationString()

        # 1. First try the BiGG API.
        collected = list(self.bigg_adapter.query_ec_numbers(reaction.getId()))

        # 2. Then try KEGG; repeated IDs would only repeat the same request.
        kegg_ids = self.kegg_adapter.extract_ids_from_annotation(annotation_string)
        for kegg_id in dict.fromkeys(kegg_ids):
            collected.extend(self.kegg_adapter.query_ec_numbers(kegg_id))

        # dict.fromkeys de-duplicates while keeping insertion order, unlike
        # set(), whose iteration order for strings varies with the hash seed.
        return list(dict.fromkeys(collected))
130
+
131
+
132
+ # 6. New unified EC query function
133
def callForECAnnotRxnUnified(rxn):
    """Annotate ``rxn`` using EC numbers gathered by the unified provider.

    A fresh ``UnifiedEnzymeDataProvider`` collects EC numbers for the
    reaction. When at least one is found they are handed to ``multipleECs``;
    otherwise the reaction receives the fallback term ``SBO:0000176``.
    """
    ec_hits = UnifiedEnzymeDataProvider().get_ec_numbers_from_reaction(rxn)

    if not ec_hits:
        # No EC number found: still annotate as a metabolic reaction.
        rxn.setSBOTerm('SBO:0000176')
        return

    from sboannotator.SBOannotator import multipleECs
    multipleECs(rxn, ec_hits)
146
+
147
+ # doc = readSBML('../../models/BiGG_Models/iYO844.xml')
148
+ # model = doc.getModel()
0 commit comments