draeger-lab
diff --git a/‎models/Annotated_Models/RECON1_SBOannotated.xml‎
Lines changed: 51 additions & 51 deletions b/‎models/Annotated_Models/RECON1_SBOannotated.xml‎
Lines changed: 51 additions & 51 deletions
diff --git a/‎models/Annotated_Models/RECON1_SBOannotated_enhanced.xml‎
Lines changed: 227456 additions & 0 deletions b/‎models/Annotated_Models/RECON1_SBOannotated_enhanced.xml‎
Lines changed: 227456 additions & 0 deletions
diff --git a/‎src/sboannotator/SBOannotatorEnhancedClass.py‎
Lines changed: 129 additions & 0 deletions b/‎src/sboannotator/SBOannotatorEnhancedClass.py‎
Lines changed: 129 additions & 0 deletions
diff --git a/‎src/sboannotator/__main__.py‎
Lines changed: 45 additions & 1 deletion b/‎src/sboannotator/__main__.py‎
Lines changed: 45 additions & 1 deletion
diff --git a/‎src/sboannotator/adapter.py‎
Lines changed: 148 additions & 0 deletions b/‎src/sboannotator/adapter.py‎
Lines changed: 148 additions & 0 deletions
@@ -0,0 +1,129 @@
+__author__ = 'Nantia Leonidou & Elisabeth Fritze & Enhanced with Unified Provider'
+
+""" SBOannotatorEnhanced Class: Same as original but with unified EC annotation """
+
+import sqlite3
+from libsbml import writeSBMLToFile
+from collections import Counter
+import requests
+import json
+from tqdm import tqdm
+from adapter import callForECAnnotRxnUnified
+
+# Import all original functions
+from SBOannotator import *
+
+class SBOannotatorEnhanced:
+    """SBO annotation pipeline that falls back to the unified EC provider.
+
+    The reaction rule cascade is the one described by the author as identical
+    to the original ``sbo_annotator``; reactions that carry no 'ec-code'
+    annotation are handed to ``callForECAnnotRxnUnified``.
+    """
+
+    def __init__(self, database_name):
+        """Remember which SQLite database to use.
+
+        Inputs:
+            database_name (str): passed to ``sqlite3.connect``; the file
+                ``database_name + '.sql'`` is tried first as schema script
+        """
+        self.database_name = database_name
+
+    def _load_schema(self, cur):
+        """Execute the first schema script that loads; warn when none does."""
+        for schema_path in (self.database_name + '.sql', 'create_dbs.sql'):
+            try:
+                with open(schema_path) as schema:
+                    cur.executescript(schema.read())
+            except (OSError, UnicodeError, sqlite3.Error):
+                # unreadable or invalid script: fall through to the next candidate
+                continue
+            return
+        print("Warning: Could not load database schema")
+
+    def _annotate_reaction_by_rules(self, reaction, cur):
+        """Give one reaction an SBO term: database lookup first, rule checks otherwise."""
+        if addSBOfromDB(reaction, cur):
+            return
+
+        reaction.unsetSBOTerm()
+
+        # needs to be checked first
+        splitTransportBiochem(reaction)
+
+        checkBiomass(reaction)
+        checkSink(reaction)
+        checkExchange(reaction)
+        checkDemand(reaction)
+
+        # if transporter
+        if reaction.getSBOTermID() == 'SBO:0000655':
+            checkPassiveTransport(reaction)
+            checkActiveTransport(reaction)
+            if reaction.getSBOTermID() != 'SBO:0000657':  # if not active
+                checkCoTransport(reaction)
+                if reaction.getSBOTermID() == 'SBO:0000654':  # if co-transport
+                    splitSymAntiPorter(reaction)
+        # if metabolic reaction
+        if reaction.getSBOTermID() == 'SBO:0000176':
+            addSBOviaEC(reaction, cur)  # use create_dbs.sql
+        # if no hit found in db and still annotated as generic biochemical reaction
+        if reaction.getSBOTermID() == 'SBO:0000176':
+            checkRedox(reaction)
+            # NOTE(review): a bare `(reaction)` expression stood here; it had
+            # no effect. A check call appears to have lost its function name -
+            # restore it from the original sbo_annotator if that is the case.
+            checkDecarbonylation(reaction)
+            checkDecarboxylation(reaction)
+            checkDeamination(reaction)
+            checkPhosphorylation(reaction)
+
+    def sbo_annotator_enhanced(self, doc, model_libsbml, modelType, new_filename):
+        """
+        Enhanced main function - identical to original except uses callForECAnnotRxnUnified
+
+        Inputs:
+            doc: SBML document
+            model_libsbml (libsbml-model): input model (unannotated)
+            modelType (str): type of modelling framework
+            new_filename (str): file name for output model
+        Output:
+            Annotated libsbml model (the same object as model_libsbml)
+
+        The database cursor and connection are closed even when an
+        annotation step raises; the exception itself still propagates.
+        """
+
+        # connect to database
+        con = sqlite3.connect(self.database_name)
+        try:
+            cur = con.cursor()
+            try:
+                self._load_schema(cur)
+
+                for reaction in model_libsbml.reactions:
+                    self._annotate_reaction_by_rules(reaction, cur)
+
+                # If rxns still have general SBO term, assign more specific terms via EC numbers
+                print('\nAssign SBO terms via E.C. numbers (Enhanced with Unified Provider)... \n')
+                for reaction in tqdm(model_libsbml.reactions):
+
+                    if reaction.getSBOTermID() == 'SBO:0000176':
+                        # if EC number exists for reaction, use it to derive SBO term via DB use
+                        if 'ec-code' in reaction.getAnnotationString():
+                            ECNums = getECNums(reaction)
+                            multipleECs(reaction, ECNums)
+                        # Enhanced: if EC number does not exist for reaction, use unified provider
+                        else:
+                            callForECAnnotRxnUnified(reaction)  # The only change!
+
+                addSBOforMetabolites(model_libsbml)
+
+                addSBOforGenes(model_libsbml)
+
+                addSBOforModel(doc, modelType)
+
+                addSBOforGroups(model_libsbml)
+
+                addSBOforParameters(model_libsbml)
+
+                addSBOforCompartments(model_libsbml)
+
+                addSBOforRateLaw(model_libsbml)
+
+                addSBOforEvents(model_libsbml)
+
+                write_to_file(model_libsbml, new_filename)
+                print(f'\nEnhanced model with SBO annotations written to {new_filename} ...\n')
+            finally:
+                cur.close()
+        finally:
+            # close database connection
+            con.close()
+
+        return model_libsbml
+
+
+# Provide the same function interface as the original SBOannotator.py
+def sbo_annotator_enhanced(doc, model_libsbml, modelType, database_name, new_filename):
+    """
+    Function-style entry point for the enhanced annotator.
+
+    Builds an SBOannotatorEnhanced for database_name, runs its
+    sbo_annotator_enhanced method on the given document/model and
+    returns whatever that method returns.
+    """
+    return SBOannotatorEnhanced(database_name).sbo_annotator_enhanced(
+        doc, model_libsbml, modelType, new_filename
+    )
@@ -7,7 +7,7 @@
 
 start = time.time()
 
-doc = readSBML('../../models/BiGG_Models/iYO844.xml')
+doc = readSBML('../../models/BiGG_Models/RECON1.xml')
 model = doc.getModel()
 
 print('--------------------------------------------------------------------------------------------------------')
@@ -39,3 +39,47 @@
 
 end = time.time()
 print(f'\n🕑\033[32;40m SBOannotator done after:  {end - start} sec \033[0m')
+
+
+
+
+
+
+# Second half of the script: repeat the annotation run with the enhanced
+# annotator on a freshly loaded copy of the same model and time it separately.
+#from sboannotator import *
+# NOTE(review): wildcard import in the middle of the script; the only name
+# used below that is visibly defined in that module is sbo_annotator_enhanced.
+from SBOannotatorEnhancedClass import *
+
+# Timer for the enhanced run only (model loading and printing included).
+start_enhanced= time.time()
+
+# Load a fresh model for enhanced annotator to ensure fair comparison
+doc2 = readSBML('../../models/BiGG_Models/RECON1.xml')
+model2 = doc2.getModel()
+
+# Report the SBO term counts before annotation. printCounts is called once
+# per line and indexed 0-3 for reactions, metabolites, genes, compartments.
+print('--------------------------------------------------------------------------------------------------------')
+print("➡️ \033[32;40m SBO Terms – Before (Enhanced):\033[0m")
+print('--------------------------------------------------------------------------------------------------------')
+print(f'Reactions: {printCounts(model2)[0]}')
+print(f'\nMetabolites: {printCounts(model2)[1]}')
+print(f'\nGenes: {printCounts(model2)[2]}')
+print(f'\nCompartments: {printCounts(model2)[3]}')
+print('--------------------------------------------------------------------------------------------------------')
+
+# Arguments: document, model, modelType, database_name, output file path
+# (the output name is built from the model's own ID).
+sbo_annotator_enhanced(doc2, model2, 'constraint-based','create_dbs', '../../models/Annotated_Models/'+model2.getId()+'_SBOannotated_enhanced.xml')
+
+# Same report again, now on the annotated model.
+print('--------------------------------------------------------------------------------------------------------')
+print("➡️ \033[32;40m SBO Terms – After (Enhanced):\033[0m")
+print('--------------------------------------------------------------------------------------------------------')
+print(f'Reactions: {printCounts(model2)[0]}')
+print(f'\nMetabolites: {printCounts(model2)[1]}')
+print(f'\nGenes: {printCounts(model2)[2]}')
+print(f'\nCompartments: {printCounts(model2)[3]}\n')
+print('--------------------------------------------------------------------------------------------------------')
+
+# counter-check which reactions remained without SBO annotation
+for r in model2.reactions:
+    if r.isSetSBOTerm() is False:
+        print('\n*********************')
+        print(f'No SBO set for reactions: {r.getId()}')
+        print('\n*********************')
+
+# Elapsed wall-clock time of the enhanced section.
+end_enhanced = time.time()
+print(f'\n🕑\033[32;40m Enhanced SBOannotator done after:  {end_enhanced - start_enhanced} sec \033[0m')
@@ -0,0 +1,148 @@
+# 1. Abstract interface definition: All database adapters need to implement these two methods
+from abc import ABC, abstractmethod
+from typing import List
+
+from sboannotator.SBOannotator import multipleECs
+
+
+class EnzymeDataAdapter(ABC):
+    """Interface every enzyme-database adapter has to implement."""
+
+    @abstractmethod
+    def extract_ids_from_annotation(self, annotation_string: str) -> List[str]:
+        """Return the database IDs found in the given annotation string."""
+
+    @abstractmethod
+    def query_ec_numbers(self, database_id: str) -> List[str]:
+        """Return the EC numbers the database API reports for ``database_id``."""
+
+
+# 2. KEGG database adapter implementation
+class KEGGAdapter(EnzymeDataAdapter):
+    """Adapter that maps KEGG reaction IDs to EC numbers via the KEGG REST API."""
+
+    # Seconds to wait for KEGG; without a timeout requests.get may block forever.
+    REQUEST_TIMEOUT = 30
+
+    def extract_ids_from_annotation(self, annotation_string: str) -> List[str]:
+        """Extract KEGG reaction ID from annotation, e.g. R10747"""
+        import re
+        pattern = r'kegg\.reaction/([R]\d+)'
+        return re.findall(pattern, annotation_string)
+
+    @staticmethod
+    def _parse_enzyme_field(flat_file_text: str) -> List[str]:
+        """Collect every EC number of the ENZYME field of a KEGG flat-file entry.
+
+        The field may list several numbers on the ENZYME line itself and carry
+        on over following lines that start with whitespace.
+        """
+        ec_numbers = []
+        in_enzyme_field = False
+        for line in flat_file_text.split('\n'):
+            if line.startswith('ENZYME'):
+                # Example line: ENZYME      1.1.1.1         1.1.1.71
+                in_enzyme_field = True
+                ec_numbers.extend(line.split()[1:])
+            elif in_enzyme_field and line[:1].isspace():
+                # indented continuation line of the ENZYME field
+                ec_numbers.extend(line.split())
+            else:
+                in_enzyme_field = False
+        return ec_numbers
+
+    def query_ec_numbers(self, kegg_reaction_id: str) -> List[str]:
+        """Use KEGG REST API to query EC numbers.
+
+        Returns an empty list when the request fails, times out or does not
+        answer with HTTP 200 (best-effort lookup, never raises for network
+        problems).
+        """
+        import requests
+        url = f"http://rest.kegg.jp/get/rn:{kegg_reaction_id}"
+        try:
+            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
+        except requests.RequestException:
+            return []
+
+        if response.status_code != 200:
+            return []
+        return self._parse_enzyme_field(response.text)
+
+
+# 3. BiGG database adapter implementation
+class BiGGAdapter(EnzymeDataAdapter):
+    """Adapter that looks up EC numbers for a reaction ID in the BiGG universal API."""
+
+    # Seconds to wait for BiGG; without a timeout requests.get may block forever.
+    REQUEST_TIMEOUT = 30
+
+    def extract_ids_from_annotation(self, annotation_string: str) -> List[str]:
+        """Return no IDs: BiGG is queried with the reaction's own ID instead."""
+        # BiGG adapter does not extract ID from annotation, but directly uses reaction ID
+        return []
+
+    def query_ec_numbers(self, reaction_id: str) -> List[str]:
+        """Use BiGG API to query EC numbers.
+
+        Returns the 'id' of every entry under database_links -> 'EC Number'.
+        Returns an empty list when the request fails, times out, does not
+        answer with HTTP 200 or the payload does not have that shape.
+        """
+        import requests
+
+        # Remove reaction ID prefix 'R_' if exists
+        clean_id = reaction_id[2:] if reaction_id.startswith('R_') else reaction_id
+        url = f"http://bigg.ucsd.edu/api/v2/universal/reactions/{clean_id}"
+
+        try:
+            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
+            if response.status_code != 200:
+                return []
+            info = response.json()
+        except (requests.RequestException, ValueError):
+            # network failure, timeout, or a body that is not valid JSON
+            return []
+
+        if not isinstance(info, dict):
+            return []
+        database_links = info.get('database_links')
+        if not isinstance(database_links, dict):
+            return []
+        ec_links = database_links.get('EC Number')
+        if not isinstance(ec_links, list):
+            return []
+        # skip malformed entries instead of discarding the whole result
+        return [link['id'] for link in ec_links
+                if isinstance(link, dict) and 'id' in link]
+
+# 4. Reactome database adapter implementation (return parsing not yet implemented)
+# class ReactomeAdapter(EnzymeDataAdapter):
+#     def extract_ids_from_annotation(self, annotation_string: str) -> List[str]:
+#         """Extract Reactome reaction ID from annotation, e.g. R-ATH-71850"""
+#         import re
+#         pattern = r'reactome\.reaction/(R-\w+-\d+)'
+#         return re.findall(pattern, annotation_string)
+#
+#     def query_ec_numbers(self, reactome_id: str) -> List[str]:
+#         """Use Reactome REST API to query EC numbers (to be completed)"""
+#         import requests
+#         try:
+#             url = f"https://reactome.org/ContentService/data/query/{reactome_id}"
+#             response = requests.get(url)
+#
+#             if response.status_code == 200:
+#                 data = response.json()
+#                 # TODO: Parse JSON data, extract EC numbers (depending on specific structure)
+#                 return []
+#         except:
+#             pass
+#         return []
+
+
+# 5. Unified provider: Integrate all Adapters, unified access to EC numbers
+class UnifiedEnzymeDataProvider:
+    """Single access point that gathers EC numbers from all configured adapters."""
+
+    def __init__(self):
+        """Create one adapter instance per supported data source."""
+        self.bigg_adapter = BiGGAdapter()
+        self.kegg_adapter = KEGGAdapter()
+        # self.reactome_adapter = ReactomeAdapter()
+
+    def get_ec_numbers_from_reaction(self, reaction) -> List[str]:
+        """Get EC numbers from all data sources in the reaction object.
+
+        BiGG is queried with ``reaction.getId()``; KEGG is queried once per
+        KEGG reaction ID found in ``reaction.getAnnotationString()``.
+
+        Returns:
+            The EC numbers without duplicates, in first-seen order (BiGG
+            results first), so repeated runs yield the same list.
+        """
+        all_ec_numbers = []
+        annotation_string = reaction.getAnnotationString()
+
+        # 1. First try BiGG API
+        all_ec_numbers.extend(self.bigg_adapter.query_ec_numbers(reaction.getId()))
+
+        # 2. Then try KEGG
+        for kegg_id in self.kegg_adapter.extract_ids_from_annotation(annotation_string):
+            all_ec_numbers.extend(self.kegg_adapter.query_ec_numbers(kegg_id))
+
+        # # 3. Try Reactome (if needed)
+        # reactome_ids = self.reactome_adapter.extract_ids_from_annotation(annotation_string)
+        # for reactome_id in reactome_ids:
+        #     reactome_ecs = self.reactome_adapter.query_ec_numbers(reactome_id)
+        #     all_ec_numbers.extend(reactome_ecs)
+
+        # dict.fromkeys de-duplicates while keeping insertion order; a set
+        # would hand the numbers on in an arbitrary order.
+        return list(dict.fromkeys(all_ec_numbers))
+
+
+# 6. New unified EC query function
+def callForECAnnotRxnUnified(rxn):
+    """
+    Look up EC numbers for rxn through a UnifiedEnzymeDataProvider and pass
+    them to multipleECs; when none are found, set the reaction's SBO term
+    to 'SBO:0000176' instead.
+    """
+    ec_hits = UnifiedEnzymeDataProvider().get_ec_numbers_from_reaction(rxn)
+
+    if not ec_hits:
+        # If no EC number found, still annotate as metabolic reaction
+        rxn.setSBOTerm('SBO:0000176')
+        return
+
+    from sboannotator.SBOannotator import multipleECs
+    multipleECs(rxn, ec_hits)
+
+# doc = readSBML('../../models/BiGG_Models/iYO844.xml')
+# model = doc.getModel()