SBRG · JoshuaMeyers · Dec 7, 2020 · Dec 7, 2020
diff --git a/ssbio/databases/pdb.py b/ssbio/databases/pdb.py
@@ -312,11 +312,30 @@ def map_uniprot_resnum_to_pdb(uniprot_resnum, chain_id, sifts_file):
     # TODO: "Engineered_Mutation is also a possible annotation, need to figure out what to do with that
     my_pdb_annotation = False
 
-    # Find the right chain (entities in the xml doc)
+    # Find the right chain (entities in the xml doc).
+    # Note: entityId != chain ID. Entity IDs are assumed to run alphabetically, e.g. chains ('E','F','X') -> entities ('A','B','C')
+
+    # first find all chains in SIFTS file
     ent = './/{http://www.ebi.ac.uk/pdbe/docs/sifts/eFamily.xsd}entity'
-    for chain in root.findall(ent):
-        # TODO: IMPORTANT - entityId is not the chain ID!!! it is just in alphabetical order!
-        if chain.attrib['entityId'] == chain_id:
+    sifts_chain_ids = []
+    all_entities = root.findall(ent)
+    for i, chain in enumerate(all_entities):
+        # keep track of chain ids
+        uchains = './/{http://www.ebi.ac.uk/pdbe/docs/sifts/eFamily.xsd}crossRefDb[@dbSource="PDB"]'
+        my_chains = chain.findall(uchains)
+        if len(my_chains):
+            cid = my_chains[0].attrib['dbChainId']
+            if cid not in sifts_chain_ids:
+                sifts_chain_ids.append(cid)
+
+    # then assume the n-th distinct chain ID maps to the n-th letter entityId ('A', 'B', ...)
+    if chain_id not in sifts_chain_ids:
+        return None, False
+    sifts_entity_id = chr(ord('@') + (sifts_chain_ids.index(chain_id) + 1))
+
+    # find the entity whose entityId matches the requested chain, then parse it
+    for chain in all_entities:
+        if chain.attrib['entityId'] == sifts_entity_id:
             # Find the "crossRefDb" tag that has the attributes dbSource="UniProt" and  dbResNum="your_resnum_here"
             # Then match it to the crossRefDb dbResNum that has the attribute dbSource="PDBresnum"
 

diff --git a/ssbio/test/conftest.py b/ssbio/test/conftest.py
@@ -101,4 +101,11 @@ def pdb_ids_obsolete():
 
 @pytest.fixture(scope='module')
 def pdb_ids_false():
-    return ['soda','meow','1984','pycharm']
+    return ['soda','meow','1984','pycharm']
+
+
+@pytest.fixture(scope='module')
+def sifts_xml(test_files_structures):
+    """Path to the SIFTS XML file for 1atp, a structure whose chains are not labelled A, B."""
+    # ssbio/test/test_files/structures/1atp.sifts.xml
+    return op.join(test_files_structures, '1atp.sifts.xml')
diff --git a/ssbio/test/test_databases_pdb.py b/ssbio/test/test_databases_pdb.py
@@ -16,6 +16,7 @@ def test_download_mmcif_header(pdb_ids_working, pdb_ids_obsolete, pdb_ids_false,
         with pytest.raises(URLError):
             pdb.download_mmcif_header(pdb_id=fp, outdir=test_files_tempdir, force_rerun=True)
 
+
 def test_download_sifts_xml(pdb_ids_working, pdb_ids_obsolete, pdb_ids_false, test_files_tempdir):
     for wp in pdb_ids_working:
         pdb.download_sifts_xml(pdb_id=wp, outdir=test_files_tempdir)
@@ -28,8 +29,18 @@ def test_download_sifts_xml(pdb_ids_working, pdb_ids_obsolete, pdb_ids_false, te
         with pytest.raises(URLError):
             pdb.download_sifts_xml(pdb_id=fp, outdir=test_files_tempdir, force_rerun=True)
 
-def test_map_uniprot_resnum_to_pdb(pdb_ids_working, pdb_ids_obsolete, pdb_ids_false, test_files_tempdir):
-    pass
+
+def test_map_uniprot_resnum_to_pdb(sifts_xml):
+    mapping_cases = [
+        # each case: (kwargs for map_uniprot_resnum_to_pdb, expected (mapped_resnum, is_observed))
+        ({'uniprot_resnum': 20, 'chain_id': 'I', 'sifts_file': sifts_xml}, (19, True)),
+        ({'uniprot_resnum': 20, 'chain_id': 'A', 'sifts_file': sifts_xml}, (None, False)),  # invalid chain
+        ({'uniprot_resnum': 999, 'chain_id': 'I', 'sifts_file': sifts_xml}, (None, False)),  # invalid res
+    ]
+    for inputs, outputs in mapping_cases:
+        mapped_resnum, is_observed = pdb.map_uniprot_resnum_to_pdb(**inputs)
+        assert (mapped_resnum, is_observed) == outputs
+
 
 def test_best_structures(pdb_ids_working, pdb_ids_obsolete, pdb_ids_false, test_files_tempdir):
     pass