Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions ssbio/databases/pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,11 +312,30 @@ def map_uniprot_resnum_to_pdb(uniprot_resnum, chain_id, sifts_file):
# TODO: "Engineered_Mutation" is also a possible annotation; need to figure out what to do with that
my_pdb_annotation = False

# Find the right chain (entities in the xml doc)
# Find the right chain (entities in the xml doc).
# Note EntityID != ChainID. Entities are alphabetical e.g. Chains ('E','F','X') == Entities ('A','B','C')

# first find all chains in SIFTS file
ent = './/{http://www.ebi.ac.uk/pdbe/docs/sifts/eFamily.xsd}entity'
for chain in root.findall(ent):
# TODO: IMPORTANT - entityId is not the chain ID!!! it is just in alphabetical order!
if chain.attrib['entityId'] == chain_id:
sifts_chain_ids = []
all_entities = root.findall(ent)
for i, chain in enumerate(all_entities):
# keep track of chain ids
uchains = './/{http://www.ebi.ac.uk/pdbe/docs/sifts/eFamily.xsd}crossRefDb[@dbSource="PDB"]'
my_chains = chain.findall(uchains)
if len(my_chains):
cid = my_chains[0].attrib['dbChainId']
if cid not in sifts_chain_ids:
sifts_chain_ids.append(cid)

# then assume alphabet mapping of chains -> entities
if chain_id not in sifts_chain_ids:
return None, False
sifts_entity_id = chr(ord('@') + (sifts_chain_ids.index(chain_id) + 1))

# Find the entity that corresponds to the requested chain, then parse it.
for chain in all_entities:
if chain.attrib['entityId'] == sifts_entity_id:
# Find the "crossRefDb" tag that has the attributes dbSource="UniProt" and dbResNum="your_resnum_here"
# Then match it to the crossRefDb dbResNum that has the attribute dbSource="PDBresnum"

Expand Down
9 changes: 8 additions & 1 deletion ssbio/test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,11 @@ def pdb_ids_obsolete():

@pytest.fixture(scope='module')
def pdb_ids_false():
    """Return a list of bogus PDB identifiers for negative tests.

    The strings are intended as IDs that are not valid PDB entries
    (NOTE(review): inferred from the fixture name -- confirm against the
    tests that consume it).
    """
    return ['soda', 'meow', '1984', 'pycharm']


@pytest.fixture(scope='module')
def sifts_xml(test_files_structures):
    """Path to the 1ATP SIFTS XML test file (a structure with non-A,B chains).

    The file name is joined onto the ``test_files_structures`` directory;
    it is expected at ssbio/test/test_files/structures/1atp.sifts.xml.
    """
    filename = '1atp.sifts.xml'
    return op.join(test_files_structures, filename)
15 changes: 13 additions & 2 deletions ssbio/test/test_databases_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def test_download_mmcif_header(pdb_ids_working, pdb_ids_obsolete, pdb_ids_false,
with pytest.raises(URLError):
pdb.download_mmcif_header(pdb_id=fp, outdir=test_files_tempdir, force_rerun=True)


def test_download_sifts_xml(pdb_ids_working, pdb_ids_obsolete, pdb_ids_false, test_files_tempdir):
for wp in pdb_ids_working:
pdb.download_sifts_xml(pdb_id=wp, outdir=test_files_tempdir)
Expand All @@ -28,8 +29,18 @@ def test_download_sifts_xml(pdb_ids_working, pdb_ids_obsolete, pdb_ids_false, te
with pytest.raises(URLError):
pdb.download_sifts_xml(pdb_id=fp, outdir=test_files_tempdir, force_rerun=True)

def test_map_uniprot_resnum_to_pdb(sifts_xml):
    """Check UniProt -> PDB residue mapping using the ``sifts_xml`` fixture file.

    Each case pairs the keyword arguments passed to
    ``pdb.map_uniprot_resnum_to_pdb`` with the expected
    ``(mapped_resnum, is_observed)`` result.
    """
    mapping_cases = [
        # Tuple(inputs, expected_outputs)
        ({'uniprot_resnum': 20, 'chain_id': 'I', 'sifts_file': sifts_xml}, (19, True)),
        ({'uniprot_resnum': 20, 'chain_id': 'A', 'sifts_file': sifts_xml}, (None, False)),  # invalid chain
        ({'uniprot_resnum': 999, 'chain_id': 'I', 'sifts_file': sifts_xml}, (None, False)),  # invalid res
    ]
    for inputs, outputs in mapping_cases:
        # Unpack explicitly so the comparison does not depend on the
        # container type of the function's return value.
        mapped_resnum, is_observed = pdb.map_uniprot_resnum_to_pdb(**inputs)
        assert (mapped_resnum, is_observed) == outputs


def test_best_structures(pdb_ids_working, pdb_ids_obsolete, pdb_ids_false, test_files_tempdir):
    # Placeholder: as far as this view shows, the test makes no assertions yet.
    # NOTE(review): the diff hunk ends right after this line -- confirm
    # against the full file that nothing else follows in the body.
    pass
Expand Down
Loading