Skip to content

Commit c878c51

Browse files
Merge pull request #97 from GenomicDataInfrastructure/fix/identifier-value-parsing
fix(identifier): correctly parse values containing '=' in Identifier
2 parents cad9f85 + 2b02ace commit c878c51

File tree

2 files changed

+18 / -18 lines changed

2 files changed

+18 / -18 lines changed

ckanext/fairdatapoint/harvesters/domain/identifier.py

Lines changed: 11 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@
1010
class IdentifierException(Exception):
1111
pass
1212

13-
1413
class Identifier:
1514

1615
def __init__(self, guid: str):
@@ -31,22 +30,17 @@ def get_id_value(self) -> str:
3130
def get_part(self, index: int) -> str:
3231
key_values = self.guid.split(SEPARATOR)
3332

34-
if len(key_values) > 0:
35-
# Get the last one, that's the one we are interested in
36-
key_value = key_values[-1].split(KEY_VALUE_SEPARATOR)
37-
if len(key_value) == 2:
38-
result = key_value[index]
39-
else:
40-
raise IdentifierException(
41-
"Unexpected number of parts in key_value [{}]: [{}]",
42-
key_values[1],
43-
len(key_value),
44-
)
45-
else:
33+
if not self.guid.strip() or key_values == ['']:
34+
raise IdentifierException(
35+
f"Empty or improperly formatted record identifier: [{self.guid}]"
36+
)
37+
38+
key_value = key_values[-1].split(KEY_VALUE_SEPARATOR, 1)
39+
40+
if len(key_value) != 2:
4641
raise IdentifierException(
47-
"Unexpected number of parts in record identifier [{}]: [{}]",
48-
self.guid,
49-
len(key_values),
42+
f"Unexpected number of parts in key_value [{key_values[-1]}]: [{len(key_value)}]"
5043
)
5144

52-
return result
45+
return key_value[index]
46+

ckanext/fairdatapoint/tests/test_identifier.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,11 +33,17 @@ def test_get_part_zero_index(self):
3333
assert actual == expected
3434

3535
def test_get_part_no_separator(self):
36-
with pytest.raises(IndexError):
36+
with pytest.raises(IdentifierException):
3737
identifier = Identifier("some_id_no_separator")
3838
part = identifier.get_part(index=0)
3939

4040
def test_get_part_raises(self):
4141
with pytest.raises(IdentifierException):
4242
identifier = Identifier("too_many;id_separators;in_an_id")
4343
part = identifier.get_part(index=1)
44+
45+
def test_get_id_type_and_value(self):
46+
guid = "dataset=http://example.com/resource?id=123"
47+
identifier = Identifier(guid)
48+
assert identifier.get_id_type() == "dataset"
49+
assert identifier.get_id_value() == "http://example.com/resource?id=123"

0 commit comments

Comments
 (0)