Skip to content

Commit 4a39868

Browse files
authored
Merge pull request #22 from GenomicDataInfrastructure/fix-tags-validation
fix: improve tags validation
2 parents 00e7a43 + 2793ec8 commit 4a39868

File tree

2 files changed

+15
-7
lines changed

2 files changed

+15
-7
lines changed

ckanext/fairdatapoint/profiles.py

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -69,14 +69,19 @@ def validate_tags(values_list: List[Dict]) -> List:
6969
tags = []
7070
for item in values_list:
7171
tag_value = item['name']
72-
find_illegal = re.search(illegal_pattern, tag_value)
73-
if find_illegal:
74-
log.warning(f'Tag {tag_value} contains values other than alphanumeric characters, spaces, hyphens, '
75-
f'underscores or dots, they will be replaces with spaces')
76-
tag = {'name': re.sub(illegal_pattern, ' ', tag_value)}
77-
tags.append(tag)
72+
if len(tag_value) < 2:
73+
log.warning(f'Tag {tag_value} is shorter than 2 characters and will be removed')
74+
elif len(tag_value) > 100:
75+
log.warning(f'Tag {tag_value} is longer than 100 characters and will be removed')
7876
else:
79-
tags.append(item)
77+
find_illegal = re.search(illegal_pattern, tag_value)
78+
if find_illegal:
79+
log.warning(f'Tag {tag_value} contains values other than alphanumeric characters, spaces, hyphens, '
80+
f'underscores or dots, they will be replaces with spaces')
81+
tag = {'name': re.sub(illegal_pattern, ' ', tag_value)}
82+
tags.append(tag)
83+
else:
84+
tags.append(item)
8085
return tags
8186

8287

ckanext/fairdatapoint/tests/test_profiles.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,9 @@
3232
([{"name": "CNS/Brain"}], [{"name": "CNS Brain"}]),
3333
([{"name": "COVID-19"}, {"name": "3`-DNA"}], [{"name": "COVID-19"}, {"name": "3 -DNA"}]),
3434
([{"name": "something-1.1"}, {"name": "breast cancer"}], [{"name": "something-1.1"}, {"name": "breast cancer"}]),
35+
([{"name": "-"}], []),
36+
([{"name": "It is a ridiculously long (more 100 chars) text for a tag therefore it should be removed from the "
37+
"result to prevent CKAN harvester from failing"}], []),
3538
([], [])
3639
])
3740
def test_validate_tags(input_tags, expected_tags):

0 commit comments

Comments
 (0)