From 9fa1f0161512dbac60278dddfd57e0a5679e194b Mon Sep 17 00:00:00 2001 From: Renovate Bot Date: Mon, 11 Aug 2025 15:07:01 +0000 Subject: [PATCH 01/11] chore(deps): update actions/checkout action to v5 --- .github/workflows/generate_changelog.yml | 2 +- .github/workflows/main.yml | 2 +- .github/workflows/release.yml | 6 +++--- .github/workflows/test.yml | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/generate_changelog.yml b/.github/workflows/generate_changelog.yml index 385d01a..2b466f2 100644 --- a/.github/workflows/generate_changelog.yml +++ b/.github/workflows/generate_changelog.yml @@ -16,7 +16,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: token: '${{ secrets.GITHUB_TOKEN }}' fetch-depth: 0 diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c8e27d2..8b55501 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -18,7 +18,7 @@ jobs: permissions: contents: read steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: oss-review-toolkit/ort-ci-github-action@v1 with: allow-dynamic-versions: "true" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5c5796c..3d93cf5 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -19,7 +19,7 @@ jobs: strategy: fail-fast: false steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: oss-review-toolkit/ort-ci-github-action@v1 with: allow-dynamic-versions: "true" @@ -33,7 +33,7 @@ jobs: new_tag: ${{ steps.tagging.outputs.new_tag }} steps: - name: Checkout Repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 @@ -73,7 +73,7 @@ jobs: needs: versioning steps: - name: Checkout Repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Install GitHub CLI run: sudo apt-get install -y gh diff --git a/.github/workflows/test.yml 
b/.github/workflows/test.yml index 560c787..7d8746e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -37,7 +37,7 @@ jobs: CKAN_REDIS_URL: redis://redis:6379/1 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Install requirements (common) run: | pip install -r requirements.txt @@ -78,6 +78,6 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR information, if any SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: REUSE Compliance Check uses: fsfe/reuse-action@v5 From 02c7b4bf058c995c30578a73232fd4c2eba1f67c Mon Sep 17 00:00:00 2001 From: Renovate Bot Date: Fri, 19 Sep 2025 06:07:14 +0000 Subject: [PATCH 02/11] chore(deps): update dependency rdflib to ~=7.2.1 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 1c08861..39b2b08 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: AGPL-3.0-only -rdflib~=7.1.0 +rdflib~=7.2.1 setuptools~=80.9.0 nose~=1.3.7 requests~=2.32.3 \ No newline at end of file From d5fc4789a68313d4f4ceca651e6e166d3e17a217 Mon Sep 17 00:00:00 2001 From: Hans-christian Date: Wed, 24 Sep 2025 15:54:00 +0200 Subject: [PATCH 03/11] feat(multi-lingual-support) Added extra sanitasation on translated tags --- ckanext/fairdatapoint/profiles.py | 32 +++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/ckanext/fairdatapoint/profiles.py b/ckanext/fairdatapoint/profiles.py index 9d6b3fc..ac24583 100644 --- a/ckanext/fairdatapoint/profiles.py +++ b/ckanext/fairdatapoint/profiles.py @@ -64,12 +64,40 @@ class FAIRDataPointDCATAPProfile(EuropeanHealthDCATAPProfile): def parse_dataset(self, dataset_dict: Dict, dataset_ref: URIRef) -> Dict: super(FAIRDataPointDCATAPProfile, self).parse_dataset(dataset_dict, dataset_ref) - dataset_dict['tags'] = 
validate_tags(dataset_dict['tags']) + tags_translated = dataset_dict.get('tags_translated') + if isinstance(tags_translated, dict): + dataset_dict['tags_translated'] = self._sanitize_tags_translated(tags_translated) + + default_lang_tags = dataset_dict['tags_translated'].get(self._default_lang) or next( + (values for values in dataset_dict['tags_translated'].values() if values), + [] + ) + dataset_dict['tags'] = [{'name': tag} for tag in default_lang_tags] + + dataset_dict['tags'] = validate_tags(dataset_dict.get('tags', [])) dataset_dict = self._fix_wikidata_uris(dataset_dict, PACKAGE_REPLACE_FIELDS) return dataset_dict + def _sanitize_tags_translated(self, tags_translated: Dict[str, List[str]]) -> Dict[str, List[str]]: + """Remove invalid multilingual tags to satisfy CKAN length rules.""" + + sanitized: Dict[str, List[str]] = {} + + for lang, values in tags_translated.items(): + tag_dicts = [{'name': value} for value in values if value] + cleaned = validate_tags(tag_dicts) + sanitized[lang] = [tag['name'] for tag in cleaned] + + if len(values) != len(sanitized[lang]): + log.warning( + 'Removed invalid tags for language %s during multilingual sanitation', + lang + ) + + return sanitized + @staticmethod def _rewrite_wikidata_url(uri: str) -> str: """This function fixes Wikidata URIs to use references instead of web URI @@ -110,4 +138,4 @@ def _fix_wikidata_uris(self, dataset_dict: dict, fields_list: list[str]): else: new_value = self._rewrite_wikidata_url(value) dataset_dict[field] = new_value - return dataset_dict \ No newline at end of file + return dataset_dict From 29cf6c4f646962db6faaa17c94c028493ff1b2df Mon Sep 17 00:00:00 2001 From: Hans-christian Date: Fri, 26 Sep 2025 11:05:24 +0200 Subject: [PATCH 04/11] update test.yml --- .github/workflows/test.yml | 97 +- .../scheming/schemas/gdi_userportal.yaml | 965 +++++++++++++++++- test.ini | 6 +- 3 files changed, 971 insertions(+), 97 deletions(-) diff --git a/.github/workflows/test.yml 
b/.github/workflows/test.yml index 560c787..1137cea 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -27,7 +27,7 @@ jobs: POSTGRES_DB: postgres options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 redis: - image: redis:8 + image: redis:8 env: CKAN_SQLALCHEMY_URL: postgresql://ckan_default:pass@postgres/ckan_test @@ -36,48 +36,59 @@ jobs: CKAN_SOLR_URL: http://solr:8983/solr/ckan CKAN_REDIS_URL: redis://redis:6379/1 + permissions: + contents: read + packages: write + steps: - - uses: actions/checkout@v4 - - name: Install requirements (common) - run: | - pip install -r requirements.txt - pip install -r dev-requirements.txt - pip install -e . - - name: Setup CKAN extensions (harvest, scheming, dcat) - run: | - # Harvest v1.6.1 from GitHub - git clone https://github.com/ckan/ckanext-harvest - cd ckanext-harvest - git checkout tags/v1.6.1 - pip install -e . - pip install -r requirements.txt + - uses: actions/checkout@v5 + - name: REUSE Compliance Check + uses: fsfe/reuse-action@v5 + - name: Install requirements (common) + run: | + pip install -r requirements.txt + pip install -r dev-requirements.txt + pip install -e . + - name: Setup CKAN extensions (harvest, scheming, dcat, fluent) + run: | + # Harvest v1.6.1 from GitHub + git clone https://github.com/ckan/ckanext-harvest + cd ckanext-harvest + git checkout tags/v1.6.1 + pip install -e . + pip install -r requirements.txt + cd .. + + # Scheming release 3.1.0 + pip install -e 'git+https://github.com/ckan/ckanext-scheming.git@release-3.1.0#egg=ckanext-scheming[requirements]' - # Scheming (Civity fork) - pip install -e 'git+https://github.com/CivityNL/ckanext-scheming.git@3.0.0-civity-1#egg=ckanext-scheming[requirements]' + # DCAT extension for FAIR Data Point + git clone https://github.com/GenomicDataInfrastructure/gdi-userportal-ckanext-dcat + cd gdi-userportal-ckanext-dcat + git checkout v2.3.3 + pip install -e . 
+ if [ -f requirements.txt ]; then + pip install -r requirements.txt + fi + cd .. - git clone https://github.com/GenomicDataInfrastructure/gdi-userportal-ckanext-dcat - cd gdi-userportal-ckanext-dcat - git checkout master - pip install -e . - pip install -r requirements.txt - - name: Setup extension - run: | - sed -i -e 's/use = config:.*/use = config:\/srv\/app\/src\/ckan\/test-core.ini/' test.ini - ckan -c test.ini db init - ckan -c test.ini db pending-migrations --apply - - name: Run tests - run: | - pytest --ckan-ini=test.ini --cov=ckanext.fairdatapoint --disable-warnings ckanext/fairdatapoint - - name: Generate coverage report - run: | - coverage xml -o coverage.xml - - name: Install unzip - run: apt-get update && apt-get install -y unzip - - name: SonarCloud Scan - uses: sonarsource/sonarcloud-github-action@v5 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR information, if any - SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} - - uses: actions/checkout@v4 - - name: REUSE Compliance Check - uses: fsfe/reuse-action@v5 + # Fluent extension + pip install -e 'git+https://github.com/ckan/ckanext-fluent.git#egg=ckanext-fluent' + - name: Setup extension + run: | + sed -i -e 's/use = config:.*/use = config:\/srv\/app\/src\/ckan\/test-core.ini/' test.ini + ckan -c test.ini db init + ckan -c test.ini db pending-migrations --apply + - name: Run tests + run: | + pytest --ckan-ini=test.ini --cov=ckanext.fairdatapoint --disable-warnings ckanext/fairdatapoint + - name: Generate coverage report + run: | + coverage xml -o coverage.xml + - name: Install unzip + run: apt-get update && apt-get install -y unzip + - name: SonarCloud Scan + uses: sonarsource/sonarcloud-github-action@v5 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR information, if any + SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} diff --git a/ckanext/fairdatapoint/tests/test_data/scheming/schemas/gdi_userportal.yaml 
b/ckanext/fairdatapoint/tests/test_data/scheming/schemas/gdi_userportal.yaml index 690deaa..854a64b 100644 --- a/ckanext/fairdatapoint/tests/test_data/scheming/schemas/gdi_userportal.yaml +++ b/ckanext/fairdatapoint/tests/test_data/scheming/schemas/gdi_userportal.yaml @@ -1,77 +1,940 @@ -#SPDX-FileCopyrightText: 2024 PNED G.I.E. +# SPDX-FileCopyrightText: 2024 PNED G.I.E. # -#SPDX-License-Identifier: Apache-2.0 +# SPDX-License-Identifier: Apache-2.0 scheming_version: 2 dataset_type: dataset -about: DCAT-AP 3 compatible schema -about_url: http://github.com/ckan/ckanext-dcat +about: HealthDCAT-AP schema extended with GDI-specific fields +about_url: https://github.com/GenomicDataInfrastructure/gdi-userportal-ckan-docker + +form_languages: [en, nl] dataset_fields: -- field_name: has_version - label: - en: Has Version - nl: Bevat Versie - preset: multiple_text - help_inline: true - help_text: - en: "[dct:hasVersion] This property refers to a related Dataset that is a version, edition, or adaptation of the described Dataset." - nl: "[dct:hasVersion] Deze eigenschap verwijst naar een gerelateerde Dataset die een versie, editie of aanpassing is van de beschreven Dataset." +- field_name: title_translated + label: Title + preset: fluent_core_translated + help_text: A descriptive title for the dataset. + +- field_name: name + label: URL + preset: dataset_slug + form_placeholder: eg. my-dataset + +- field_name: notes_translated + label: Description + preset: fluent_core_translated + form_snippet: fluent_markdown.html + display_snippet: fluent_markdown.html + help_text: A free-text account of the dataset. + +- field_name: tags_translated + label: Keywords + preset: fluent_tags + form_placeholder: eg. economy, mental health, government + help_text: Keywords or tags describing the dataset. Use commas to separate multiple values. 
+ +- field_name: contact + label: Contact points + repeating_label: Contact point + repeating_subfields: + + - field_name: uri + label: URI + + - field_name: name + label: Name + + - field_name: name_translated + label: Name (translations) + preset: fluent_core_translated + help_text: Name of the contact point in each language. + + - field_name: email + label: Email + display_snippet: email.html + + - field_name: identifier + label: Identifier + help_text: Unique identifier for the contact point. Such as a ROR ID. + + help_text: Contact information for enquiries about the dataset. + +- field_name: publisher + label: Publisher + repeating_label: Publisher + repeating_once: true + repeating_subfields: + + - field_name: uri + label: URI + + - field_name: name + label: Name + + - field_name: name_translated + label: Name (translations) + preset: fluent_core_translated + help_text: Name of the entity or person who published the dataset in each language. + + - field_name: email + label: Email + display_snippet: email.html + + - field_name: url + label: URL + display_snippet: link.html + + - field_name: type + label: Type + + - field_name: identifier + label: Identifier + help_text: Unique identifier for the publisher, such as a ROR ID. + help_text: Entity responsible for making the dataset available. + +- field_name: creator + label: Creator + repeating_label: Creator + repeating_once: true + repeating_subfields: + + - field_name: uri + label: URI + help_text: URI of the creator, if available. + + - field_name: name + label: Name + help_text: Name of the entity or person who created the dataset. + - field_name: name_translated + label: Name (translations) + preset: fluent_core_translated + help_text: Name of the entity or person who created the dataset in each language. + + - field_name: email + label: Email + display_snippet: email.html + help_text: Contact email of the creator. 
+ + - field_name: url + label: URL + display_snippet: link.html + help_text: URL for more information about the creator. + + - field_name: type + label: Type + help_text: Type of creator (e.g., Organization, Person). + + - field_name: identifier + label: Identifier + help_text: Unique identifier for the creator, such as an ORCID or ROR ID. + +- field_name: license_id + label: License + form_snippet: license.html + help_text: License definitions and additional information can be found at http://opendefinition.org/. + +- field_name: owner_org + label: Organization + preset: dataset_organization + help_text: The CKAN organization the dataset belongs to. + +- field_name: url + label: Landing page + form_placeholder: http://example.com/dataset.json + display_snippet: link.html + help_text: Web page that can be navigated to gain access to the dataset, its distributions and/or additional information. + +# Note: this will fall back to metadata_created if not present - field_name: issued - label: - en: Issued Date - nl: Uitgegeven Datum + label: Release date preset: datetime_flex - help_text: - en: "[dct:issued] This property contains the date of formal issuance (e.g., publication) of the Dataset." - nl: "[dct:issued] Deze eigenschap bevat de datum van formele uitgave (bijv. publicatie) van de Dataset." + help_text: Date of publication of the dataset. +# Note: this will fall back to metadata_modified if not present - field_name: modified - label: - en: Modification Date - nl: Datum Wijziging + label: Modification date preset: datetime_flex - help_text: - en: "[dct:modified] This property contains the most recent date on which the Dataset was changed or modified." - nl: "[dct:modified] Deze eigenschap bevat de meest recente datum waarop de Dataset is gewijzigd of gewijzigd." + help_text: Most recent date on which the dataset was changed, updated or modified. 
- field_name: temporal_start - label: - en: Temporal Start Date - nl: Begindatum tijdsperiode - help_inline: true - help_text: - en: "[dct:temporal] This property refers to a temporal period that the Dataset covers." - nl: "[dct:temporal] Deze eigenschap verwijst naar een tijdsperiode die door de Dataset wordt gedekt." + label: Temporal start date preset: datetime_flex + help_inline: true + help_text: Start of the time period that the dataset covers. - field_name: temporal_end - label: - en: Temporal End Date - nl: Einddatum tijdsperiode - help_inline: true - help_text: - en: "[dct:temporal] This property refers to a temporal period that the Dataset covers." - nl: "[dct:temporal] Deze eigenschap verwijst naar een tijdsperiode die door de Dataset wordt gedekt." + label: Temporal end date preset: datetime_flex + help_inline: true + help_text: End of the time period that the dataset covers. + +- field_name: in_series + label: In series + form_snippet: multiple_select.html + display_snippet: multiple_choice.html + validators: ignore_missing series_validator + output_validators: scheming_multiple_choice_output + convert: convert_to_extras + choices_helper: in_series_choices + form_select_attrs: + data-module: autocomplete + class: ~ + help_text: Link this dataset to one or more dataset series. + +- field_name: version + label: Version + validators: ignore_missing unicode_safe package_version_validator + help_text: Version number or other version designation of the dataset. + +- field_name: version_notes + label: Version notes + preset: fluent_markdown + help_text: A description of the differences between this version and a previous version of the dataset. + +# Note: CKAN will generate a unique identifier for each dataset +- field_name: identifier + label: Identifier + help_text: A unique identifier of the dataset. + +- field_name: frequency + label: Frequency + help_text: The frequency at which dataset is published. 
+ +- field_name: provenance + label: Provenance + preset: fluent_markdown + help_text: A statement about the lineage of the dataset. + +- field_name: dcat_type + label: Type + help_text: The type of the dataset. + # TODO: controlled vocabulary? + +- field_name: temporal_coverage + label: Temporal coverage + repeating_subfields: + + - field_name: start + label: Start + preset: datetime_flex + + - field_name: end + label: End + preset: datetime_flex + help_text: The temporal period or periods the dataset covers. + +- field_name: temporal_resolution + label: Temporal resolution + help_text: Minimum time period resolvable in the dataset. + +- field_name: spatial_coverage + label: Spatial coverage + repeating_subfields: + + - field_name: uri + label: URI + + - field_name: text + label: Label + + - field_name: geom + label: Geometry + + - field_name: bbox + label: Bounding Box + + - field_name: centroid + label: Centroid + help_text: A geographic region that is covered by the dataset. + +- field_name: spatial_resolution_in_meters + label: Spatial resolution in meters + help_text: Minimum spatial separation resolvable in a dataset, measured in meters. + +- field_name: access_rights + label: Access rights + validators: ignore_missing unicode_safe + help_text: Information that indicates whether the dataset is Open Data, has access restrictions or is not public. + +- field_name: alternate_identifier + label: Other identifier + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: This property refers to a secondary identifier of the dataset, such as MAST/ADS, DataCite, DOI, etc. + +- field_name: theme + label: Theme + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: A category of the dataset. A Dataset may be associated with multiple themes. 
+ +- field_name: language + label: Language + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: Language or languages of the dataset. + # TODO: language form snippet / validator / graph + +- field_name: documentation + label: Documentation + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: A page or document about this dataset. + +- field_name: conforms_to + label: Conforms to + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: An implementing rule or other specification that the dataset follows. + +- field_name: is_referenced_by + label: Is referenced by + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: A related resource, such as a publication, that references, cites, or otherwise points to the dataset. + +- field_name: analytics + label: Analytics + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: > + An analytics distribution of the dataset. + Publishers are encouraged to provide URLs pointing to API endpoints or document + repositories where users can access or request associated resources such as + technical reports of the dataset, quality measurements, usability indicators,... + or analytics services. + +- field_name: applicable_legislation + label: Applicable legislation + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: The legislation that mandates the creation or management of the dataset. + +- field_name: has_version + label: Has version + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_inline: true + help_text: This property refers to a related Dataset that is a version, edition, or adaptation of the described Dataset. 
+ +- field_name: code_values + label: Code values + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: Health classifications and their codes associated with the dataset. + +- field_name: coding_system + label: Coding system + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: > + Coding systems in use (e.g., ICD-10-CM, DGRs, SNOMED CT, ...). + To comply with HealthDCAT-AP, Wikidata URIs MUST be used. + +- field_name: purpose + label: Purpose + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: A free text statement of the purpose of the processing of data or personal data. + +- field_name: health_category + label: Health category + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: > + The health category to which this dataset belongs as described in the Commission Regulation on + the European Health Data Space laying down a list of categories of electronic data for + secondary use, Art.33. + +- field_name: health_theme + label: Health theme + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: > + A category of the Dataset or tag describing the Dataset. + +- field_name: legal_basis + label: Legal basis + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: The legal basis used to justify processing of personal data. + +- field_name: min_typical_age + label: Minimum typical age + validators: ignore_missing int_validator + form_snippet: number.html + help_text: Minimum typical age of the population within the dataset. + +- field_name: max_typical_age + label: Maximum typical age + validators: ignore_missing int_validator + form_snippet: number.html + help_text: Maximum typical age of the population within the dataset. 
+ +- field_name: number_of_records + label: Number of records + validators: ignore_missing int_validator + form_snippet: number.html + help_text: Size of the dataset in terms of the number of records. + +- field_name: number_of_unique_individuals + label: Number of records for unique individuals. + validators: ignore_missing int_validator + form_snippet: number.html + help_text: Number of records for unique individuals. + +- field_name: personal_data + label: Personal data + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: Key elements that represent an individual in the dataset. + +- field_name: publisher_note + label: Publisher note + preset: fluent_markdown + help_text: A description of the publisher activities. + +- field_name: publisher_type + label: Publisher type + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: A type of organisation that makes the Dataset available. + +- field_name: trusted_data_holder + label: Trusted Data Holder + preset: select + choices: + - value: false + label: "No" + - value: true + label: "Yes" + validators: ignore_missing boolean_validator + help_text: Indicates whether the dataset is held by a trusted data holder. + output_validators: boolean_validator + +- field_name: population_coverage + label: Population coverage + preset: fluent_markdown + help_text: A definition of the population within the dataset. + +- field_name: retention_period + label: Retention period + repeating_subfields: + + - field_name: start + label: Start + preset: datetime_flex + + - field_name: end + label: End + preset: datetime_flex + + help_text: A temporal period which the dataset is available for secondary use. 
+ +- field_name: hdab + label: Health data access body + repeating_label: Health data access body + repeating_once: true + repeating_subfields: + + - field_name: uri + label: URI + + - field_name: name + label: Name + + - field_name: name_translated + label: Name (translations) + preset: fluent_core_translated + help_text: Name of the health data access body in each language. + + - field_name: email + label: Email + display_snippet: email.html + + - field_name: url + label: URL + display_snippet: link.html + + - field_name: type + label: Type + + - field_name: identifier + label: Identifier + help_text: Unique identifier for the HDAB, such as a ROR ID. + help_text: Health Data Access Body supporting access to data in the Member State. + +- field_name: qualified_relation + label: Qualified relation + repeating_label: Relationship + repeating_subfields: + + - field_name: uri + label: URI + + - field_name: relation + label: Relation + help_text: The resource related to the source resource. + + - field_name: role + label: Role + help_text: The function of an entity or agent with respect to another entity or resource. + help_text: A description of a relationship with another resource. + +- field_name: provenance_activity + label: Provenance activity + repeating_label: Provenance activity + repeating_once: true + repeating_subfields: + - field_name: uri + label: Activity URI + help_text: URI of the provenance activity (if available). + - field_name: label + label: Label + help_text: Human-readable label for the activity. + - field_name: type + label: Activity type + help_text: Type of the activity. + - field_name: seeAlso + label: See also + help_text: Related link for the activity. + - field_name: dct_type + label: Type + help_text: Type of the activity (URI). + - field_name: startedAtTime + label: Started at time + preset: datetime_flex + help_text: When the activity started (ISO 8601). 
+ - field_name: wasAssociatedWith + label: Associated agent + repeating_label: Agent + repeating_once: true + repeating_subfields: + - field_name: uri + label: URI + - field_name: name + label: Name + - field_name: email + label: Email + display_snippet: email.html + - field_name: url + label: URL + display_snippet: link.html + - field_name: homepage + label: Homepage + display_snippet: link.html + - field_name: type + label: Type + - field_name: identifier + label: Identifier + - field_name: actedOnBehalfOf + label: Acted on behalf of + repeating_label: Organization + repeating_once: true + repeating_subfields: + - field_name: uri + label: URI + - field_name: name + label: Name + - field_name: email + label: Email + display_snippet: email.html + - field_name: url + label: URL + display_snippet: link.html + - field_name: type + label: Type + - field_name: identifier + label: Identifier + help_text: Structured provenance activity information, including agents and organizations. + +- field_name: qualified_attribution + label: Qualified attribution + repeating_label: Attribution + repeating_once: true + repeating_subfields: + - field_name: agent + label: Agent + repeating_label: Agent + repeating_once: true + repeating_subfields: + - field_name: uri + label: URI + - field_name: name + label: Name + + - field_name: name_translated + label: Name (translations) + preset: fluent_core_translated + help_text: Agent name in each language. + + - field_name: email + label: Email + display_snippet: email.html + - field_name: url + label: URL + display_snippet: link.html + - field_name: homepage + label: Homepage + display_snippet: link.html + - field_name: type + label: Type + - field_name: identifier + label: Identifier + - field_name: role + label: Role + help_text: Role of the agent (e.g., data processor, contributor). + help_text: Structured qualified attribution information including agent and role. 
+ +- field_name: quality_annotation + label: Quality annotations + repeating_label: Quality annotation + repeating_subfields: + - field_name: body + label: Body + help_text: Content of the quality annotation (e.g., URL to certificate, measurement value, assessment result). + - field_name: target + label: Target + help_text: Aspect of the dataset being annotated (e.g., URI or description of what is being assessed). + - field_name: motivated_by + label: Motivated by + help_text: Motivation or reason for the quality annotation. + help_text: Quality annotations following DQV and Web Annotation standards. + +# Note: if not provided, this will be autogenerated +- field_name: uri + label: URI + help_text: An URI for this dataset (if not provided it will be autogenerated). resource_fields: + +- field_name: url + label: URL + preset: resource_url_upload + +- field_name: name_translated + label: Name + preset: fluent_core_translated + help_text: A descriptive title for the resource. + +- field_name: description_translated + label: Description + preset: fluent_core_translated + form_snippet: fluent_markdown.html + display_snippet: fluent_markdown.html + help_text: A free-text account of the resource. + +- field_name: format + label: Format + preset: resource_format_autocomplete + help_text: File format. If not provided it will be guessed. + +- field_name: mimetype + label: Media type + validators: if_empty_guess_format ignore_missing unicode_safe + help_text: Media type for this format. If not provided it will be guessed. + +- field_name: compress_format + label: Compress format + help_text: The format of the file in which the data is contained in a compressed form. + +- field_name: package_format + label: Package format + help_text: The format of the file in which one or more data files are grouped together. 
+ +- field_name: size + label: Size + validators: ignore_missing int_validator + form_snippet: number.html + display_snippet: file_size.html + help_text: File size in bytes. + +- field_name: hash + label: Hash + help_text: Checksum of the downloaded file. + +- field_name: hash_algorithm + label: Hash Algorithm + help_text: Algorithm used to calculate to checksum. + +- field_name: rights + label: Rights + preset: fluent_markdown + help_text: Some statement about the rights associated with the resource. + +- field_name: availability + label: Availability + help_text: Indicates how long it is planned to keep the resource available. + +- field_name: status + label: Status + preset: select + choices: + - value: http://purl.org/adms/status/Completed + label: Completed + - value: http://purl.org/adms/status/UnderDevelopment + label: Under Development + - value: http://purl.org/adms/status/Deprecated + label: Deprecated + - value: http://purl.org/adms/status/Withdrawn + label: Withdrawn + help_text: The status of the resource in the context of maturity lifecycle. + +- field_name: license + label: License + help_text: License in which the resource is made available. If not provided will be inherited from the dataset. + +# Note: this falls back to the standard resource url field +- field_name: access_url + label: Access URL + help_text: URL that gives access to the dataset (defaults to the standard resource URL). + +# Note: this falls back to the standard resource url field +- field_name: download_url + label: Download URL + display_snippet: link.html + help_text: URL that provides a direct link to a downloadable file (defaults to the standard resource URL). + - field_name: issued - label: - en: Issued Date - nl: Uitgegeven Datum + label: Release date preset: datetime_flex - help_text: - en: "[dct:issued] This property contains the date of formal issuance (e.g., publication) of the Resource." - nl: "[dct:issued] Deze eigenschap bevat de datum van formele uitgave (bijv. 
publicatie) van de Resource." + help_text: Date of publication of the resource. - field_name: modified - label: - en: Modification Date - nl: Datum Wijziging + label: Modification date preset: datetime_flex - help_text: - en: "[dct:modified] This property contains the most recent date on which the Resource was changed or modified." - nl: "[dct:modified] Deze eigenschap bevat de meest recente datum waarop de Resource is gewijzigd of gewijzigd." \ No newline at end of file + help_text: Most recent date on which the resource was changed, updated or modified. + +- field_name: retention_period + label: Retention period + repeating_subfields: + - field_name: start + label: Start + preset: datetime_flex + + - field_name: end + label: End + preset: datetime_flex + help_text: Temporal period during which the resource remains available for use. + +- field_name: temporal_resolution + label: Temporal resolution + help_text: Minimum time period resolvable in the distribution. + +- field_name: spatial_resolution_in_meters + label: Spatial resolution in meters + help_text: Minimum spatial separation resolvable in the distribution, measured in meters. + +- field_name: language + label: Language + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: Language or languages of the resource. + +- field_name: documentation + label: Documentation + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: A page or document about this resource. + +- field_name: conforms_to + label: Conforms to + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: An established schema to which the described resource conforms. + +- field_name: applicable_legislation + label: Applicable legislation + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: The legislation that mandates the creation or management of the resource. 
+ +- field_name: access_services + label: Access services + repeating_label: Access service + repeating_subfields: + + - field_name: uri + label: URI + + - field_name: title + label: Title + + - field_name: description + label: Description + form_snippet: markdown.html + help_text: A free-text account of the data service. + + - field_name: endpoint_description + label: Endpoint description + + - field_name: endpoint_url + label: Endpoint URL + preset: multiple_text + + - field_name: serves_dataset + label: Serves dataset + preset: multiple_text + validators: ignore_missing scheming_multiple_text + + - field_name: access_rights + label: Access rights + validators: ignore_missing unicode_safe + help_text: Information regarding access or restrictions based on privacy, security, or other policies. + + - field_name: conforms_to + label: Conforms to + preset: multiple_text + validators: ignore_missing scheming_multiple_text + + - field_name: format + label: Format + preset: multiple_text + validators: ignore_missing scheming_multiple_text + + - field_name: identifier + label: Identifier + + - field_name: language + label: Language + preset: multiple_text + validators: ignore_missing scheming_multiple_text + + - field_name: rights + label: Rights + form_snippet: markdown.html + help_text: Rights statement for the data service. + + - field_name: landing_page + label: Landing page + preset: multiple_text + validators: ignore_missing scheming_multiple_text + + - field_name: keyword + label: Keywords + preset: tag_string_autocomplete + form_placeholder: eg. economy, mental health, government + help_text: Keywords or tags describing the data service. Use commas to separate multiple values. + + - field_name: applicable_legislation + label: Applicable legislation + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: The legislation that mandates the creation or management of the data service. 
+ + - field_name: contact + label: Contact point + repeating_label: Contact point + repeating_once: true + repeating_subfields: + + - field_name: uri + label: URI + + - field_name: name + label: Name + + - field_name: name_translated + label: Name (translations) + preset: fluent_core_translated + help_text: Name of the contact point in each language. + + - field_name: email + label: Email + display_snippet: email.html + + - field_name: identifier + label: Identifier + help_text: Unique identifier for the contact point, such as a ROR ID. + + - field_name: url + label: URL + display_snippet: link.html + help_text: Contact information for enquiries about the data service. + + - field_name: creator + label: Creator + repeating_label: Creator + repeating_subfields: + + - field_name: uri + label: URI + help_text: URI of the creator, if available. + + - field_name: name + label: Name + help_text: Name of the entity or person who created the data service. + + - field_name: name_translated + label: Name (translations) + preset: fluent_core_translated + help_text: Name of the entity or person who created the data service in each language. + + - field_name: email + label: Email + display_snippet: email.html + help_text: Contact email of the creator. + + - field_name: url + label: URL + display_snippet: link.html + help_text: URL for more information about the creator. + + - field_name: type + label: Type + help_text: Type of creator (e.g., Organization, Person). + + - field_name: identifier + label: Identifier + help_text: Unique identifier for the creator, such as an ORCID or ROR ID. + + - field_name: publisher + label: Publisher + repeating_label: Publisher + repeating_once: true + repeating_subfields: + + - field_name: uri + label: URI + + - field_name: name + label: Name + + - field_name: name_translated + label: Name (translations) + preset: fluent_core_translated + help_text: Name of the entity or person who publishes the data service in each language. 
+ + - field_name: email + label: Email + display_snippet: email.html + + - field_name: url + label: URL + display_snippet: link.html + + - field_name: type + label: Type + + - field_name: identifier + label: Identifier + help_text: Unique identifier for the publisher, such as a ROR ID. + help_text: Entity responsible for making the data service available. + + - field_name: license + label: License + help_text: License in which the data service is made available. + + - field_name: modified + label: Modification date + preset: datetime_flex + help_text: Most recent date on which the data service was changed, updated or modified. + + help_text: A data service that gives access to the resource. + +# Note: if not provided, this will be autogenerated +- field_name: uri + label: URI + help_text: An URI for this resource (if not provided it will be autogenerated). diff --git a/test.ini b/test.ini index f2e2567..2ee42e1 100644 --- a/test.ini +++ b/test.ini @@ -9,14 +9,14 @@ smtp_server = localhost error_email_from = ckan@localhost [app:main] -scheming.dataset_schemas = ckanext.dcat.schemas:health_dcat_ap.yaml ckanext.fairdatapoint:tests/test_data/scheming/schemas/gdi_userportal.yaml -scheming.presets = ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml ckanext.fairdatapoint:tests/test_data/scheming/presets/gdi_presets.yaml +scheming.dataset_schemas = ckanext.fairdatapoint:tests/test_data/scheming/schemas/gdi_userportal.yaml +scheming.presets = ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml ckanext.fluent:presets.json ckanext.fairdatapoint:tests/test_data/scheming/presets/gdi_presets.yaml scheming.dataset_fallback = false use = config:../../src/ckan/test-core.ini # Insert any custom config settings to be used when running your extension's # tests here. 
These will override the one defined in CKAN core's test-core.ini -ckan.plugins = dcat scheming_datasets fairdatapoint +ckan.plugins = dcat scheming_datasets fluent fairdatapoint ckanext.dcat.rdf.profiles = euro_health_dcat_ap euro_dcat_ap_3 euro_dcat_ap_scheming fairdatapoint_dcat_ap # Logging configuration From fb035edef920a3584d9df9ce32ed3a53bec2a7f6 Mon Sep 17 00:00:00 2001 From: Hans-christian Date: Fri, 26 Sep 2025 11:16:54 +0200 Subject: [PATCH 05/11] Fix test --- .github/workflows/test.yml | 2 - .../scheming/schemas/gdi_userportal.yaml | 965 +----------------- test.ini | 6 +- 3 files changed, 54 insertions(+), 919 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1137cea..aee7203 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -72,8 +72,6 @@ jobs: fi cd .. - # Fluent extension - pip install -e 'git+https://github.com/ckan/ckanext-fluent.git#egg=ckanext-fluent' - name: Setup extension run: | sed -i -e 's/use = config:.*/use = config:\/srv\/app\/src\/ckan\/test-core.ini/' test.ini diff --git a/ckanext/fairdatapoint/tests/test_data/scheming/schemas/gdi_userportal.yaml b/ckanext/fairdatapoint/tests/test_data/scheming/schemas/gdi_userportal.yaml index 854a64b..690deaa 100644 --- a/ckanext/fairdatapoint/tests/test_data/scheming/schemas/gdi_userportal.yaml +++ b/ckanext/fairdatapoint/tests/test_data/scheming/schemas/gdi_userportal.yaml @@ -1,940 +1,77 @@ -# SPDX-FileCopyrightText: 2024 PNED G.I.E. +#SPDX-FileCopyrightText: 2024 PNED G.I.E. 
# -# SPDX-License-Identifier: Apache-2.0 +#SPDX-License-Identifier: Apache-2.0 scheming_version: 2 dataset_type: dataset -about: HealthDCAT-AP schema extended with GDI-specific fields -about_url: https://github.com/GenomicDataInfrastructure/gdi-userportal-ckan-docker - -form_languages: [en, nl] +about: DCAT-AP 3 compatible schema +about_url: http://github.com/ckan/ckanext-dcat dataset_fields: -- field_name: title_translated - label: Title - preset: fluent_core_translated - help_text: A descriptive title for the dataset. - -- field_name: name - label: URL - preset: dataset_slug - form_placeholder: eg. my-dataset - -- field_name: notes_translated - label: Description - preset: fluent_core_translated - form_snippet: fluent_markdown.html - display_snippet: fluent_markdown.html - help_text: A free-text account of the dataset. - -- field_name: tags_translated - label: Keywords - preset: fluent_tags - form_placeholder: eg. economy, mental health, government - help_text: Keywords or tags describing the dataset. Use commas to separate multiple values. - -- field_name: contact - label: Contact points - repeating_label: Contact point - repeating_subfields: - - - field_name: uri - label: URI - - - field_name: name - label: Name - - - field_name: name_translated - label: Name (translations) - preset: fluent_core_translated - help_text: Name of the contact point in each language. - - - field_name: email - label: Email - display_snippet: email.html - - - field_name: identifier - label: Identifier - help_text: Unique identifier for the contact point. Such as a ROR ID. - - help_text: Contact information for enquiries about the dataset. 
- -- field_name: publisher - label: Publisher - repeating_label: Publisher - repeating_once: true - repeating_subfields: - - - field_name: uri - label: URI - - - field_name: name - label: Name - - - field_name: name_translated - label: Name (translations) - preset: fluent_core_translated - help_text: Name of the entity or person who published the dataset in each language. - - - field_name: email - label: Email - display_snippet: email.html - - - field_name: url - label: URL - display_snippet: link.html - - - field_name: type - label: Type - - - field_name: identifier - label: Identifier - help_text: Unique identifier for the publisher, such as a ROR ID. - help_text: Entity responsible for making the dataset available. - -- field_name: creator - label: Creator - repeating_label: Creator - repeating_once: true - repeating_subfields: - - - field_name: uri - label: URI - help_text: URI of the creator, if available. - - - field_name: name - label: Name - help_text: Name of the entity or person who created the dataset. - - - field_name: name_translated - label: Name (translations) - preset: fluent_core_translated - help_text: Name of the entity or person who created the dataset in each language. - - - field_name: email - label: Email - display_snippet: email.html - help_text: Contact email of the creator. - - - field_name: url - label: URL - display_snippet: link.html - help_text: URL for more information about the creator. - - - field_name: type - label: Type - help_text: Type of creator (e.g., Organization, Person). - - - field_name: identifier - label: Identifier - help_text: Unique identifier for the creator, such as an ORCID or ROR ID. - -- field_name: license_id - label: License - form_snippet: license.html - help_text: License definitions and additional information can be found at http://opendefinition.org/. - -- field_name: owner_org - label: Organization - preset: dataset_organization - help_text: The CKAN organization the dataset belongs to. 
- -- field_name: url - label: Landing page - form_placeholder: http://example.com/dataset.json - display_snippet: link.html - help_text: Web page that can be navigated to gain access to the dataset, its distributions and/or additional information. +- field_name: has_version + label: + en: Has Version + nl: Bevat Versie + preset: multiple_text + help_inline: true + help_text: + en: "[dct:hasVersion] This property refers to a related Dataset that is a version, edition, or adaptation of the described Dataset." + nl: "[dct:hasVersion] Deze eigenschap verwijst naar een gerelateerde Dataset die een versie, editie of aanpassing is van de beschreven Dataset." -# Note: this will fall back to metadata_created if not present - field_name: issued - label: Release date + label: + en: Issued Date + nl: Uitgegeven Datum preset: datetime_flex - help_text: Date of publication of the dataset. + help_text: + en: "[dct:issued] This property contains the date of formal issuance (e.g., publication) of the Dataset." + nl: "[dct:issued] Deze eigenschap bevat de datum van formele uitgave (bijv. publicatie) van de Dataset." -# Note: this will fall back to metadata_modified if not present - field_name: modified - label: Modification date + label: + en: Modification Date + nl: Datum Wijziging preset: datetime_flex - help_text: Most recent date on which the dataset was changed, updated or modified. + help_text: + en: "[dct:modified] This property contains the most recent date on which the Dataset was changed or modified." + nl: "[dct:modified] Deze eigenschap bevat de meest recente datum waarop de Dataset is gewijzigd of gewijzigd." - field_name: temporal_start - label: Temporal start date - preset: datetime_flex + label: + en: Temporal Start Date + nl: Begindatum tijdsperiode help_inline: true - help_text: Start of the time period that the dataset covers. 
- -- field_name: temporal_end - label: Temporal end date + help_text: + en: "[dct:temporal] This property refers to a temporal period that the Dataset covers." + nl: "[dct:temporal] Deze eigenschap verwijst naar een tijdsperiode die door de Dataset wordt gedekt." preset: datetime_flex - help_inline: true - help_text: End of the time period that the dataset covers. - -- field_name: in_series - label: In series - form_snippet: multiple_select.html - display_snippet: multiple_choice.html - validators: ignore_missing series_validator - output_validators: scheming_multiple_choice_output - convert: convert_to_extras - choices_helper: in_series_choices - form_select_attrs: - data-module: autocomplete - class: ~ - help_text: Link this dataset to one or more dataset series. - -- field_name: version - label: Version - validators: ignore_missing unicode_safe package_version_validator - help_text: Version number or other version designation of the dataset. - -- field_name: version_notes - label: Version notes - preset: fluent_markdown - help_text: A description of the differences between this version and a previous version of the dataset. - -# Note: CKAN will generate a unique identifier for each dataset -- field_name: identifier - label: Identifier - help_text: A unique identifier of the dataset. - -- field_name: frequency - label: Frequency - help_text: The frequency at which dataset is published. - -- field_name: provenance - label: Provenance - preset: fluent_markdown - help_text: A statement about the lineage of the dataset. - -- field_name: dcat_type - label: Type - help_text: The type of the dataset. - # TODO: controlled vocabulary? - -- field_name: temporal_coverage - label: Temporal coverage - repeating_subfields: - - - field_name: start - label: Start - preset: datetime_flex - - - field_name: end - label: End - preset: datetime_flex - help_text: The temporal period or periods the dataset covers. 
- -- field_name: temporal_resolution - label: Temporal resolution - help_text: Minimum time period resolvable in the dataset. - -- field_name: spatial_coverage - label: Spatial coverage - repeating_subfields: - - field_name: uri - label: URI - - - field_name: text - label: Label - - - field_name: geom - label: Geometry - - - field_name: bbox - label: Bounding Box - - - field_name: centroid - label: Centroid - help_text: A geographic region that is covered by the dataset. - -- field_name: spatial_resolution_in_meters - label: Spatial resolution in meters - help_text: Minimum spatial separation resolvable in a dataset, measured in meters. - -- field_name: access_rights - label: Access rights - validators: ignore_missing unicode_safe - help_text: Information that indicates whether the dataset is Open Data, has access restrictions or is not public. - -- field_name: alternate_identifier - label: Other identifier - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: This property refers to a secondary identifier of the dataset, such as MAST/ADS, DataCite, DOI, etc. - -- field_name: theme - label: Theme - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: A category of the dataset. A Dataset may be associated with multiple themes. - -- field_name: language - label: Language - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: Language or languages of the dataset. - # TODO: language form snippet / validator / graph - -- field_name: documentation - label: Documentation - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: A page or document about this dataset. - -- field_name: conforms_to - label: Conforms to - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: An implementing rule or other specification that the dataset follows. 
- -- field_name: is_referenced_by - label: Is referenced by - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: A related resource, such as a publication, that references, cites, or otherwise points to the dataset. - -- field_name: analytics - label: Analytics - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: > - An analytics distribution of the dataset. - Publishers are encouraged to provide URLs pointing to API endpoints or document - repositories where users can access or request associated resources such as - technical reports of the dataset, quality measurements, usability indicators,... - or analytics services. - -- field_name: applicable_legislation - label: Applicable legislation - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: The legislation that mandates the creation or management of the dataset. - -- field_name: has_version - label: Has version - preset: multiple_text - validators: ignore_missing scheming_multiple_text +- field_name: temporal_end + label: + en: Temporal End Date + nl: Einddatum tijdsperiode help_inline: true - help_text: This property refers to a related Dataset that is a version, edition, or adaptation of the described Dataset. - -- field_name: code_values - label: Code values - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: Health classifications and their codes associated with the dataset. - -- field_name: coding_system - label: Coding system - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: > - Coding systems in use (e.g., ICD-10-CM, DGRs, SNOMED CT, ...). - To comply with HealthDCAT-AP, Wikidata URIs MUST be used. - -- field_name: purpose - label: Purpose - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: A free text statement of the purpose of the processing of data or personal data. 
- -- field_name: health_category - label: Health category - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: > - The health category to which this dataset belongs as described in the Commission Regulation on - the European Health Data Space laying down a list of categories of electronic data for - secondary use, Art.33. - -- field_name: health_theme - label: Health theme - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: > - A category of the Dataset or tag describing the Dataset. - -- field_name: legal_basis - label: Legal basis - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: The legal basis used to justify processing of personal data. - -- field_name: min_typical_age - label: Minimum typical age - validators: ignore_missing int_validator - form_snippet: number.html - help_text: Minimum typical age of the population within the dataset. - -- field_name: max_typical_age - label: Maximum typical age - validators: ignore_missing int_validator - form_snippet: number.html - help_text: Maximum typical age of the population within the dataset. - -- field_name: number_of_records - label: Number of records - validators: ignore_missing int_validator - form_snippet: number.html - help_text: Size of the dataset in terms of the number of records. - -- field_name: number_of_unique_individuals - label: Number of records for unique individuals. - validators: ignore_missing int_validator - form_snippet: number.html - help_text: Number of records for unique individuals. - -- field_name: personal_data - label: Personal data - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: Key elements that represent an individual in the dataset. - -- field_name: publisher_note - label: Publisher note - preset: fluent_markdown - help_text: A description of the publisher activities. 
- -- field_name: publisher_type - label: Publisher type - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: A type of organisation that makes the Dataset available. - -- field_name: trusted_data_holder - label: Trusted Data Holder - preset: select - choices: - - value: false - label: "No" - - value: true - label: "Yes" - validators: ignore_missing boolean_validator - help_text: Indicates whether the dataset is held by a trusted data holder. - output_validators: boolean_validator - -- field_name: population_coverage - label: Population coverage - preset: fluent_markdown - help_text: A definition of the population within the dataset. - -- field_name: retention_period - label: Retention period - repeating_subfields: - - - field_name: start - label: Start - preset: datetime_flex - - - field_name: end - label: End - preset: datetime_flex - - help_text: A temporal period which the dataset is available for secondary use. - -- field_name: hdab - label: Health data access body - repeating_label: Health data access body - repeating_once: true - repeating_subfields: - - - field_name: uri - label: URI - - - field_name: name - label: Name - - - field_name: name_translated - label: Name (translations) - preset: fluent_core_translated - help_text: Name of the health data access body in each language. - - - field_name: email - label: Email - display_snippet: email.html - - - field_name: url - label: URL - display_snippet: link.html - - - field_name: type - label: Type - - - field_name: identifier - label: Identifier - help_text: Unique identifier for the HDAB, such as a ROR ID. - help_text: Health Data Access Body supporting access to data in the Member State. - -- field_name: qualified_relation - label: Qualified relation - repeating_label: Relationship - repeating_subfields: - - - field_name: uri - label: URI - - - field_name: relation - label: Relation - help_text: The resource related to the source resource. 
- - - field_name: role - label: Role - help_text: The function of an entity or agent with respect to another entity or resource. - help_text: A description of a relationship with another resource. - -- field_name: provenance_activity - label: Provenance activity - repeating_label: Provenance activity - repeating_once: true - repeating_subfields: - - field_name: uri - label: Activity URI - help_text: URI of the provenance activity (if available). - - field_name: label - label: Label - help_text: Human-readable label for the activity. - - field_name: type - label: Activity type - help_text: Type of the activity. - - field_name: seeAlso - label: See also - help_text: Related link for the activity. - - field_name: dct_type - label: Type - help_text: Type of the activity (URI). - - field_name: startedAtTime - label: Started at time - preset: datetime_flex - help_text: When the activity started (ISO 8601). - - field_name: wasAssociatedWith - label: Associated agent - repeating_label: Agent - repeating_once: true - repeating_subfields: - - field_name: uri - label: URI - - field_name: name - label: Name - - field_name: email - label: Email - display_snippet: email.html - - field_name: url - label: URL - display_snippet: link.html - - field_name: homepage - label: Homepage - display_snippet: link.html - - field_name: type - label: Type - - field_name: identifier - label: Identifier - - field_name: actedOnBehalfOf - label: Acted on behalf of - repeating_label: Organization - repeating_once: true - repeating_subfields: - - field_name: uri - label: URI - - field_name: name - label: Name - - field_name: email - label: Email - display_snippet: email.html - - field_name: url - label: URL - display_snippet: link.html - - field_name: type - label: Type - - field_name: identifier - label: Identifier - help_text: Structured provenance activity information, including agents and organizations. 
- -- field_name: qualified_attribution - label: Qualified attribution - repeating_label: Attribution - repeating_once: true - repeating_subfields: - - field_name: agent - label: Agent - repeating_label: Agent - repeating_once: true - repeating_subfields: - - field_name: uri - label: URI - - field_name: name - label: Name - - - field_name: name_translated - label: Name (translations) - preset: fluent_core_translated - help_text: Agent name in each language. - - - field_name: email - label: Email - display_snippet: email.html - - field_name: url - label: URL - display_snippet: link.html - - field_name: homepage - label: Homepage - display_snippet: link.html - - field_name: type - label: Type - - field_name: identifier - label: Identifier - - field_name: role - label: Role - help_text: Role of the agent (e.g., data processor, contributor). - help_text: Structured qualified attribution information including agent and role. - -- field_name: quality_annotation - label: Quality annotations - repeating_label: Quality annotation - repeating_subfields: - - field_name: body - label: Body - help_text: Content of the quality annotation (e.g., URL to certificate, measurement value, assessment result). - - field_name: target - label: Target - help_text: Aspect of the dataset being annotated (e.g., URI or description of what is being assessed). - - field_name: motivated_by - label: Motivated by - help_text: Motivation or reason for the quality annotation. - help_text: Quality annotations following DQV and Web Annotation standards. - -# Note: if not provided, this will be autogenerated -- field_name: uri - label: URI - help_text: An URI for this dataset (if not provided it will be autogenerated). + help_text: + en: "[dct:temporal] This property refers to a temporal period that the Dataset covers." + nl: "[dct:temporal] Deze eigenschap verwijst naar een tijdsperiode die door de Dataset wordt gedekt." 
+ preset: datetime_flex resource_fields: - -- field_name: url - label: URL - preset: resource_url_upload - -- field_name: name_translated - label: Name - preset: fluent_core_translated - help_text: A descriptive title for the resource. - -- field_name: description_translated - label: Description - preset: fluent_core_translated - form_snippet: fluent_markdown.html - display_snippet: fluent_markdown.html - help_text: A free-text account of the resource. - -- field_name: format - label: Format - preset: resource_format_autocomplete - help_text: File format. If not provided it will be guessed. - -- field_name: mimetype - label: Media type - validators: if_empty_guess_format ignore_missing unicode_safe - help_text: Media type for this format. If not provided it will be guessed. - -- field_name: compress_format - label: Compress format - help_text: The format of the file in which the data is contained in a compressed form. - -- field_name: package_format - label: Package format - help_text: The format of the file in which one or more data files are grouped together. - -- field_name: size - label: Size - validators: ignore_missing int_validator - form_snippet: number.html - display_snippet: file_size.html - help_text: File size in bytes. - -- field_name: hash - label: Hash - help_text: Checksum of the downloaded file. - -- field_name: hash_algorithm - label: Hash Algorithm - help_text: Algorithm used to calculate to checksum. - -- field_name: rights - label: Rights - preset: fluent_markdown - help_text: Some statement about the rights associated with the resource. - -- field_name: availability - label: Availability - help_text: Indicates how long it is planned to keep the resource available. 
- -- field_name: status - label: Status - preset: select - choices: - - value: http://purl.org/adms/status/Completed - label: Completed - - value: http://purl.org/adms/status/UnderDevelopment - label: Under Development - - value: http://purl.org/adms/status/Deprecated - label: Deprecated - - value: http://purl.org/adms/status/Withdrawn - label: Withdrawn - help_text: The status of the resource in the context of maturity lifecycle. - -- field_name: license - label: License - help_text: License in which the resource is made available. If not provided will be inherited from the dataset. - -# Note: this falls back to the standard resource url field -- field_name: access_url - label: Access URL - help_text: URL that gives access to the dataset (defaults to the standard resource URL). - -# Note: this falls back to the standard resource url field -- field_name: download_url - label: Download URL - display_snippet: link.html - help_text: URL that provides a direct link to a downloadable file (defaults to the standard resource URL). - - field_name: issued - label: Release date + label: + en: Issued Date + nl: Uitgegeven Datum preset: datetime_flex - help_text: Date of publication of the resource. + help_text: + en: "[dct:issued] This property contains the date of formal issuance (e.g., publication) of the Resource." + nl: "[dct:issued] Deze eigenschap bevat de datum van formele uitgave (bijv. publicatie) van de Resource." - field_name: modified - label: Modification date + label: + en: Modification Date + nl: Datum Wijziging preset: datetime_flex - help_text: Most recent date on which the resource was changed, updated or modified. - -- field_name: retention_period - label: Retention period - repeating_subfields: - - field_name: start - label: Start - preset: datetime_flex - - - field_name: end - label: End - preset: datetime_flex - help_text: Temporal period during which the resource remains available for use. 
- -- field_name: temporal_resolution - label: Temporal resolution - help_text: Minimum time period resolvable in the distribution. - -- field_name: spatial_resolution_in_meters - label: Spatial resolution in meters - help_text: Minimum spatial separation resolvable in the distribution, measured in meters. - -- field_name: language - label: Language - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: Language or languages of the resource. - -- field_name: documentation - label: Documentation - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: A page or document about this resource. - -- field_name: conforms_to - label: Conforms to - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: An established schema to which the described resource conforms. - -- field_name: applicable_legislation - label: Applicable legislation - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: The legislation that mandates the creation or management of the resource. - -- field_name: access_services - label: Access services - repeating_label: Access service - repeating_subfields: - - - field_name: uri - label: URI - - - field_name: title - label: Title - - - field_name: description - label: Description - form_snippet: markdown.html - help_text: A free-text account of the data service. - - - field_name: endpoint_description - label: Endpoint description - - - field_name: endpoint_url - label: Endpoint URL - preset: multiple_text - - - field_name: serves_dataset - label: Serves dataset - preset: multiple_text - validators: ignore_missing scheming_multiple_text - - - field_name: access_rights - label: Access rights - validators: ignore_missing unicode_safe - help_text: Information regarding access or restrictions based on privacy, security, or other policies. 
- - - field_name: conforms_to - label: Conforms to - preset: multiple_text - validators: ignore_missing scheming_multiple_text - - - field_name: format - label: Format - preset: multiple_text - validators: ignore_missing scheming_multiple_text - - - field_name: identifier - label: Identifier - - - field_name: language - label: Language - preset: multiple_text - validators: ignore_missing scheming_multiple_text - - - field_name: rights - label: Rights - form_snippet: markdown.html - help_text: Rights statement for the data service. - - - field_name: landing_page - label: Landing page - preset: multiple_text - validators: ignore_missing scheming_multiple_text - - - field_name: keyword - label: Keywords - preset: tag_string_autocomplete - form_placeholder: eg. economy, mental health, government - help_text: Keywords or tags describing the data service. Use commas to separate multiple values. - - - field_name: applicable_legislation - label: Applicable legislation - preset: multiple_text - validators: ignore_missing scheming_multiple_text - help_text: The legislation that mandates the creation or management of the data service. - - - field_name: contact - label: Contact point - repeating_label: Contact point - repeating_once: true - repeating_subfields: - - - field_name: uri - label: URI - - - field_name: name - label: Name - - - field_name: name_translated - label: Name (translations) - preset: fluent_core_translated - help_text: Name of the contact point in each language. - - - field_name: email - label: Email - display_snippet: email.html - - - field_name: identifier - label: Identifier - help_text: Unique identifier for the contact point, such as a ROR ID. - - - field_name: url - label: URL - display_snippet: link.html - help_text: Contact information for enquiries about the data service. - - - field_name: creator - label: Creator - repeating_label: Creator - repeating_subfields: - - - field_name: uri - label: URI - help_text: URI of the creator, if available. 
- - - field_name: name - label: Name - help_text: Name of the entity or person who created the data service. - - - field_name: name_translated - label: Name (translations) - preset: fluent_core_translated - help_text: Name of the entity or person who created the data service in each language. - - - field_name: email - label: Email - display_snippet: email.html - help_text: Contact email of the creator. - - - field_name: url - label: URL - display_snippet: link.html - help_text: URL for more information about the creator. - - - field_name: type - label: Type - help_text: Type of creator (e.g., Organization, Person). - - - field_name: identifier - label: Identifier - help_text: Unique identifier for the creator, such as an ORCID or ROR ID. - - - field_name: publisher - label: Publisher - repeating_label: Publisher - repeating_once: true - repeating_subfields: - - - field_name: uri - label: URI - - - field_name: name - label: Name - - - field_name: name_translated - label: Name (translations) - preset: fluent_core_translated - help_text: Name of the entity or person who publishes the data service in each language. - - - field_name: email - label: Email - display_snippet: email.html - - - field_name: url - label: URL - display_snippet: link.html - - - field_name: type - label: Type - - - field_name: identifier - label: Identifier - help_text: Unique identifier for the publisher, such as a ROR ID. - help_text: Entity responsible for making the data service available. - - - field_name: license - label: License - help_text: License in which the data service is made available. - - - field_name: modified - label: Modification date - preset: datetime_flex - help_text: Most recent date on which the data service was changed, updated or modified. - - help_text: A data service that gives access to the resource. - -# Note: if not provided, this will be autogenerated -- field_name: uri - label: URI - help_text: An URI for this resource (if not provided it will be autogenerated). 
+ help_text: + en: "[dct:modified] This property contains the most recent date on which the Resource was changed or modified." + nl: "[dct:modified] Deze eigenschap bevat de meest recente datum waarop de Resource is gewijzigd of aangepast." \ No newline at end of file diff --git a/test.ini b/test.ini index 2ee42e1..f2e2567 100644 --- a/test.ini +++ b/test.ini @@ -9,14 +9,14 @@ smtp_server = localhost error_email_from = ckan@localhost [app:main] -scheming.dataset_schemas = ckanext.fairdatapoint:tests/test_data/scheming/schemas/gdi_userportal.yaml -scheming.presets = ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml ckanext.fluent:presets.json ckanext.fairdatapoint:tests/test_data/scheming/presets/gdi_presets.yaml +scheming.dataset_schemas = ckanext.dcat.schemas:health_dcat_ap.yaml ckanext.fairdatapoint:tests/test_data/scheming/schemas/gdi_userportal.yaml +scheming.presets = ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml ckanext.fairdatapoint:tests/test_data/scheming/presets/gdi_presets.yaml scheming.dataset_fallback = false use = config:../../src/ckan/test-core.ini # Insert any custom config settings to be used when running your extension's # tests here.
These will override the one defined in CKAN core's test-core.ini -ckan.plugins = dcat scheming_datasets fluent fairdatapoint +ckan.plugins = dcat scheming_datasets fairdatapoint ckanext.dcat.rdf.profiles = euro_health_dcat_ap euro_dcat_ap_3 euro_dcat_ap_scheming fairdatapoint_dcat_ap # Logging configuration From 5e83d33cd50c5881a7beeb9c0c2cf2dddfdfa593 Mon Sep 17 00:00:00 2001 From: Hans-christian Date: Fri, 26 Sep 2025 11:30:22 +0200 Subject: [PATCH 06/11] fix UT --- .../fairdatapoint/tests/test_processors.py | 142 ++++++++++++------ ckanext/fairdatapoint/tests/test_profiles.py | 112 +++++++++----- 2 files changed, 172 insertions(+), 82 deletions(-) diff --git a/ckanext/fairdatapoint/tests/test_processors.py b/ckanext/fairdatapoint/tests/test_processors.py index 10d8747..3833866 100644 --- a/ckanext/fairdatapoint/tests/test_processors.py +++ b/ckanext/fairdatapoint/tests/test_processors.py @@ -2,13 +2,11 @@ # # SPDX-License-Identifier: AGPL-3.0-only -import pytest -from datetime import datetime -from dateutil.tz import tzutc +import json from pathlib import Path from unittest.mock import patch -from docopt import extras +import pytest from rdflib import Graph from ckanext.fairdatapoint.harvesters.domain.fair_data_point_record_to_package_converter import ( FairDataPointRecordToPackageConverter) @@ -39,6 +37,10 @@ def test_fdp_record_converter_catalog(self, parser_catalogs): record=data, series_mapping=None) assert parser_catalogs.called + @staticmethod + def _extras_to_dict(extras_list): + return {item["key"]: item["value"] for item in extras_list} + def test_fdp_record_converter_dataset_dict(self): fdp_record_to_package = FairDataPointRecordToPackageConverter(profile="fairdatapoint_dcat_ap") data = Graph().parse(Path(TEST_DATA_DIRECTORY, "Project_27866022694497978_out.ttl")).serialize() @@ -47,17 +49,64 @@ def test_fdp_record_converter_dataset_dict(self): "http://purl.org/zonmw/generic/10006;" 
"dataset=https://covid19initiatives.health-ri.nl/p/Project/27866022694497978", record=data, series_mapping=None) - expected_dataset = dict(extras=[], uri="https://covid19initiatives.health-ri.nl/p/Project/27866022694497978", - resources=[], title="COVID-NL cohort MUMC+", - notes="Clinical data of MUMC COVID-NL cohort", tags=[], - license_id="", identifier="27866022694497978", - has_version=[ - "https://repo.metadatacenter.org/template-instances/2836bf1c-76e9-44e7-a65e-80e9ca63025a"], - contact=[{'email': '', 'identifier': 'https://orcid.org/0000-0002-4348-707X', 'name': 'N.K. De Vries','uri': '', 'url': ''} - ], creator=[{'email': '', 'identifier': '', 'name': '', 'type': '', 'uri': 'https://orcid.org/0000-0002-0180-3636', 'url': ''}], - publisher=[{'email': '','identifier': '','name': '','type': '','uri': 'https://opal.health-ri.nl/pub', 'url': ''}], - temporal_start='2020-01-01', temporal_end='2025-12-31') - assert actual_dataset == expected_dataset + extras_dict = self._extras_to_dict(actual_dataset["extras"]) + + assert actual_dataset["resources"] == [] + assert actual_dataset["title"] == "COVID-NL cohort MUMC+" + assert actual_dataset["notes"] == "Clinical data of MUMC COVID-NL cohort" + assert actual_dataset["tags"] == [] + assert actual_dataset["license_id"] == "" + assert actual_dataset["has_version"] == [ + "https://repo.metadatacenter.org/template-instances/2836bf1c-76e9-44e7-a65e-80e9ca63025a" + ] + assert actual_dataset["contact"] == [ + { + "email": "", + "identifier": "https://orcid.org/0000-0002-4348-707X", + "name": "N.K. 
De Vries", + "uri": "", + "url": "", + } + ] + assert actual_dataset["creator"] == [ + { + "email": "", + "identifier": "", + "name": "", + "type": "", + "uri": "https://orcid.org/0000-0002-0180-3636", + "url": "", + } + ] + assert actual_dataset["publisher"] == [ + { + "email": "", + "identifier": "", + "name": "", + "type": "", + "uri": "https://opal.health-ri.nl/pub", + "url": "", + } + ] + assert actual_dataset["temporal_start"] == "2020-01-01" + assert actual_dataset["temporal_end"] == "2025-12-31" + assert actual_dataset["retention_period"] == [] + + assert extras_dict["identifier"] == "27866022694497978" + assert ( + extras_dict["uri"] + == "https://covid19initiatives.health-ri.nl/p/Project/27866022694497978" + ) + assert extras_dict["contact_name"] == "N.K. De Vries" + assert ( + extras_dict["contact_identifier"] + == "https://orcid.org/0000-0002-4348-707X" + ) + assert ( + extras_dict["publisher_uri"] == "https://opal.health-ri.nl/pub" + ) + assert extras_dict["creator_uri"] == "https://orcid.org/0000-0002-0180-3636" + assert extras_dict["homepage"] == "http://localhost:5000" def test_fdp_record_converter_catalog_dict(self): fdp_record_to_package = FairDataPointRecordToPackageConverter(profile="fairdatapoint_dcat_ap") @@ -66,33 +115,40 @@ def test_fdp_record_converter_catalog_dict(self): guid="catalog=https://fair.healthinformationportal.eu/catalog/1c75c2c9-d2cc-44cb-aaa8-cf8c11515c8d", record=data, series_mapping=None) + extras_dict = self._extras_to_dict(actual["extras"]) - expected = { - "uri": "https://fair.healthinformationportal.eu/catalog/1c75c2c9-d2cc-44cb-aaa8-cf8c11515c8d", - "access_rights": "https://fair.healthinformationportal.eu/catalog/" - "1c75c2c9-d2cc-44cb-aaa8-cf8c11515c8d#accessRights", - "conforms_to": ["https://fair.healthinformationportal.eu/profile/" - "a0949e72-4466-4d53-8900-9436d1049a4b"], - "extras": [], - "has_version": ["1.0"], - "issued": '2023-10-06T10:12:55.614000+00:00', - "language": 
["http://id.loc.gov/vocabulary/iso639-1/en"], - "license_id": "", - "modified": '2023-10-06T10:12:55.614000+00:00', - 'publisher': [ - { - 'email': '', - 'identifier': '', - "name": "Automatic", - 'type': '', - 'uri': '', - 'url': '', - }, - ], - - "resources": [], - "tags": [], - "title": "Slovenia National Node" - } + assert actual["has_version"] == ["1.0"] + assert actual["issued"] == "2023-10-06T10:12:55.614000+00:00" + assert actual["modified"] == "2023-10-06T10:12:55.614000+00:00" + assert actual["license_id"] == "" + assert actual["publisher"] == [ + { + "email": "", + "identifier": "", + "name": "Automatic", + "type": "", + "uri": "", + "url": "", + } + ] + assert actual["resources"] == [] + assert actual["tags"] == [] + assert actual["title"] == "Slovenia National Node" + assert actual["retention_period"] == [] - assert actual == expected + assert ( + extras_dict["uri"] + == "https://fair.healthinformationportal.eu/catalog/1c75c2c9-d2cc-44cb-aaa8-cf8c11515c8d" + ) + assert ( + extras_dict["access_rights"] + == "https://fair.healthinformationportal.eu/catalog/1c75c2c9-d2cc-44cb-aaa8-cf8c11515c8d#accessRights" + ) + assert json.loads(extras_dict["conforms_to"]) == [ + "https://fair.healthinformationportal.eu/profile/a0949e72-4466-4d53-8900-9436d1049a4b" + ] + assert json.loads(extras_dict["language"]) == [ + "http://id.loc.gov/vocabulary/iso639-1/en" + ] + assert extras_dict["publisher_name"] == "Automatic" + assert extras_dict["homepage"] == "http://localhost:5000" diff --git a/ckanext/fairdatapoint/tests/test_profiles.py b/ckanext/fairdatapoint/tests/test_profiles.py index 3ec28fe..d69554a 100644 --- a/ckanext/fairdatapoint/tests/test_profiles.py +++ b/ckanext/fairdatapoint/tests/test_profiles.py @@ -2,15 +2,16 @@ # # SPDX-License-Identifier: AGPL-3.0-only -import pytest -from datetime import datetime -from dateutil.tz import tzutc +import json from pathlib import Path + +import pytest from rdflib import Graph -from ckanext.fairdatapoint.profiles import 
validate_tags + from ckanext.fairdatapoint.harvesters.domain.fair_data_point_record_to_package_converter import ( - FairDataPointRecordToPackageConverter + FairDataPointRecordToPackageConverter, ) +from ckanext.fairdatapoint.profiles import validate_tags TEST_DATA_DIRECTORY = Path(Path(__file__).parent.resolve(), "test_data") @@ -39,38 +40,71 @@ def test_parse_dataset(): guid="catalog=https://health-ri.sandbox.semlab-leiden.nl/catalog/5c85cb9f-be4a-406c-ab0a-287fa787caa0;" "dataset=https://health-ri.sandbox.semlab-leiden.nl/dataset/d9956191-1aff-4181-ac8b-16b829135ed5", record=data, series_mapping=None) - expected = { - 'extras': [], - 'resources': [ - {'name': 'Clinical data for [PUBLIC] Low-Grade Gliomas (UCSF, Science 2014)', - 'description': 'Clinical data for [PUBLIC] Low-Grade Gliomas (UCSF, Science 2014)', - 'access_url': 'https://cbioportal.health-ri.nl/study/clinicalData?id=lgg_ucsf_2014', - 'license': 'http://rdflicense.appspot.com/rdflicense/cc-by-nc-nd3.0', - 'url': 'https://cbioportal.health-ri.nl/study/clinicalData?id=lgg_ucsf_2014', - 'uri': 'https://health-ri.sandbox.semlab-leiden.nl/distribution/931ed9c4-ad23-47ff-b121-2eb428e57423', - 'distribution_ref': 'https://health-ri.sandbox.semlab-leiden.nl/distribution/931ed9c4-ad23-47ff-b121-2eb428e57423'}, - {'name': 'Mutations', - 'description': 'Mutation data from whole exome sequencing of 23 grade II glioma tumor/normal pairs. 
(MAF)', - 'access_url': 'https://cbioportal.health-ri.nl/study/summary?id=lgg_ucsf_2014', - 'license': 'http://rdflicense.appspot.com/rdflicense/cc-by-nc-nd3.0', - 'url': 'https://cbioportal.health-ri.nl/study/summary?id=lgg_ucsf_2014', - 'uri': 'https://health-ri.sandbox.semlab-leiden.nl/distribution/ad00299f-6efb-42aa-823d-5ff2337f38f7', - 'distribution_ref': 'https://health-ri.sandbox.semlab-leiden.nl/distribution/ad00299f-6efb-42aa-823d-5ff2337f38f7'} - ], - 'title': '[PUBLIC] Low-Grade Gliomas (UCSF, Science 2014)', - 'notes': 'Whole exome sequencing of 23 grade II glioma tumor/normal pairs.', - 'url': 'https://cbioportal.health-ri.nl/study/summary?id=lgg_ucsf_2014', - 'tags': [{'name': 'CNS Brain'}, {'name': 'Diffuse Glioma'}, {'name': 'Glioma'}], - 'license_id': '', - 'issued': '2019-10-30 23:00:00', - 'modified': '2019-10-30 23:00:00', - 'identifier': 'lgg_ucsf_2014', - 'language': ['http://id.loc.gov/vocabulary/iso639-1/en'], - 'conforms_to': ['https://health-ri.sandbox.semlab-leiden.nl/profile/2f08228e-1789-40f8-84cd-28e3288c3604'], - 'publisher': [ - {'email': '', 'identifier': '', 'name': '', 'type': '', 'uri': 'https://www.health-ri.nl', 'url': ''}], - 'uri': 'https://health-ri.sandbox.semlab-leiden.nl/dataset/d9956191-1aff-4181-ac8b-16b829135ed5', - 'is_referenced_by': ['https://pubmed.ncbi.nlm.nih.gov/24336570'] # Make this a list to match 'actual' - } + extras_dict = {item["key"]: item["value"] for item in actual["extras"]} + + expected_resources = [ + { + "name": "Clinical data for [PUBLIC] Low-Grade Gliomas (UCSF, Science 2014)", + "description": "Clinical data for [PUBLIC] Low-Grade Gliomas (UCSF, Science 2014)", + "access_url": "https://cbioportal.health-ri.nl/study/clinicalData?id=lgg_ucsf_2014", + "license": "http://rdflicense.appspot.com/rdflicense/cc-by-nc-nd3.0", + "url": "https://cbioportal.health-ri.nl/study/clinicalData?id=lgg_ucsf_2014", + "uri": 
"https://health-ri.sandbox.semlab-leiden.nl/distribution/931ed9c4-ad23-47ff-b121-2eb428e57423", + "distribution_ref": "https://health-ri.sandbox.semlab-leiden.nl/distribution/931ed9c4-ad23-47ff-b121-2eb428e57423", + }, + { + "name": "Mutations", + "description": "Mutation data from whole exome sequencing of 23 grade II glioma tumor/normal pairs. (MAF)", + "access_url": "https://cbioportal.health-ri.nl/study/summary?id=lgg_ucsf_2014", + "license": "http://rdflicense.appspot.com/rdflicense/cc-by-nc-nd3.0", + "url": "https://cbioportal.health-ri.nl/study/summary?id=lgg_ucsf_2014", + "uri": "https://health-ri.sandbox.semlab-leiden.nl/distribution/ad00299f-6efb-42aa-823d-5ff2337f38f7", + "distribution_ref": "https://health-ri.sandbox.semlab-leiden.nl/distribution/ad00299f-6efb-42aa-823d-5ff2337f38f7", + }, + ] + + assert len(actual["resources"]) == len(expected_resources) + for actual_resource, expected_resource in zip(actual["resources"], expected_resources): + assert actual_resource["retention_period"] == [] + for field, value in expected_resource.items(): + assert actual_resource[field] == value + + assert actual["title"] == "[PUBLIC] Low-Grade Gliomas (UCSF, Science 2014)" + assert actual["notes"] == "Whole exome sequencing of 23 grade II glioma tumor/normal pairs." 
+ assert actual["url"] == "https://cbioportal.health-ri.nl/study/summary?id=lgg_ucsf_2014" + assert actual["tags"] == [ + {"name": "CNS Brain"}, + {"name": "Diffuse Glioma"}, + {"name": "Glioma"}, + ] + assert actual["license_id"] == "" + assert actual["issued"] == "2019-10-30 23:00:00" + assert actual["modified"] == "2019-10-30 23:00:00" + assert actual["publisher"] == [ + { + "email": "", + "identifier": "", + "name": "", + "type": "", + "uri": "https://www.health-ri.nl", + "url": "", + } + ] + assert actual["retention_period"] == [] - assert actual == expected + assert extras_dict["identifier"] == "lgg_ucsf_2014" + assert json.loads(extras_dict["language"]) == [ + "http://id.loc.gov/vocabulary/iso639-1/en" + ] + assert json.loads(extras_dict["conforms_to"]) == [ + "https://health-ri.sandbox.semlab-leiden.nl/profile/2f08228e-1789-40f8-84cd-28e3288c3604" + ] + assert extras_dict["publisher_uri"] == "https://www.health-ri.nl" + assert ( + extras_dict["uri"] + == "https://health-ri.sandbox.semlab-leiden.nl/dataset/d9956191-1aff-4181-ac8b-16b829135ed5" + ) + assert extras_dict["homepage"] == "http://localhost:5000" + assert json.loads(extras_dict["is_referenced_by"]) == [ + "https://pubmed.ncbi.nlm.nih.gov/24336570" + ] From c299bd43d36d861a5215ae6b4ce3dd899a8eb3bd Mon Sep 17 00:00:00 2001 From: Hans-Christian Date: Fri, 26 Sep 2025 11:33:37 +0200 Subject: [PATCH 07/11] Apply suggestion from @sourcery-ai[bot] Co-authored-by: sourcery-ai[bot] <58596630+sourcery-ai[bot]@users.noreply.github.com> --- ckanext/fairdatapoint/profiles.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ckanext/fairdatapoint/profiles.py b/ckanext/fairdatapoint/profiles.py index ac24583..82d07ea 100644 --- a/ckanext/fairdatapoint/profiles.py +++ b/ckanext/fairdatapoint/profiles.py @@ -91,9 +91,12 @@ def _sanitize_tags_translated(self, tags_translated: Dict[str, List[str]]) -> Di sanitized[lang] = [tag['name'] for tag in cleaned] if len(values) != len(sanitized[lang]): 
+ removed_tags = [v for v in values if v not in sanitized[lang]] log.warning( - 'Removed invalid tags for language %s during multilingual sanitation', - lang + 'Removed invalid tags for language %s during multilingual sanitation. Original: %r, Removed: %r', + lang, + values, + removed_tags ) return sanitized From b38fe9443bd5797cc030a8a96fa82af5174b017e Mon Sep 17 00:00:00 2001 From: Hans-christian Date: Fri, 26 Sep 2025 11:37:56 +0200 Subject: [PATCH 08/11] patch sonar cloud --- .github/workflows/test.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index aee7203..89f55e1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -85,8 +85,9 @@ jobs: coverage xml -o coverage.xml - name: Install unzip run: apt-get update && apt-get install -y unzip - - name: SonarCloud Scan - uses: sonarsource/sonarcloud-github-action@v5 + - name: Sonar scan + uses: SonarSource/sonarqube-scan-action@v6 env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR information, if any + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} + SONAR_HOST_URL: https://sonarcloud.io From 60b5c4bcaae25137b3177f0c90902f559cf80b86 Mon Sep 17 00:00:00 2001 From: Hans-christian Date: Fri, 26 Sep 2025 12:06:53 +0200 Subject: [PATCH 09/11] fix run --- .github/workflows/test.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 89f55e1..dbdde52 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -44,6 +44,7 @@ jobs: - uses: actions/checkout@v5 - name: REUSE Compliance Check uses: fsfe/reuse-action@v5 + - name: Install requirements (common) run: | pip install -r requirements.txt @@ -80,12 +81,16 @@ jobs: - name: Run tests run: | pytest --ckan-ini=test.ini --cov=ckanext.fairdatapoint --disable-warnings ckanext/fairdatapoint + - name: Set SONAR_TOKEN env + run: echo 
"SONAR_TOKEN=${{ secrets.SONAR_TOKEN }}" >> $GITHUB_ENV - name: Generate coverage report run: | coverage xml -o coverage.xml - name: Install unzip + if: ${{ env.SONAR_TOKEN != '' }} run: apt-get update && apt-get install -y unzip - name: Sonar scan + if: ${{ env.SONAR_TOKEN != '' }} uses: SonarSource/sonarqube-scan-action@v6 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From d3213f7cb5ba2125956b64c0912393b0680f5bda Mon Sep 17 00:00:00 2001 From: Hans-christian Date: Fri, 26 Sep 2025 12:14:46 +0200 Subject: [PATCH 10/11] Bump packages --- .github/workflows/main.yml | 2 +- .github/workflows/release.yml | 6 +++--- .github/workflows/test.yml | 2 -- requirements.txt | 2 +- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c8e27d2..8b55501 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -18,7 +18,7 @@ jobs: permissions: contents: read steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: oss-review-toolkit/ort-ci-github-action@v1 with: allow-dynamic-versions: "true" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5c5796c..3d93cf5 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -19,7 +19,7 @@ jobs: strategy: fail-fast: false steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: oss-review-toolkit/ort-ci-github-action@v1 with: allow-dynamic-versions: "true" @@ -33,7 +33,7 @@ jobs: new_tag: ${{ steps.tagging.outputs.new_tag }} steps: - name: Checkout Repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 @@ -73,7 +73,7 @@ jobs: needs: versioning steps: - name: Checkout Repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Install GitHub CLI run: sudo apt-get install -y gh diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index dbdde52..b16c4a5 100644 --- a/.github/workflows/test.yml +++ 
b/.github/workflows/test.yml @@ -87,10 +87,8 @@ jobs: run: | coverage xml -o coverage.xml - name: Install unzip - if: ${{ env.SONAR_TOKEN != '' }} run: apt-get update && apt-get install -y unzip - name: Sonar scan - if: ${{ env.SONAR_TOKEN != '' }} uses: SonarSource/sonarqube-scan-action@v6 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/requirements.txt b/requirements.txt index 1c08861..39b2b08 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: AGPL-3.0-only -rdflib~=7.1.0 +rdflib~=7.2.1 setuptools~=80.9.0 nose~=1.3.7 requests~=2.32.3 \ No newline at end of file From 1492b15f5f909ba62de9413a051617e1737c1e05 Mon Sep 17 00:00:00 2001 From: Hans-christian Date: Fri, 26 Sep 2025 12:21:19 +0200 Subject: [PATCH 11/11] Remove token set --- .github/workflows/test.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ce508d0..93fe8b4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -72,6 +72,7 @@ jobs: pip install -r requirements.txt fi cd .. + - name: Setup extension run: | sed -i -e 's/use = config:.*/use = config:\/srv\/app\/src\/ckan\/test-core.ini/' test.ini @@ -80,8 +81,6 @@ jobs: - name: Run tests run: | pytest --ckan-ini=test.ini --cov=ckanext.fairdatapoint --disable-warnings ckanext/fairdatapoint - - name: Set SONAR_TOKEN env - run: echo "SONAR_TOKEN=${{ secrets.SONAR_TOKEN }}" >> $GITHUB_ENV - name: Generate coverage report run: | coverage xml -o coverage.xml