Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ SRC = src
DEST = project
PYMODEL = $(SRC)/$(SCHEMA_NAME)/datamodel
DOCDIR = docs
DOCTEMPLATES = $(SRC)/docs/templates
DOCTEMPLATES = $(SRC)/docs/doc-templates
EXAMPLEDIR = examples

# Use += to append variables from the variables file
Expand Down Expand Up @@ -178,7 +178,7 @@ $(DOCDIR):

gendoc: $(DOCDIR)
cp -rf $(SRC)/docs/files/* $(DOCDIR) ; \
$(RUN) gen-doc ${GEN_DOC_ARGS} -d $(DOCDIR) $(SOURCE_SCHEMA_PATH)
$(RUN) gen-doc ${GEN_DOC_ARGS} -d $(DOCDIR) --template-directory $(DOCTEMPLATES) $(SOURCE_SCHEMA_PATH)

testdoc: gendoc serve

Expand All @@ -194,7 +194,7 @@ git-add: .cruft.json
git-commit:
git commit -m 'chore: make setup was run' -a
git-status:
git status
git status

# only necessary if setting up via cookiecutter
.cruft.json:
Expand Down
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ pip install uv

### Getting Started

Note: either 'make' or 'just' can be used to run the commands below.

1. Clone the repository:
```bash
git clone https://github.com/biolink/ingest-metadata.git
Expand All @@ -53,6 +55,22 @@ uv sync
make test # or use the `just test` command
```

4. Creating a New Ingest Metadata file

```bash
# Create a new Ingest Metadata file from the template
make new-metadata INFORES=infores:example

# This creates src/docs/metadata/example_metadata.yaml
# Edit the file to fill in your specific information
```

or using the equivalent **`just`** command (note the reversed order of arguments):

```bash
just INFORES=infores:example new-metadata
```

### Development Commands

To run the commands, you may use good old make or the command runner [just](https://github.com/casey/just/) which is a better choice on Windows.
Expand Down
3 changes: 2 additions & 1 deletion justfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ src := "src"
dest := "project"
pymodel := src / schema_name / "datamodel"
docdir := "docs"
# Directory holding the custom gen-doc templates. Built with the path-join
# operator because `{{...}}` is not interpolated inside a plain string in an
# assignment; the quoted form would pass the literal text "{{src}}" to gen-doc.
doctemplates := src / "docs" / "doc-templates"
exampledir := "examples"

# Main project manager is 'uv'
Expand Down Expand Up @@ -150,7 +151,7 @@ lint:
# Generate documentation
_gendoc: _ensure_docdir
cp -r {{src}}/docs/files/* {{docdir}}
{{run}} gen-doc {{gen_doc_args}} -d {{docdir}} {{source_schema_path}}
{{run}} gen-doc {{gen_doc_args}} -d {{docdir}} --template-directory {{doctemplates}} {{source_schema_path}}

# Build docs and run test server
testdoc: _gendoc _serve
Expand Down
21 changes: 20 additions & 1 deletion project.Makefile
Original file line number Diff line number Diff line change
@@ -1 +1,20 @@
## Add your own custom Makefile targets here
## This is included by the main Makefile.

# Create a new Ingest Metadata Specification YAML from template
# Usage: make new-metadata INFORES=infores:ctd
#
# INFORES is mandatory. The check is a recipe line guarded by a parse-time
# conditional, so the error is raised only when this target is actually run
# and not when other targets in the including Makefile are invoked.
.PHONY: new-metadata
new-metadata:
ifndef INFORES
	$(error INFORES is required. Usage: make new-metadata INFORES=infores:example)
endif
	$(RUN) python $(SRC)/scripts/create_metadata.py --infores "$(INFORES)"

# Validate all Ingest Metadata Specification files against the schema.
#
# Every *.yaml file under $(SRC)/docs/metadata is checked, even after an
# earlier file has failed, so that all problems are reported in one run.
# The target exits non-zero if any file fails validation; the success
# message is printed only when every file passed.
.PHONY: validate-metadata
validate-metadata:
	@echo "Validating Ingest Metadata Specification files against schema..."
	@status=0; \
	for ingest_spec in $(SRC)/docs/metadata/*.yaml; do \
		if [ -f "$$ingest_spec" ]; then \
			echo "Validating $$ingest_spec"; \
			$(RUN) linkml-validate --schema $(SOURCE_SCHEMA_PATH) "$$ingest_spec" || status=1; \
		fi; \
	done; \
	if [ "$$status" -ne 0 ]; then \
		echo "✗ One or more Ingest Metadata Specification files failed validation" >&2; \
		exit 1; \
	fi
	@echo "✓ All Ingest Metadata Specification files validated successfully"
24 changes: 23 additions & 1 deletion project.justfile
Original file line number Diff line number Diff line change
@@ -1 +1,23 @@
## Add your own just recipes here. This is imported by the main justfile.
## This is imported by the main justfile.

INFORES:= ""

# Create a new Ingest Metadata Specification YAML from template.
# Fails with a non-zero exit status when INFORES is not supplied.
# Usage: just INFORES=infores:ctd new-metadata
new-metadata:
    @if [ -z "{{INFORES}}" ]; then \
        echo "INFORES is required. Usage: just INFORES=infores:example new-metadata" >&2; \
        exit 1; \
    else \
        {{run}} python {{src}}/scripts/create_metadata.py --infores "{{INFORES}}"; \
    fi

# Every *.yaml file under {{src}}/docs/metadata is checked, even after an
# earlier file has failed; the recipe exits non-zero if any file is invalid.
# Validate all Ingest Metadata Specification files against the schema
validate-metadata:
    @echo "Validating Ingest Metadata Specification files against schema..."
    @status=0; \
    for ingest_spec in {{src}}/docs/metadata/*.yaml; do \
        if [ -f "$ingest_spec" ]; then \
            echo "Validating $ingest_spec"; \
            {{run}} linkml-validate --schema {{source_schema_path}} "$ingest_spec" || status=1; \
        fi; \
    done; \
    if [ "$status" -ne 0 ]; then \
        echo "✗ One or more Ingest Metadata Specification files failed validation" >&2; \
        exit 1; \
    fi
    @echo "✓ All Ingest Metadata Specification files validated successfully"
File renamed without changes.
File renamed without changes.
File renamed without changes.
7 changes: 7 additions & 0 deletions src/docs/files/example-metadata.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Composing an Ingest Metadata File

An Ingest Metadata File is a YAML file which specifies the metadata of a knowledge source ingest into a knowledge graph representation within the Translator Ingests repository.

## Overview

See [Ingest Metadata File template](ingest_metadata_template.yaml) for details.
37 changes: 37 additions & 0 deletions src/docs/files/ingest_metadata_template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# IngestMetadataFile schema based content.
# See schema at src/ingest_metadata/schema/ingest_metadata.yaml
file_name: # (required, range = string)
file_created_by: # (optional, multivalued, range = string)
file_creation_date: # (required, range = date)

# Information about the code used to execute the ingest task
# (e.g., a specific version/branch of code used)
ingest_code_url: # (required, range = URIorCURIE)
ingest_code_version: # (required, range = string)

# Information about when/how the source was accessed in performing the ingest
source_infores_id: # (required, range = URIorCURIE)
source_data_version: # (required, range = string)
source_access_date: # (required, range = date)
source_access_urls: # (optional, multivalued, range = URIorCURIE)
source_file_names: # (optional, multivalued, range = string)

# Information about the target KGX graph produced by this execution of the source ingest task
target_name: # (required, range = string)
target_creation_date: # (required, range = date)
target_data_url: # (required, range = URIorCURIE)
target_data_version: # (required, range = string)
target_format: # (required, range = string)
target_model: # (required, range = string) # This will be Biolink Model for all Translator graphs
target_model_url: # (optional, range = string)
target_data_model_version: # (required, range = string) # e.g. "4.2.6-rc5"
node_normalizer: # (optional, range = string)
node_normalizer_version: # (optional, range = string)
node_normalizer_url: # (optional, range = string) # e.g. "https://github.com/TranslatorSRI/NodeNormalization"

# Metrics about the content of the specific KGX graph produced by the ingest task
total_edge_count: # (optional, range = integer)
total_node_count: # (optional, range = integer)
orphan_node_count: # (optional, range = integer)
node_categories: # (optional, multivalued, range = string)
edge_predicates: # (optional, multivalued, range = string)
109 changes: 109 additions & 0 deletions src/scripts/create_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#!/usr/bin/env python3
"""
Script to create a new Ingest Metadata Specification (metadata) from the template.
"""

from os import makedirs, path, sep
import sys
from pathlib import Path
import yaml
from datetime import datetime
import click

# Directory that receives generated metadata files: the "docs/metadata" folder
# located two levels above this script file (i.e. next to the scripts folder).
METADATA_FILE_DIRECTORY = Path(__file__).parent.parent / "docs" / "metadata"

def load_template(template_path):
    """
    Load the metadata template from the YAML file.

    :param template_path: path to the YAML template file
    :return: the parsed YAML content as returned by ``yaml.safe_load``
             (``None`` when the document is empty)
    """
    # Read explicitly as UTF-8 so parsing does not depend on the platform's
    # default text encoding.
    with open(template_path, 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)


def create_metadata(infores_id, output_file, output_path, template_path):
    """
    Create a new Ingest Metadata Specification from the template, with user-specified values.

    The template is loaded, its ``file_name``, ``source_infores_id`` and
    ``file_creation_date`` entries are set, and the result is written to
    ``output_path`` as block-style YAML with the keys kept in template order.

    :param infores_id: Associated with the primary knowledge source
    :param output_file: file name of the metadata file
    :param output_path: full path to the metadata file
    :param template_path: full path to the template file
    :return: None
    :raises ValueError: if the template content is not a YAML mapping
    """
    # Load template
    template = load_template(template_path)

    # An empty YAML document parses to None; treat it as an empty mapping so
    # the assignments below work. Anything else that is not a mapping cannot
    # be used as a template at all.
    if template is None:
        template = {}
    elif not isinstance(template, dict):
        raise ValueError(f"Template {template_path} must contain a YAML mapping")

    # Update template with user values
    template['file_name'] = output_file
    template['source_infores_id'] = infores_id
    template['file_creation_date'] = datetime.now().strftime('%Y-%m-%d')

    # Write the new metadata file (explicit UTF-8, independent of platform default)
    with open(output_path, 'w', encoding='utf-8') as f:
        yaml.dump(template, f, default_flow_style=False, sort_keys=False, indent=2)

    click.echo(f"Created new Ingest Metadata file: {output_file}")
    click.echo(f"  InfoRes ID: {infores_id}")
    click.echo("\nNext steps:")
    click.echo(f"1. Edit {output_path} to fill in the template sections")
    click.echo(f"2. See src{sep}docs{sep}files{sep}example-metadata.md for detailed guidance")


@click.command()
@click.option(
    '--infores',
    required=True,
    help='InfoRes identifier for the data source (e.g., infores:ctd)'
)
@click.option(
    '--output',
    help='Output filename for the new metadata (default: based on infores ID)'
)
@click.option(
    '--template',
    default=f"src{sep}docs{sep}files{sep}ingest_metadata_template.yaml",
    help=f"Path to the metadata template file (default: src{sep}docs{sep}files{sep}ingest_metadata_template.yaml)"
)
def main(infores, output, template):
    """Create a new Ingest Metadata file from the template.

    Validates the InfoRes identifier, derives the output file name when none
    is given, and writes the new file into the metadata directory. Exits with
    status 1 on invalid input or on a creation error, and with status 0 when
    the user declines to overwrite an existing file.

    Examples:

    \b
    create_metadata.py --infores "infores:ctd"
    create_metadata.py --infores "infores:pharmgkb" --output "my_own_pharmgkb_metadata.yaml"
    """

    # Validate infores format
    if not infores.startswith('infores:'):
        click.echo("Error: InfoRes ID must start with 'infores:'", err=True)
        sys.exit(1)

    # A bare prefix would otherwise produce the meaningless file name "_metadata.yaml"
    if not infores[len('infores:'):].strip():
        click.echo("Error: InfoRes ID must include a name after 'infores:'", err=True)
        sys.exit(1)

    # Check the template first, so nothing is created on disk when it is missing
    if not path.exists(template):
        click.echo(f"Error: Template file not found: {template}", err=True)
        sys.exit(1)

    # Generate output filename if not provided
    if not output:
        # Extract a source file name from infores ID and create the output filename
        source_name = infores.replace('infores:', '').replace(':', '_')
        output = f"{source_name}_metadata.yaml"

    # Sanity check: ensure the Ingest Metadata file directory exists
    makedirs(path.abspath(METADATA_FILE_DIRECTORY), exist_ok=True)
    output_path = f"{METADATA_FILE_DIRECTORY}{sep}{output}"

    # Check if an output file already exists
    if path.exists(output_path):
        if not click.confirm(f"File {output_path} already exists. Overwrite?"):
            click.echo("Aborted.")
            sys.exit(0)

    try:
        create_metadata(infores, output, output_path, template)
    except Exception as e:
        click.echo(f"Error creating Ingest Metadata Specification: {e}", err=True)
        sys.exit(1)


if __name__ == '__main__':
main()
Loading