Skip to content

Commit 3e10bdb

Browse files
hsheth2shirshanka
authored andcommitted
feat(ingest): support reading config file from stdin (datahub-project#5847)
1 parent f5dedf4 commit 3e10bdb

File tree

2 files changed

+28
-20
lines changed

2 files changed

+28
-20
lines changed

metadata-ingestion/src/datahub/cli/ingest_cli.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import json
55
import logging
66
import os
7-
import pathlib
87
import sys
98
from datetime import datetime
109
from typing import Optional
@@ -46,7 +45,7 @@ def ingest() -> None:
4645
@click.option(
4746
"-c",
4847
"--config",
49-
type=click.Path(exists=True, dir_okay=False),
48+
type=click.Path(dir_okay=False),
5049
help="Config file in .toml or .yaml format.",
5150
required=True,
5251
)
@@ -182,12 +181,14 @@ async def run_func_check_upgrade(pipeline: Pipeline) -> None:
182181
# main function begins
183182
logger.info("DataHub CLI version: %s", datahub_package.nice_version_name())
184183

185-
config_file = pathlib.Path(config)
186184
pipeline_config = load_config_file(
187-
config_file, squirrel_original_config=True, squirrel_field="__raw_config"
185+
config,
186+
squirrel_original_config=True,
187+
squirrel_field="__raw_config",
188+
allow_stdin=True,
188189
)
189-
raw_pipeline_config = pipeline_config["__raw_config"]
190-
pipeline_config = {k: v for k, v in pipeline_config.items() if k != "__raw_config"}
190+
raw_pipeline_config = pipeline_config.pop("__raw_config")
191+
191192
if test_source_connection:
192193
_test_source_connection(report_to, pipeline_config)
193194

metadata-ingestion/src/datahub/configuration/config_loader.py

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import io
22
import pathlib
33
import re
4+
import sys
45
from typing import Any, Dict, Union
56

67
from expandvars import UnboundVariable, expandvars
@@ -54,26 +55,32 @@ def load_config_file(
5455
config_file: Union[pathlib.Path, str],
5556
squirrel_original_config: bool = False,
5657
squirrel_field: str = "__orig_config",
58+
allow_stdin: bool = False,
5759
) -> dict:
58-
if isinstance(config_file, str):
59-
config_file = pathlib.Path(config_file)
60-
if not config_file.is_file():
61-
raise ConfigurationError(f"Cannot open config file {config_file}")
62-
6360
config_mech: ConfigurationMechanism
64-
if config_file.suffix in [".yaml", ".yml"]:
61+
if allow_stdin and config_file == "-":
62+
# If we're reading from stdin, we assume that the input is a YAML file.
6563
config_mech = YamlConfigurationMechanism()
66-
elif config_file.suffix == ".toml":
67-
config_mech = TomlConfigurationMechanism()
64+
raw_config_file = sys.stdin.read()
6865
else:
69-
raise ConfigurationError(
70-
"Only .toml and .yml are supported. Cannot process file type {}".format(
71-
config_file.suffix
66+
if isinstance(config_file, str):
67+
config_file = pathlib.Path(config_file)
68+
if not config_file.is_file():
69+
raise ConfigurationError(f"Cannot open config file {config_file}")
70+
71+
if config_file.suffix in {".yaml", ".yml"}:
72+
config_mech = YamlConfigurationMechanism()
73+
elif config_file.suffix == ".toml":
74+
config_mech = TomlConfigurationMechanism()
75+
else:
76+
raise ConfigurationError(
77+
"Only .toml and .yml are supported. Cannot process file type {}".format(
78+
config_file.suffix
79+
)
7280
)
73-
)
7481

75-
with config_file.open() as raw_config_fp:
76-
raw_config_file = raw_config_fp.read()
82+
raw_config_file = config_file.read_text()
83+
7784
config_fp = io.StringIO(raw_config_file)
7885
raw_config = config_mech.load_config(config_fp)
7986
config = resolve_env_variables(raw_config)

0 commit comments

Comments
 (0)