Skip to content

Commit dfeced8

Browse files
authored
fix(ingest): hide internal profiler.allow_deny_patterns from config (#5619)
1 parent c5c8e15 commit dfeced8

File tree

6 files changed: 12 additions and 14 deletions.

metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -490,7 +490,7 @@ def gen_dataset_containers(
490490

491491
def add_table_to_dataset_container(
492492
self, dataset_urn: str, db_name: str, schema: str
493-
) -> Iterable[Union[MetadataWorkUnit]]:
493+
) -> Iterable[MetadataWorkUnit]:
494494
schema_container_key = self.gen_dataset_key(db_name, schema)
495495
container_workunits = add_dataset_to_container(
496496
container_key=schema_container_key,
@@ -755,7 +755,7 @@ def gen_dataset_workunits(
755755
self.report.report_workunit(wu)
756756

757757
if isinstance(table, BigqueryTable) and self.config.profiling.enabled:
758-
if self.config.profiling.allow_deny_patterns.allowed(
758+
if self.config.profiling._allow_deny_patterns.allowed(
759759
datahub_dataset_name.raw_table_name()
760760
):
761761
# Emit the profile work unit

metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -258,7 +258,7 @@ def _get_columns_to_profile(self) -> List[str]:
258258
ignored_columns: List[str] = []
259259
for col in self.dataset.get_table_columns():
260260
# We expect the allow/deny patterns to specify '<table_pattern>.<column_pattern>'
261-
if not self.config.allow_deny_patterns.allowed(
261+
if not self.config._allow_deny_patterns.allowed(
262262
f"{self.dataset_name}.{col}"
263263
):
264264
ignored_columns.append(col)

metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -75,9 +75,8 @@ class GEProfilingConfig(ConfigModel):
7575
description="Whether to profile for the sample values for all columns.",
7676
)
7777

78-
allow_deny_patterns: AllowDenyPattern = Field(
78+
_allow_deny_patterns: AllowDenyPattern = pydantic.PrivateAttr(
7979
default=AllowDenyPattern.allow_all(),
80-
description="regex patterns for filtering of tables or table columns to profile.",
8180
)
8281
max_number_of_fields_to_profile: Optional[pydantic.PositiveInt] = Field(
8382
default=None,

metadata-ingestion/src/datahub/ingestion/source/s3/config.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,7 @@ def validate_platform(cls, values: Dict) -> Dict:
118118
def ensure_profiling_pattern_is_passed_to_profiling(
119119
cls, values: Dict[str, Any]
120120
) -> Dict[str, Any]:
121-
profiling = values.get("profiling")
121+
profiling: Optional[DataLakeProfilerConfig] = values.get("profiling")
122122
if profiling is not None and profiling.enabled:
123-
profiling.allow_deny_patterns = values["profile_patterns"]
123+
profiling._allow_deny_patterns = values["profile_patterns"]
124124
return values

metadata-ingestion/src/datahub/ingestion/source/s3/profiling.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -70,8 +70,8 @@ class DataLakeProfilerConfig(ConfigModel):
7070
description="Whether to perform profiling at table-level only or include column-level profiling as well.",
7171
)
7272

73-
allow_deny_patterns: AllowDenyPattern = Field(
74-
default=AllowDenyPattern.allow_all(), description=""
73+
_allow_deny_patterns: AllowDenyPattern = pydantic.PrivateAttr(
74+
default=AllowDenyPattern.allow_all(),
7575
)
7676

7777
max_number_of_fields_to_profile: Optional[pydantic.PositiveInt] = Field(
@@ -209,7 +209,7 @@ def __init__(
209209
# get column distinct counts
210210
for column in dataframe.columns:
211211

212-
if not self.profiling_config.allow_deny_patterns.allowed(column):
212+
if not self.profiling_config._allow_deny_patterns.allowed(column):
213213
self.ignored_columns.append(column)
214214
continue
215215

metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,7 @@
4848
)
4949
from datahub.ingestion.api.common import PipelineContext
5050
from datahub.ingestion.api.workunit import MetadataWorkUnit
51+
from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig
5152
from datahub.ingestion.source.state.checkpoint import Checkpoint
5253
from datahub.ingestion.source.state.sql_common_state import (
5354
BaseSQLAlchemyCheckpointState,
@@ -269,8 +270,6 @@ class SQLAlchemyConfig(StatefulIngestionConfigBase):
269270
default=True, description="Whether tables should be ingested."
270271
)
271272

272-
from datahub.ingestion.source.ge_data_profiler import GEProfilingConfig
273-
274273
profiling: GEProfilingConfig = GEProfilingConfig()
275274
# Custom Stateful Ingestion settings
276275
stateful_ingestion: Optional[SQLAlchemyStatefulIngestionConfig] = None
@@ -290,9 +289,9 @@ def view_pattern_is_table_pattern_unless_specified(
290289
def ensure_profiling_pattern_is_passed_to_profiling(
291290
cls, values: Dict[str, Any]
292291
) -> Dict[str, Any]:
293-
profiling = values.get("profiling")
292+
profiling: Optional[GEProfilingConfig] = values.get("profiling")
294293
if profiling is not None and profiling.enabled:
295-
profiling.allow_deny_patterns = values["profile_pattern"]
294+
profiling._allow_deny_patterns = values["profile_pattern"]
296295
return values
297296

298297
@abstractmethod

0 commit comments

Comments
 (0)