Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
7096abb
Updated goldens mce -> mcp
askumar27 Sep 4, 2025
a46abaa
Migrated to SDKv2, converted to emit only MCPs, bug fix in golden data
askumar27 Sep 4, 2025
56d95bf
Updated golden
askumar27 Sep 4, 2025
178facc
process_dashboard refactored
askumar27 Sep 5, 2025
24fd277
Refactored emitting independent looks
askumar27 Sep 5, 2025
7089332
Refactored extract_independent_looks
askumar27 Sep 5, 2025
9154655
Refactored chart uniqueness check in LookerDashboardSource to use a s…
askumar27 Sep 5, 2025
2610a6c
Updated golden to remove browsepaths no longer emitted in SDKv2, char…
askumar27 Sep 5, 2025
6f5916a
Updated golden test for Looker to reflect changes in aspect names and…
askumar27 Sep 5, 2025
ec3c50a
Updated golden to remove browsepaths no longer emitted in SDKv2, char…
askumar27 Sep 5, 2025
65a942d
Updated golden to remove browsepaths no longer emitted in SDKv2, char…
askumar27 Sep 5, 2025
19dc9c5
Updated golden to remove browsepaths no longer emitted in SDKv2, char…
askumar27 Sep 5, 2025
82b6f31
Removed redundant check for dashboard patterns
askumar27 Sep 5, 2025
f4aed0a
Bug fix in mocking, updated golden to remove browsepaths no longer em…
askumar27 Sep 5, 2025
fa918ce
Refactored chart uniqueness check in LookerDashboardSource to utilize…
askumar27 Sep 5, 2025
affec1f
Updated golden file and test mocks
askumar27 Sep 5, 2025
016431c
Updated test_looker_ingest_stateful test and golden files
askumar27 Sep 5, 2025
96264dc
Updates golden for test_independent_looks_ingest_with_personal_folder
askumar27 Sep 5, 2025
fc22150
Updated goldens for test_independent_looks_ingest_without_personal_fo…
askumar27 Sep 5, 2025
ec498ed
Updated test_file_path_in_view_naming_pattern to mock users and goldens
askumar27 Sep 5, 2025
4209ffc
Updates test_looker_ingest_multi_model_explores test goldens and adde…
askumar27 Sep 5, 2025
233ec87
Updated test_folder_path_pattern test goldens and mock data
askumar27 Sep 5, 2025
d5cc292
Updated test_group_label_tags test goldens and mocked users
askumar27 Sep 5, 2025
6e37bfd
Updated goldens for test_file_path_in_view_naming_pattern test
askumar27 Sep 5, 2025
9e124d8
Bug fix incorrect set membership check
askumar27 Sep 5, 2025
c93d288
Improve type hints for better clarity and type safety
askumar27 Sep 5, 2025
cf32a66
Fixed type checks for covariance errors
askumar27 Sep 6, 2025
5e5e178
Lint fixes: updated syntax for older py versions
askumar27 Sep 9, 2025
6c25aa5
Removed optional fields from golden
askumar27 Sep 9, 2025
c10b185
Lint fix: Updated imports from private classes
askumar27 Sep 9, 2025
402e057
Lint fixes: organised imports
askumar27 Sep 9, 2025
bb3369d
Removed empty browsepaths from golden files
askumar27 Sep 9, 2025
5025983
Updated as per TOT SDK changes for invariant types
askumar27 Sep 11, 2025
8b5f270
Added comments and streamlined workunit emissions for tags and usage …
askumar27 Sep 11, 2025
c327688
review comment fixes
askumar27 Sep 18, 2025
8ea3880
Refactor Looker ingestion to utilize Dataset SDK
askumar27 Sep 18, 2025
8426cfa
Adjusted tests to reflect changes from SDK and added data platform in…
askumar27 Sep 18, 2025
9647402
Removed StatusClass from extra aspects in Looker ingestion and update…
askumar27 Sep 18, 2025
f9fb90d
Review comments: moved assigning tags to the constructor and removed …
askumar27 Sep 18, 2025
4ad5165
Removed dead code
askumar27 Sep 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
PlatformResourceKey,
)
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.mcp_builder import ContainerKey, create_embed_mcp
from datahub.emitter.mcp_builder import ContainerKey
from datahub.ingestion.api.report import Report
from datahub.ingestion.api.source import SourceReport
from datahub.ingestion.source.common.subtypes import DatasetSubTypes
Expand Down Expand Up @@ -72,7 +72,6 @@
UpstreamClass,
UpstreamLineage,
)
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
from datahub.metadata.com.linkedin.pegasus2avro.schema import (
ArrayTypeClass,
Expand All @@ -90,21 +89,18 @@
)
from datahub.metadata.schema_classes import (
BrowsePathEntryClass,
BrowsePathsClass,
BrowsePathsV2Class,
ContainerClass,
DatasetPropertiesClass,
EmbedClass,
EnumTypeClass,
FineGrainedLineageClass,
GlobalTagsClass,
SchemaMetadataClass,
StatusClass,
SubTypesClass,
TagAssociationClass,
TagPropertiesClass,
TagSnapshotClass,
)
from datahub.metadata.urns import TagUrn
from datahub.sdk.dataset import Dataset
from datahub.sql_parsing.sqlglot_lineage import ColumnRef
from datahub.utilities.lossy_collections import LossyList, LossySet
from datahub.utilities.url_util import remove_port_from_url
Expand Down Expand Up @@ -1286,50 +1282,28 @@ def _to_metadata_events(
reporter: SourceReport,
base_url: str,
extract_embed_urls: bool,
) -> Optional[List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]]:
# We only generate MCE-s for explores that contain from clauses and do NOT contain joins
# All other explores (passthrough explores and joins) end in correct resolution of lineage, and don't need additional nodes in the graph.

dataset_snapshot = DatasetSnapshot(
urn=self.get_explore_urn(config),
aspects=[], # we append to this list later on
)

model_key = gen_model_key(config, self.model_name)
browse_paths = BrowsePathsClass(paths=[self.get_explore_browse_path(config)])
container = ContainerClass(container=model_key.as_urn())
dataset_snapshot.aspects.append(browse_paths)
dataset_snapshot.aspects.append(StatusClass(removed=False))

custom_properties = {
"project": self.project_name,
"model": self.model_name,
"looker.explore.label": self.label,
"looker.explore.name": self.name,
"looker.explore.file": self.source_file,
}
dataset_props = DatasetPropertiesClass(
name=str(self.label) if self.label else LookerUtil._display_name(self.name),
description=self.description,
customProperties={
k: str(v) for k, v in custom_properties.items() if v is not None
},
)
dataset_props.externalUrl = self._get_url(base_url)
) -> Dataset:
"""
Generate a Dataset metadata event for this Looker Explore.

dataset_snapshot.aspects.append(dataset_props)
Only generates datasets for explores that contain FROM clauses and do NOT contain joins.
Passthrough explores and joins are handled via lineage and do not need additional nodes.
"""
upstream_lineage = None
view_name_to_urn_map: Dict[str, str] = {}

if self.upstream_views is not None:
assert self.project_name is not None
upstreams = []
upstreams: list[UpstreamClass] = []
observed_lineage_ts = datetime.datetime.now(tz=datetime.timezone.utc)

for view_ref in sorted(self.upstream_views):
# set file_path to ViewFieldType.UNKNOWN if file_path is not available to keep backward compatibility
# if we raise error on file_path equal to None then existing test-cases will fail as mock data
# doesn't have required attributes.
file_path: str = (
cast(str, self.upstream_views_file_path[view_ref.include])
if self.upstream_views_file_path[view_ref.include] is not None
if self.upstream_views_file_path.get(view_ref.include) is not None
else ViewFieldValue.NOT_AVAILABLE.value
)

Expand All @@ -1356,7 +1330,7 @@ def _to_metadata_events(
)
view_name_to_urn_map[view_ref.include] = view_urn

fine_grained_lineages = []
fine_grained_lineages: list[FineGrainedLineageClass] = []
if config.extract_column_level_lineage:
for field in self.fields or []:
# Skip creating fine-grained lineage for empty field names to prevent invalid schema field URNs
Expand Down Expand Up @@ -1397,52 +1371,58 @@ def _to_metadata_events(
)

upstream_lineage = UpstreamLineage(
upstreams=upstreams, fineGrainedLineages=fine_grained_lineages or None
upstreams=upstreams,
fineGrainedLineages=fine_grained_lineages or None,
)
dataset_snapshot.aspects.append(upstream_lineage)

schema_metadata = None
if self.fields is not None:
schema_metadata = LookerUtil._get_schema(
platform_name=config.platform_name,
schema_name=self.name,
view_fields=self.fields,
reporter=reporter,
)
if schema_metadata is not None:
dataset_snapshot.aspects.append(schema_metadata)

mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
mcp = MetadataChangeProposalWrapper(
entityUrn=dataset_snapshot.urn,
aspect=SubTypesClass(typeNames=[DatasetSubTypes.LOOKER_EXPLORE]),
)

proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
mce,
mcp,
]

# Add tags
explore_tag_urns: List[TagAssociationClass] = [
TagAssociationClass(tag=TagUrn(tag).urn()) for tag in self.tags
]
if explore_tag_urns:
dataset_snapshot.aspects.append(GlobalTagsClass(explore_tag_urns))
extra_aspects: List[Union[GlobalTagsClass, EmbedClass]] = []
Copy link
Contributor

@sgomezvillamor sgomezvillamor Sep 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need to add tags via extra_aspects?
Dataset in SDK v2 inherits from HasTags, so we could use add_tags instead.
We even have a tags field in the Dataset constructor.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch! Updated.


# If extracting embeds is enabled, produce an MCP for embed URL.
explore_tag_urns: List[TagUrn] = [TagUrn(tag) for tag in self.tags]
if extract_embed_urls:
embed_mcp = create_embed_mcp(
dataset_snapshot.urn, self._get_embed_url(base_url)
)
proposals.append(embed_mcp)
extra_aspects.append(EmbedClass(renderUrl=self._get_embed_url(base_url)))

proposals.append(
MetadataChangeProposalWrapper(
entityUrn=dataset_snapshot.urn,
aspect=container,
)
)
custom_properties: Dict[str, Optional[str]] = {
"project": self.project_name,
"model": self.model_name,
"looker.explore.label": self.label,
"looker.explore.name": self.name,
"looker.explore.file": self.source_file,
}

return proposals
return Dataset(
platform=config.platform_name,
name=config.explore_naming_pattern.replace_variables(
self.get_mapping(config)
),
display_name=str(self.label)
if self.label
else LookerUtil._display_name(self.name),
description=self.description,
subtype=DatasetSubTypes.LOOKER_EXPLORE,
env=config.env,
platform_instance=config.platform_instance,
custom_properties={
k: str(v) for k, v in custom_properties.items() if v is not None
},
external_url=self._get_url(base_url),
upstreams=upstream_lineage,
schema=schema_metadata,
parent_container=[
"Explore",
gen_model_key(config, self.model_name).as_urn(),
],
tags=explore_tag_urns if explore_tag_urns else None,
extra_aspects=extra_aspects,
)


def gen_project_key(config: LookerCommonConfig, project_name: str) -> LookMLProjectKey:
Expand Down
Loading
Loading