Skip to content

Commit abddc01

Browse files
authored
fix(ingest): fix doc generation import ordering issue with postgres (#5846)
Relying on the correct import directly, rather than going through SQLAlchemy's import wrapper (in their dialect.py), allows us to bypass this strange error in doc generation.
1 parent 13e411e commit abddc01

File tree

2 files changed

+24
-24
lines changed

2 files changed

+24
-24
lines changed

metadata-ingestion/scripts/docgen.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@
1818
SourceCapability,
1919
SupportStatus,
2020
)
21-
from datahub.ingestion.api.registry import PluginRegistry
22-
from datahub.ingestion.api.source import Source
21+
from datahub.ingestion.source.source_registry import source_registry
2322

2423
logger = logging.getLogger(__name__)
2524

@@ -500,11 +499,7 @@ def generate(
500499
file_contents,
501500
)
502501

503-
source_registry = PluginRegistry[Source]()
504-
source_registry.register_from_entrypoint("datahub.ingestion.source.plugins")
505-
506-
# This source is always enabled
507-
for plugin_name in sorted(source_registry._mapping.keys()):
502+
for plugin_name in sorted(source_registry.mapping.keys()):
508503
if source and source != plugin_name:
509504
continue
510505

@@ -526,8 +521,9 @@ def generate(
526521
get_additional_deps_for_extra(extra_plugin) if extra_plugin else []
527522
)
528523
except Exception as e:
529-
print(f"Failed to process {plugin_name} due to exception")
530-
print(repr(e))
524+
logger.warning(
525+
f"Failed to process {plugin_name} due to exception {e}", exc_info=e
526+
)
531527
metrics["plugins"]["failed"] = metrics["plugins"].get("failed", 0) + 1
532528

533529
if source_type and hasattr(source_type, "get_config_class"):

metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@
2222
from urllib.parse import quote_plus
2323

2424
import pydantic
25+
import sqlalchemy.dialects.postgresql.base
2526
from pydantic.fields import Field
26-
from sqlalchemy import create_engine, dialects, inspect
27+
from sqlalchemy import create_engine, inspect
2728
from sqlalchemy.engine.reflection import Inspector
2829
from sqlalchemy.exc import ProgrammingError
2930
from sqlalchemy.sql import sqltypes as types
@@ -350,20 +351,23 @@ class SqlWorkUnit(MetadataWorkUnit):
350351
types.DATETIME: TimeTypeClass,
351352
types.TIMESTAMP: TimeTypeClass,
352353
types.JSON: RecordTypeClass,
353-
dialects.postgresql.base.BYTEA: BytesTypeClass,
354-
dialects.postgresql.base.DOUBLE_PRECISION: NumberTypeClass,
355-
dialects.postgresql.base.INET: StringTypeClass,
356-
dialects.postgresql.base.MACADDR: StringTypeClass,
357-
dialects.postgresql.base.MONEY: NumberTypeClass,
358-
dialects.postgresql.base.OID: StringTypeClass,
359-
dialects.postgresql.base.REGCLASS: BytesTypeClass,
360-
dialects.postgresql.base.TIMESTAMP: TimeTypeClass,
361-
dialects.postgresql.base.TIME: TimeTypeClass,
362-
dialects.postgresql.base.INTERVAL: TimeTypeClass,
363-
dialects.postgresql.base.BIT: BytesTypeClass,
364-
dialects.postgresql.base.UUID: StringTypeClass,
365-
dialects.postgresql.base.TSVECTOR: BytesTypeClass,
366-
dialects.postgresql.base.ENUM: EnumTypeClass,
354+
# Because the postgresql dialect is used internally by many other dialects,
355+
# we add some postgres types here. This is ok to do because the postgresql
356+
# dialect is built-in to sqlalchemy.
357+
sqlalchemy.dialects.postgresql.base.BYTEA: BytesTypeClass,
358+
sqlalchemy.dialects.postgresql.base.DOUBLE_PRECISION: NumberTypeClass,
359+
sqlalchemy.dialects.postgresql.base.INET: StringTypeClass,
360+
sqlalchemy.dialects.postgresql.base.MACADDR: StringTypeClass,
361+
sqlalchemy.dialects.postgresql.base.MONEY: NumberTypeClass,
362+
sqlalchemy.dialects.postgresql.base.OID: StringTypeClass,
363+
sqlalchemy.dialects.postgresql.base.REGCLASS: BytesTypeClass,
364+
sqlalchemy.dialects.postgresql.base.TIMESTAMP: TimeTypeClass,
365+
sqlalchemy.dialects.postgresql.base.TIME: TimeTypeClass,
366+
sqlalchemy.dialects.postgresql.base.INTERVAL: TimeTypeClass,
367+
sqlalchemy.dialects.postgresql.base.BIT: BytesTypeClass,
368+
sqlalchemy.dialects.postgresql.base.UUID: StringTypeClass,
369+
sqlalchemy.dialects.postgresql.base.TSVECTOR: BytesTypeClass,
370+
sqlalchemy.dialects.postgresql.base.ENUM: EnumTypeClass,
367371
# When SQLAlchemy is unable to map a type into its internal hierarchy, it
368372
# assigns the NullType by default. We want to carry this warning through.
369373
types.NullType: NullTypeClass,

0 commit comments

Comments
 (0)