Skip to content

Commit

Permalink
feat(sdk): support urn types in Urn.from_string (#12347)
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 authored Jan 15, 2025
1 parent cfe65cc commit 32cbc7d
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 5 deletions.
33 changes: 28 additions & 5 deletions metadata-ingestion/src/datahub/utilities/urns/_urn_base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import functools
import urllib.parse
from abc import abstractmethod
from typing import ClassVar, Dict, List, Optional, Type
from typing import ClassVar, Dict, List, Optional, Type, Union

from deprecated import deprecated
from typing_extensions import Self
Expand Down Expand Up @@ -86,12 +86,24 @@ def entity_ids(self) -> List[str]:
return self._entity_ids

@classmethod
def from_string(cls, urn_str: str) -> Self:
"""
Creates an Urn from its string representation.
def from_string(cls, urn_str: Union[str, "Urn"], /) -> Self:
"""Create an Urn from its string representation.
When called against the base Urn class, this method will return a more specific Urn type where possible.
>>> from datahub.metadata.urns import DatasetUrn, Urn
>>> urn_str = 'urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)'
>>> urn = Urn.from_string(urn_str)
>>> assert isinstance(urn, DatasetUrn)
When called against a specific Urn type (e.g. DatasetUrn.from_string), this method can
also be used for type narrowing.
>>> urn_str = 'urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)'
>>> assert DatasetUrn.from_string(urn_str)
Args:
urn_str: The string representation of the Urn.
urn_str: The string representation of the urn. Also accepts an existing Urn instance.
Returns:
Urn of the given string representation.
Expand All @@ -100,6 +112,17 @@ def from_string(cls, urn_str: str) -> Self:
InvalidUrnError: If the string representation is in invalid format.
"""

if isinstance(urn_str, Urn):
if issubclass(cls, _SpecificUrn) and isinstance(urn_str, cls):
# Fast path - we're already the right type.

# I'm not really sure why we need a type ignore here, but mypy doesn't really
# understand the isinstance check above.
return urn_str # type: ignore

# Fall through, so that we can convert a generic Urn to a specific Urn type.
urn_str = urn_str.urn()

# TODO: Add handling for url encoded urns e.g. urn%3A ...

if not urn_str.startswith("urn:li:"):
Expand Down
41 changes: 41 additions & 0 deletions metadata-ingestion/tests/unit/urns/test_urn.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@

import pytest

import datahub.utilities.urns._urn_base
from datahub.metadata.urns import (
CorpUserUrn,
DataPlatformUrn,
DatasetUrn,
SchemaFieldUrn,
Urn,
)
from datahub.testing.doctest import assert_doctest
from datahub.utilities.urns.error import InvalidUrnError

pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning")
Expand Down Expand Up @@ -87,6 +89,10 @@ def test_urn_type_dispatch_1() -> None:
with pytest.raises(InvalidUrnError, match="Passed an urn of type corpuser"):
DatasetUrn.from_string("urn:li:corpuser:foo")

urn2 = DatasetUrn.from_string(urn)
assert isinstance(urn2, DatasetUrn)
assert urn2 == urn


def test_urn_type_dispatch_2() -> None:
urn = "urn:li:dataJob:(urn:li:dataFlow:(airflow,flow_id,prod),job_id)"
Expand All @@ -96,6 +102,41 @@ def test_urn_type_dispatch_2() -> None:
CorpUserUrn.from_string(urn)


def test_urn_type_dispatch_3() -> None:
    """Check that from_string accepts an existing generic Urn instance.

    A plain Urn with entity type "dataset" is passed to
    DatasetUrn.from_string, which is expected to return a DatasetUrn equal to
    the input. Passing the same urn to CorpUserUrn.from_string is expected to
    raise InvalidUrnError.
    """
    # Build the urn through the base class so that it starts out "generic".
    generic_urn = Urn("dataset", ["urn:li:dataPlatform:abc", "def", "PROD"])
    assert isinstance(generic_urn, Urn)

    narrowed = DatasetUrn.from_string(generic_urn)
    assert isinstance(narrowed, DatasetUrn)
    assert narrowed == generic_urn

    # The entity type does not match CorpUserUrn, so conversion must fail.
    expected_error = (
        "Passed an urn of type dataset to the from_string method of CorpUserUrn"
    )
    with pytest.raises(InvalidUrnError, match=expected_error):
        CorpUserUrn.from_string(generic_urn)


def test_urn_type_dispatch_4() -> None:
    """Check round-tripping of an urn with an unrecognized entity type.

    Urn.from_string is expected to return an instance whose type is exactly
    Urn, both when given the raw string and when given the already-parsed
    Urn, and the result must serialize back to the original string.
    """
    # A generic urn of a new entity type.
    raw = "urn:li:new_entity_type:(abc,def)"

    parsed = Urn.from_string(raw)
    assert type(parsed) is Urn
    assert parsed == Urn("new_entity_type", ["abc", "def"])
    assert parsed.urn() == raw

    # Feeding the Urn instance back in should yield an equal plain Urn.
    reparsed = Urn.from_string(parsed)
    assert type(reparsed) is Urn
    assert reparsed == parsed
    assert reparsed.urn() == raw


def test_urn_doctest() -> None:
    """Run the doctest examples embedded in the _urn_base module."""
    module_under_test = datahub.utilities.urns._urn_base
    assert_doctest(module_under_test)


def _load_urns(file_name: pathlib.Path) -> List[str]:
urns = [
line.strip()
Expand Down

0 comments on commit 32cbc7d

Please sign in to comment.