Skip to content

Commit

Permalink
Force MaybeDependency to have a Dependency OR list[Problem], no…
Browse files Browse the repository at this point in the history
…t neither nor both (#3635)

## Changes
Force `MaybeDependency` to have a `Dependency` OR `list[Problem]`, not
neither nor both. This enforcement triggered to handle the known
libraries during import registration.

### Linked issues
<!-- DOC: Link issue with a keyword: close, closes, closed, fix, fixes,
fixed, resolve, resolves, resolved. See
https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword
-->

Resolves #3585
Breaks up #3626
Progresses #1527

### Functionality

- [x] modified code linting related logic

### Tests

- [x] added and modified unit tests

---------

Co-authored-by: Liran Bareket <[email protected]>
  • Loading branch information
JCZuurmond and FastLee authored Feb 13, 2025
1 parent 54d37bc commit 7403ac8
Show file tree
Hide file tree
Showing 16 changed files with 338 additions and 242 deletions.
13 changes: 8 additions & 5 deletions src/databricks/labs/ucx/source_code/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
MaybeDependency,
StubContainer,
)
from databricks.labs.ucx.source_code.known import KnownList
from databricks.labs.ucx.source_code.known import KnownDependency, KnownList
from databricks.labs.ucx.source_code.path_lookup import PathLookup
from databricks.labs.ucx.source_code.linters.python import PythonCodeAnalyzer

Expand Down Expand Up @@ -168,6 +168,11 @@ def resolve_file(self, path_lookup, path: Path) -> MaybeDependency:
return MaybeDependency(None, [problem])

def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency:
"""Resolve an import by name.
1. Check the known modules.
2. Check the import on the path lookup.
"""
maybe = self._resolve_allow_list(name)
if maybe is not None:
return maybe
Expand All @@ -181,10 +186,8 @@ def _resolve_allow_list(self, name: str) -> MaybeDependency | None:
if not compatibility.known:
logger.debug(f"Resolving unknown import: {name}")
return None
if not compatibility.problems:
return MaybeDependency(None, [])
# TODO move to linter, see https://github.com/databrickslabs/ucx/issues/1527
return MaybeDependency(None, compatibility.problems)
dependency = KnownDependency(name, compatibility.problems)
return MaybeDependency(dependency, [])

def _resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency | None:
if not name:
Expand Down
44 changes: 36 additions & 8 deletions src/databricks/labs/ucx/source_code/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,19 @@ def register_notebook(self, path: Path, inherit_context: bool) -> list[Dependenc
def register_import(self, name: str) -> list[DependencyProblem]:
if not name:
return [DependencyProblem('import-empty', 'Empty import name')]
maybe = self._resolver.resolve_import(self.path_lookup, name)
if not maybe.dependency:
return maybe.problems
maybe_graph = self.register_dependency(maybe.dependency)
maybe_dependency = self._resolver.resolve_import(self.path_lookup, name)
if maybe_dependency.problems:
return maybe_dependency.problems
assert maybe_dependency.dependency
maybe_graph = self.register_dependency(maybe_dependency.dependency)
return maybe_graph.problems

def register_file(self, path: Path) -> list[DependencyProblem]:
maybe = self._resolver.resolve_file(self.path_lookup, path)
if not maybe.dependency:
return maybe.problems
maybe_graph = self.register_dependency(maybe.dependency)
maybe_dependency = self._resolver.resolve_file(self.path_lookup, path)
if maybe_dependency.problems:
return maybe_dependency.problems
assert maybe_dependency.dependency
maybe_graph = self.register_dependency(maybe_dependency.dependency)
return maybe_graph.problems

def register_dependency(self, dependency: Dependency) -> MaybeGraph:
Expand Down Expand Up @@ -398,8 +400,34 @@ def resolve_file(self, path_lookup, path: Path) -> MaybeDependency:

@dataclass
class MaybeDependency:
"""A class:`Dependency` or a :class:`Failure`.
The `MaybeDependency` is designed to either contain a `dependency` OR
`problems`, never both or neither. Typically, a `Dependency` is
constructed by a resolver yielding a `MaybeDependency` with
`list[Problems]` if the dependency could NOT be resolved, otherwise it
yields the `Dependency`, resulting in code that looks like:
``` python
maybe_dependency = resolver.resolve_import(path_lookup, module_name)
if maybe_dependency.problems:
# Handle failure and return early
assert maybe_dependency.dependency, "Dependency should be given when no problems are given."
# Use dependency
```
"""

dependency: Dependency | None
"""The dependency"""

problems: list[DependencyProblem]
"""The problems during constructing the dependency"""

def __post_init__(self):
if not self.dependency and not self.problems:
raise ValueError(f"Dependency or problems should be given: {self}")
if self.dependency and self.problems:
raise ValueError(f"Dependency and problems should not be both given: {self}")


class DependencyResolver:
Expand Down
49 changes: 42 additions & 7 deletions src/databricks/labs/ucx/source_code/known.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@
from databricks.labs.blueprint.entrypoint import get_logger

from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex
from databricks.labs.ucx.source_code.base import CurrentSessionState
from databricks.labs.ucx.source_code.graph import Dependency, DependencyProblem
from databricks.labs.ucx.source_code.base import Advice, CurrentSessionState
from databricks.labs.ucx.source_code.graph import (
Dependency,
DependencyLoader,
StubContainer,
)
from databricks.labs.ucx.source_code.path_lookup import PathLookup

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -47,7 +51,7 @@
@dataclass
class Compatibility:
known: bool
problems: list[DependencyProblem]
problems: list[KnownProblem]


@dataclass(unsafe_hash=True, frozen=True, eq=True, order=True)
Expand All @@ -58,6 +62,10 @@ class KnownProblem:
def as_dict(self):
return {'code': self.code, 'message': self.message}

def as_advice(self) -> Advice:
# TODO: Pass on the complete Advice (https://github.com/databrickslabs/ucx/issues/3625)
return Advice(self.code, self.message, -1, -1, -1, -1)


UNKNOWN = Compatibility(False, [])
_DEFAULT_ENCODING = sys.getdefaultencoding()
Expand All @@ -70,10 +78,10 @@ def __init__(self):
known = self._get_known()
for distribution_name, modules in known.items():
specific_modules_first = sorted(modules.items(), key=lambda x: x[0], reverse=True)
for module_ref, problems in specific_modules_first:
module_problems = [DependencyProblem(**_) for _ in problems]
self._module_problems[module_ref] = module_problems
self._library_problems[distribution_name].extend(module_problems)
for module_ref, raw_problems in specific_modules_first:
problems = [KnownProblem(**_) for _ in raw_problems]
self._module_problems[module_ref] = problems
self._library_problems[distribution_name].extend(problems)
for name in sys.stdlib_module_names:
self._module_problems[name] = []

Expand Down Expand Up @@ -255,6 +263,33 @@ def __repr__(self):
return f"<DistInfoPackage {self._path}>"


class KnownLoader(DependencyLoader):
"""Always load as `StubContainer`.
This loader is used in combination with the KnownList to load known dependencies and their known problems.
"""

def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> StubContainer:
"""Load the dependency."""
_ = path_lookup
if not isinstance(dependency, KnownDependency):
raise RuntimeError("Only KnownDependency is supported")
# Known library paths do not need to be resolved
return StubContainer(dependency.path)


class KnownDependency(Dependency):
"""A dependency for known libraries, see :class:KnownList."""

def __init__(self, module_name: str, problems: list[KnownProblem]):
known_url = "https://github.com/databrickslabs/ucx/blob/main/src/databricks/labs/ucx/source_code/known.json"
# Note that Github does not support navigating JSON files, hence the #<module_name> does nothing.
# https://docs.github.com/en/repositories/working-with-files/using-files/navigating-code-on-github
super().__init__(KnownLoader(), Path(f"{known_url}#{module_name}"), inherits_context=False)
self._module_name = module_name
self.problems = problems


if __name__ == "__main__":
logger = get_logger(__file__) # this only works for __main__
KnownList.rebuild(Path.cwd())
6 changes: 6 additions & 0 deletions src/databricks/labs/ucx/source_code/linters/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
)
from databricks.labs.ucx.source_code.files import LocalFile
from databricks.labs.ucx.source_code.graph import Dependency
from databricks.labs.ucx.source_code.known import KnownDependency
from databricks.labs.ucx.source_code.linters.base import PythonLinter
from databricks.labs.ucx.source_code.linters.context import LinterContext
from databricks.labs.ucx.source_code.linters.imports import SysPathChange, UnresolvedPath
Expand Down Expand Up @@ -228,6 +229,11 @@ def __init__(

def lint(self) -> Iterable[Advice]:
"""Lint the file."""
if isinstance(self._dependency, KnownDependency):
# TODO: Pass on the right advice type (https://github.com/databrickslabs/ucx/issues/3625)
advices = [problem.as_advice().as_advisory() for problem in self._dependency.problems]
yield from advices
return
source_container = self._dependency.load(self._path_lookup)
if not source_container:
# The linter only reports **linting** errors, not loading errors
Expand Down
1 change: 1 addition & 0 deletions src/databricks/labs/ucx/source_code/linters/folders.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def lint_path(self, path: Path) -> Iterable[LocatedAdvice]:
problems = container.build_dependency_graph(graph)
for problem in problems:
yield problem.as_located_advice()
return
walker = LintingWalker(graph, self._path_lookup, self._context_factory)
yield from walker

Expand Down
9 changes: 7 additions & 2 deletions src/databricks/labs/ucx/source_code/python_libraries.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,17 @@ def register_library(self, path_lookup: PathLookup, *libraries: str) -> list[Dep
"""We delegate to pip to install the library and augment the path look-up to resolve the library at import.
This gives us the flexibility to install any library that is not in the allow-list, and we don't have to
bother about parsing cross-version dependencies in our code."""
known_url = "https://github.com/databrickslabs/ucx/blob/main/src/databricks/labs/ucx/source_code/known.json"
if len(libraries) == 0:
return []
if len(libraries) == 1: # Multiple libraries might be installation flags
compatibility = self._allow_list.distribution_compatibility(libraries[0])
library = libraries[0]
compatibility = self._allow_list.distribution_compatibility(library)
if compatibility.known:
return compatibility.problems
# TODO: Pass in the line number and column number https://github.com/databrickslabs/ucx/issues/3625
path = Path(f"{known_url}#{library}")
problems = [DependencyProblem(p.code, p.message, path) for p in compatibility.problems]
return problems
return self._install_library(path_lookup, *libraries)

@cached_property
Expand Down
46 changes: 3 additions & 43 deletions tests/unit/source_code/linters/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import pytest
from databricks.labs.blueprint.tui import MockPrompts


from databricks.sdk.service.workspace import Language

from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex
Expand All @@ -19,7 +21,7 @@
from databricks.labs.ucx.source_code.path_lookup import PathLookup
from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver

from tests.unit import locate_site_packages, _samples_path
from tests.unit import locate_site_packages


def test_file_linter_lints_file() -> None:
Expand Down Expand Up @@ -155,48 +157,6 @@ def test_notebook_migrator_supported_language_no_diagnostics(mock_path_lookup) -
assert not migrator.apply(path)


@pytest.fixture()
def local_code_linter(mock_path_lookup, migration_index):
notebook_loader = NotebookLoader()
file_loader = FileLoader()
folder_loader = FolderLoader(notebook_loader, file_loader)
pip_resolver = PythonLibraryResolver()
session_state = CurrentSessionState()
import_file_resolver = ImportFileResolver(file_loader)
resolver = DependencyResolver(
pip_resolver,
NotebookResolver(NotebookLoader()),
import_file_resolver,
import_file_resolver,
mock_path_lookup,
)
return LocalCodeLinter(
notebook_loader,
file_loader,
folder_loader,
mock_path_lookup,
session_state,
resolver,
lambda: LinterContext(migration_index),
)


def test_linter_walks_directory(mock_path_lookup, local_code_linter) -> None:
mock_path_lookup.append_path(Path(_samples_path(SourceContainer)))
path = Path(__file__).parent / "../samples" / "simulate-sys-path"
advices = list(local_code_linter.lint_path(path))
assert not advices
assert len(mock_path_lookup.successfully_resolved_paths) > 10


def test_linter_lints_children_in_context(mock_path_lookup, local_code_linter) -> None:
mock_path_lookup.append_path(Path(_samples_path(SourceContainer)))
path = Path(__file__).parent.parent / "samples" / "parent-child-context"
advices = list(local_code_linter.lint_path(path))
assert not advices
assert mock_path_lookup.successfully_resolved_paths == {path, Path("parent.py"), Path("child.py")}


def test_triple_dot_import() -> None:
file_resolver = ImportFileResolver(FileLoader())
path_lookup = create_autospec(PathLookup)
Expand Down
Loading

0 comments on commit 7403ac8

Please sign in to comment.