dev: remove black in favor of ruff for formatting (#12378)
anshbansal authored Jan 18, 2025
1 parent 654728e commit 262dd76
Showing 208 changed files with 1,366 additions and 1,326 deletions.
10 changes: 6 additions & 4 deletions metadata-ingestion-modules/airflow-plugin/build.gradle
@@ -73,16 +73,15 @@ task lint(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-c',
"find ${venv_name}/lib -path *airflow/_vendor/connexion/spec.py -exec sed -i.bak -e '169,169s/ # type: List\\[str\\]//g' {} \\; && " +
"source ${venv_name}/bin/activate && set -x && " +
"black --check --diff src/ tests/ && " +
"ruff check src/ tests/ && " +
"ruff format --check src/ tests/ && " +
"mypy --show-traceback --show-error-codes src/ tests/"
}
task lintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"black src/ tests/ && " +
"ruff check --fix src/ tests/"
"mypy src/ tests/ "
"ruff check --fix src/ tests/ && " +
"ruff format src/ tests/ "
}

// HACK: Some of the Airflow constraint files conflict with packages that we install (e.g. black).
@@ -119,5 +118,8 @@ clean {
delete venv_name
delete 'build'
delete 'dist'
delete '.ruff_cache'
delete '.mypy_cache'
delete '.pytest_cache'
}
clean.dependsOn cleanPythonCache
48 changes: 24 additions & 24 deletions metadata-ingestion-modules/airflow-plugin/pyproject.toml
@@ -2,13 +2,21 @@
build-backend = "setuptools.build_meta"
requires = ["setuptools>=54.0.0", "wheel", "pip>=21.0.0"]

[tool.black]
extend-exclude = '''
# A regex preceded with ^/ will apply only to files and directories
# in the root of the project.
^/tmp
'''
include = '\.pyi?$'
[tool.ruff]
line-length = 88
target-version = "py38"
exclude = [
".git",
"venv",
".tox",
"__pycache__",
]

[tool.ruff.format]
quote-style = "double"
indent-style = "space"
skip-magic-trailing-comma = false
line-ending = "auto"

[tool.ruff.lint.isort]
combine-as-imports = true
@@ -28,31 +36,23 @@ required-imports = []
classes = ["typing"]

[tool.ruff.lint]
select = [
"B",
"C90",
"E",
"F",
"I", # For isort
"TID",
extend-select = [
"B", # flake8-bugbear
"C90", # mccabe complexity
"E", # pycodestyle errors
"F", # pyflakes
"G010", # logging.warn -> logging.warning
"I", # isort
"TID", # flake8-tidy-imports
]
ignore = [
# Ignore line length violations (handled by Black)
"E501",
# Ignore whitespace before ':' (matches Black)
"E203",
"E203",
# Allow usages of functools.lru_cache
"B019",
# Allow function call in argument defaults
"B008",
"E501", # Line length violations (handled by formatter)
]

[tool.ruff.lint.mccabe]
max-complexity = 15

[tool.ruff.lint.flake8-tidy-imports]
# Disallow all relative imports.
ban-relative-imports = "all"

[tool.ruff.lint.per-file-ignores]
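
One practical effect of the new extend-select list above is the G010 rule, which flags calls to the deprecated logging.warn alias. A minimal sketch of the kind of code it targets (the module and logger names here are illustrative, not taken from this repository):

    import logging

    logger = logging.getLogger(__name__)

    def check_value(value: int) -> None:
        if value < 0:
            # G010 flags the deprecated alias; `ruff check --fix` should be
            # able to rewrite it into the call on the next line.
            logger.warn("negative value: %s", value)
            logger.warning("negative value: %s", value)

E501 stays ignored because line length is now the formatter's job, the same division of labor the old black setup relied on.
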
1 change: 0 additions & 1 deletion metadata-ingestion-modules/airflow-plugin/setup.py
@@ -71,7 +71,6 @@ def get_long_description():
dev_requirements = {
*base_requirements,
*mypy_stubs,
"black==22.12.0",
"coverage>=5.1",
"ruff==0.9.2",
"mypy==1.10.1",
Original file line number Diff line number Diff line change
@@ -63,9 +63,9 @@ def __init__(self):

self.task_to_extractor.extractors["AthenaOperator"] = AthenaOperatorExtractor

self.task_to_extractor.extractors[
"BigQueryInsertJobOperator"
] = BigQueryInsertJobOperatorExtractor
self.task_to_extractor.extractors["BigQueryInsertJobOperator"] = (
BigQueryInsertJobOperatorExtractor
)

self._graph: Optional["DataHubGraph"] = None

Original file line number Diff line number Diff line change
@@ -286,9 +286,9 @@ def _extract_lineage(
if sql_parsing_result:
if error := sql_parsing_result.debug_info.error:
logger.info(f"SQL parsing error: {error}", exc_info=error)
datajob.properties[
"datahub_sql_parser_error"
] = f"{type(error).__name__}: {error}"
datajob.properties["datahub_sql_parser_error"] = (
f"{type(error).__name__}: {error}"
)
if not sql_parsing_result.debug_info.table_error:
input_urns.extend(sql_parsing_result.in_tables)
output_urns.extend(sql_parsing_result.out_tables)
Original file line number Diff line number Diff line change
@@ -44,11 +44,9 @@ def get_task_inlets_advanced(task: BaseOperator, context: Any) -> Iterable[Any]:

if task_inlets and isinstance(task_inlets, list):
inlets = []
task_ids = (
{o for o in task_inlets if isinstance(o, str)}
.union(op.task_id for op in task_inlets if isinstance(op, BaseOperator))
.intersection(task.get_flat_relative_ids(upstream=True))
)
task_ids = {o for o in task_inlets if isinstance(o, str)}.union(
op.task_id for op in task_inlets if isinstance(op, BaseOperator)
).intersection(task.get_flat_relative_ids(upstream=True))

from airflow.lineage import AUTO
from cattr import structure
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@
This example demonstrates how to emit lineage to DataHub within an Airflow DAG.
"""

from datetime import timedelta

from airflow import DAG
Original file line number Diff line number Diff line change
@@ -273,13 +273,21 @@ def _run_airflow(
subprocess.check_call(
[
# fmt: off
"airflow", "users", "create",
"--username", "airflow",
"--password", "airflow",
"--firstname", "admin",
"--lastname", "admin",
"--role", "Admin",
"--email", "[email protected]",
"airflow",
"users",
"create",
"--username",
"airflow",
"--password",
"airflow",
"--firstname",
"admin",
"--lastname",
"admin",
"--role",
"Admin",
"--email",
"[email protected]",
# fmt: on
],
env=environment,
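
This hunk re-wraps an argument list that black had kept compact under the # fmt: off / # fmt: on pair. A plausible reason (worth double-checking against the ruff formatter docs) is that ruff format only honors these suppression comments around whole statements, not inside an expression such as a call's argument list, so the paired "--flag value" layout could not be preserved. A hedged sketch of that difference:

    import subprocess
    import sys

    # Assumption: fmt: off/on *inside* the list expression is not honored by
    # `ruff format`, so this call gets re-wrapped to one element per line.
    subprocess.run(
        [
            # fmt: off
            sys.executable, "--version",
            # fmt: on
        ],
        check=True,
    )

    # A fmt: off block around whole statements is respected, so this layout
    # survives formatting unchanged.
    # fmt: off
    identity = [
        1, 0,
        0, 1,
    ]
    # fmt: on
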
Original file line number Diff line number Diff line change
@@ -242,9 +242,7 @@ def test_lineage_backend(mock_emit, inlets, outlets, capture_executions):
},
), mock.patch("airflow.models.BaseOperator.xcom_pull"), mock.patch(
"airflow.models.BaseOperator.xcom_push"
), patch_airflow_connection(
datahub_rest_connection_config
):
), patch_airflow_connection(datahub_rest_connection_config):
func = mock.Mock()
func.__name__ = "foo"

@@ -275,7 +273,10 @@ def test_lineage_backend(mock_emit, inlets, outlets, capture_executions):
if AIRFLOW_VERSION < packaging.version.parse("2.2.0"):
ti = TaskInstance(task=op2, execution_date=DEFAULT_DATE)
# Ignoring type here because DagRun state is just a string at Airflow 1
dag_run = DagRun(state="success", run_id=f"scheduled_{DEFAULT_DATE.isoformat()}") # type: ignore
dag_run = DagRun(
state="success", # type: ignore[arg-type]
run_id=f"scheduled_{DEFAULT_DATE.isoformat()}",
)
else:
from airflow.utils.state import DagRunState

12 changes: 7 additions & 5 deletions metadata-ingestion-modules/dagster-plugin/build.gradle
@@ -54,16 +54,15 @@ task installDev(type: Exec, dependsOn: [install]) {
task lint(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"black --check --diff src/ tests/ examples/ && " +
"ruff check src/ tests/ && " +
"ruff check src/ tests/ examples/ && " +
"ruff format --check src/ tests/ && " +
"mypy --show-traceback --show-error-codes src/ tests/ examples/"
}
task lintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-x', '-c',
"source ${venv_name}/bin/activate && " +
"black src/ tests/ examples/ && " +
"ruff check --fix src/ tests/"
"mypy src/ tests/ examples/"
"ruff check --fix src/ tests/ examples/ && " +
"ruff format src/ tests/ examples/ "
}

task installDevTest(type: Exec, dependsOn: [installDev]) {
Expand Down Expand Up @@ -105,5 +104,8 @@ clean {
delete venv_name
delete 'build'
delete 'dist'
delete '.ruff_cache'
delete '.mypy_cache'
delete '.pytest_cache'
}
clean.dependsOn cleanPythonCache
Original file line number Diff line number Diff line change
@@ -9,9 +9,9 @@
job,
op,
)

from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
from datahub.utilities.urns.dataset_urn import DatasetUrn

from datahub_dagster_plugin.client.dagster_generator import (
DagsterGenerator,
DatasetLineage,
Original file line number Diff line number Diff line change
@@ -7,9 +7,9 @@
define_asset_job,
multi_asset,
)

from datahub.ingestion.graph.config import DatahubClientConfig
from datahub.utilities.urns.dataset_urn import DatasetUrn

from datahub_dagster_plugin.sensors.datahub_sensors import (
DatahubDagsterSourceConfig,
make_datahub_sensor,
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dagster import Definitions
from datahub.ingestion.graph.client import DatahubClientConfig

from datahub.ingestion.graph.client import DatahubClientConfig
from datahub_dagster_plugin.sensors.datahub_sensors import (
DatahubDagsterSourceConfig,
make_datahub_sensor,
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from dagster import Definitions, In, Out, PythonObjectDagsterType, job, op

from datahub.ingestion.graph.config import DatahubClientConfig
from datahub.utilities.urns.dataset_urn import DatasetUrn

from datahub_dagster_plugin.sensors.datahub_sensors import (
DatahubDagsterSourceConfig,
make_datahub_sensor,
48 changes: 24 additions & 24 deletions metadata-ingestion-modules/dagster-plugin/pyproject.toml
@@ -2,13 +2,21 @@
build-backend = "setuptools.build_meta"
requires = ["setuptools>=54.0.0", "wheel", "pip>=21.0.0"]

[tool.black]
extend-exclude = '''
# A regex preceded with ^/ will apply only to files and directories
# in the root of the project.
^/tmp
'''
include = '\.pyi?$'
[tool.ruff]
line-length = 88
target-version = "py38"
exclude = [
".git",
"venv",
".tox",
"__pycache__",
]

[tool.ruff.format]
quote-style = "double"
indent-style = "space"
skip-magic-trailing-comma = false
line-ending = "auto"

[tool.ruff.lint.isort]
combine-as-imports = true
@@ -28,31 +36,23 @@ required-imports = []
classes = ["typing"]

[tool.ruff.lint]
select = [
"B",
"C90",
"E",
"F",
"I", # For isort
"TID",
extend-select = [
"B", # flake8-bugbear
"C90", # mccabe complexity
"E", # pycodestyle errors
"F", # pyflakes
"G010", # logging.warn -> logging.warning
"I", # isort
"TID", # flake8-tidy-imports
]
ignore = [
# Ignore line length violations (handled by Black)
"E501",
# Ignore whitespace before ':' (matches Black)
"E203",
"E203",
# Allow usages of functools.lru_cache
"B019",
# Allow function call in argument defaults
"B008",
"E501", # Line length violations (handled by formatter)
]

[tool.ruff.lint.mccabe]
max-complexity = 15

[tool.ruff.lint.flake8-tidy-imports]
# Disallow all relative imports.
ban-relative-imports = "all"

[tool.ruff.lint.per-file-ignores]
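
Both pyproject files keep B019 and B008 in the ignore list, so two flake8-bugbear patterns remain allowed after the switch to ruff. A small illustrative example (the names are made up for this sketch, not taken from the codebase):

    import functools

    def default_timeout() -> float:
        return 30.0

    # B008 would normally flag a function call in an argument default, since it
    # is evaluated once at definition time; the ignore entry permits it.
    def fetch(url: str, timeout: float = default_timeout()) -> None:
        ...

    class Cache:
        # B019 would normally flag lru_cache on a method because the cache can
        # keep `self` alive; the ignore entry permits this, too.
        @functools.lru_cache(maxsize=128)
        def lookup(self, key: str) -> str:
            return key.upper()
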
1 change: 0 additions & 1 deletion metadata-ingestion-modules/dagster-plugin/setup.py
@@ -51,7 +51,6 @@ def get_long_description():
"dagster-aws >= 0.11.0",
"dagster-snowflake >= 0.11.0",
"dagster-snowflake-pandas >= 0.11.0",
"black==22.12.0",
"coverage>=5.1",
"ruff==0.9.2",
"mypy>=1.4.0",
Original file line number Diff line number Diff line change
@@ -507,15 +507,15 @@ def generate_datajob(
job_property_bag: Dict[str, str] = {}
if input_datasets:
self.logger.info(
f"Input datasets for {op_def_snap.name} are { list(input_datasets.get(op_def_snap.name, []))}"
f"Input datasets for {op_def_snap.name} are {list(input_datasets.get(op_def_snap.name, []))}"
)
inlets.update(input_datasets.get(op_def_snap.name, []))

datajob.inlets = list(inlets)

if output_datasets:
self.logger.info(
f"Output datasets for {op_def_snap.name} are { list(output_datasets.get(op_def_snap.name, []))}"
f"Output datasets for {op_def_snap.name} are {list(output_datasets.get(op_def_snap.name, []))}"
)
datajob.outlets = list(output_datasets.get(op_def_snap.name, []))

@@ -606,7 +606,7 @@ def emit_job_run(
if run.status not in status_result_map:
raise Exception(
f"Job run status should be either complete, failed or cancelled and it was "
f"{run.status }"
f"{run.status}"
)

if run_stats.start_time is not None:
@@ -673,7 +673,7 @@ def emit_op_run(
if run_step_stats.status not in status_result_map:
raise Exception(
f"Step run status should be either complete, failed or cancelled and it was "
f"{run_step_stats.status }"
f"{run_step_stats.status}"
)

if run_step_stats.start_time is not None:
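
The hunks above only drop a stray space inside an f-string replacement field ({ list(...)} and {run.status }). Presumably this comes from ruff's formatter normalizing the expression part of f-strings, something the previously pinned black 22.x did not touch; treat that attribution as an assumption rather than a statement about either tool. A before/after sketch with made-up values:

    name = "op_def"
    values = [1, 2, 3]

    before = f"Input datasets for {name} are { list(values)}"
    after = f"Input datasets for {name} are {list(values)}"

    # Both lines are valid Python and render identical text; only the source
    # formatting differs.
    assert before == after
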
Original file line number Diff line number Diff line change
@@ -262,7 +262,6 @@ def get_dagster_environment(
and context.dagster_run.job_code_origin.repository_origin
and context.dagster_run.job_code_origin.repository_origin.code_pointer
):

code_pointer = (
context.dagster_run.job_code_origin.repository_origin.code_pointer
)