From 78e72c9445db4e78dcda2562e251beea4f1ad470 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 22 Jan 2025 05:53:13 -0500 Subject: [PATCH] Apply import ordering in ruff check (#1001) --- benchmarks/db-benchmark/groupby-datafusion.py | 12 +-- benchmarks/db-benchmark/join-datafusion.py | 6 +- benchmarks/tpch/tpch.py | 3 +- dev/release/generate-changelog.py | 5 +- examples/export.py | 1 - .../python/tests/_test_table_provider.py | 2 +- examples/import.py | 3 +- examples/python-udaf.py | 5 +- examples/python-udf-comparisons.py | 6 +- examples/python-udf.py | 3 +- examples/python-udwf.py | 7 +- examples/query-pyarrow-data.py | 3 +- examples/sql-parquet-s3.py | 1 + examples/sql-to-pandas.py | 1 - examples/sql-using-python-udaf.py | 2 +- examples/sql-using-python-udf.py | 2 +- examples/tpch/_tests.py | 6 +- examples/tpch/convert_data_to_parquet.py | 3 +- examples/tpch/q01_pricing_summary_report.py | 3 +- examples/tpch/q02_minimum_cost_supplier.py | 3 +- examples/tpch/q03_shipping_priority.py | 3 +- examples/tpch/q04_order_priority_checking.py | 4 +- examples/tpch/q05_local_supplier_volume.py | 5 +- .../tpch/q06_forecasting_revenue_change.py | 4 +- examples/tpch/q07_volume_shipping.py | 4 +- examples/tpch/q08_market_share.py | 4 +- .../tpch/q09_product_type_profit_measure.py | 3 +- examples/tpch/q10_returned_item_reporting.py | 4 +- .../q11_important_stock_identification.py | 3 +- examples/tpch/q12_ship_mode_order_priority.py | 4 +- examples/tpch/q13_customer_distribution.py | 3 +- examples/tpch/q14_promotion_effect.py | 4 +- examples/tpch/q15_top_supplier.py | 4 +- .../tpch/q16_part_supplier_relationship.py | 3 +- examples/tpch/q17_small_quantity_order.py | 3 +- examples/tpch/q18_large_volume_customer.py | 3 +- examples/tpch/q19_discounted_revenue.py | 3 +- examples/tpch/q20_potential_part_promotion.py | 4 +- .../tpch/q21_suppliers_kept_orders_waiting.py | 3 +- examples/tpch/q22_global_sales_opportunity.py | 3 +- pyproject.toml | 4 +- python/datafusion/__init__.py | 30 +++---- python/datafusion/catalog.py | 4 +- python/datafusion/common.py | 3 +- python/datafusion/context.py | 23 ++--- python/datafusion/dataframe.py | 21 +++-- python/datafusion/expr.py | 5 +- python/datafusion/functions.py | 16 ++-- python/datafusion/input/location.py | 2 +- python/datafusion/plan.py | 4 +- python/datafusion/record_batch.py | 3 +- python/datafusion/substrait.py | 8 +- python/datafusion/udf.py | 9 +- python/tests/conftest.py | 2 +- python/tests/test_aggregation.py | 1 - python/tests/test_config.py | 2 +- python/tests/test_context.py | 3 +- python/tests/test_dataframe.py | 5 +- python/tests/test_functions.py | 6 +- python/tests/test_imports.py | 89 +++++++++---------- python/tests/test_indexing.py | 1 - python/tests/test_input.py | 1 + python/tests/test_plans.py | 2 +- python/tests/test_sql.py | 5 +- python/tests/test_store.py | 1 - python/tests/test_substrait.py | 3 +- python/tests/test_udaf.py | 1 - python/tests/test_udf.py | 2 +- python/tests/test_udwf.py | 6 +- 69 files changed, 221 insertions(+), 189 deletions(-) diff --git a/benchmarks/db-benchmark/groupby-datafusion.py b/benchmarks/db-benchmark/groupby-datafusion.py index 960c8ba9a..04bf7a149 100644 --- a/benchmarks/db-benchmark/groupby-datafusion.py +++ b/benchmarks/db-benchmark/groupby-datafusion.py @@ -15,21 +15,23 @@ # specific language governing permissions and limitations # under the License. -import os import gc +import os import timeit + import datafusion as df +import pyarrow from datafusion import ( - col, - functions as f, RuntimeEnvBuilder, SessionConfig, SessionContext, + col, +) +from datafusion import ( + functions as f, ) -import pyarrow from pyarrow import csv as pacsv - print("# groupby-datafusion.py", flush=True) exec(open("./_helpers/helpers.py").read()) diff --git a/benchmarks/db-benchmark/join-datafusion.py b/benchmarks/db-benchmark/join-datafusion.py index 811ad8707..b45ebf632 100755 --- a/benchmarks/db-benchmark/join-datafusion.py +++ b/benchmarks/db-benchmark/join-datafusion.py @@ -15,15 +15,15 @@ # specific language governing permissions and limitations # under the License. -import os import gc +import os import timeit + import datafusion as df -from datafusion import functions as f from datafusion import col +from datafusion import functions as f from pyarrow import csv as pacsv - print("# join-datafusion.py", flush=True) exec(open("./_helpers/helpers.py").read()) diff --git a/benchmarks/tpch/tpch.py b/benchmarks/tpch/tpch.py index daa831b55..fb86b12b6 100644 --- a/benchmarks/tpch/tpch.py +++ b/benchmarks/tpch/tpch.py @@ -16,9 +16,10 @@ # under the License. import argparse -from datafusion import SessionContext import time +from datafusion import SessionContext + def bench(data_path, query_path): with open("results.csv", "w") as results: diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py index 0f07457d0..2564eea86 100755 --- a/dev/release/generate-changelog.py +++ b/dev/release/generate-changelog.py @@ -16,11 +16,12 @@ # limitations under the License. import argparse -import sys -from github import Github import os import re import subprocess +import sys + +from github import Github def print_pulls(repo_name, title, pulls): diff --git a/examples/export.py b/examples/export.py index cc02de52b..c7a387bcb 100644 --- a/examples/export.py +++ b/examples/export.py @@ -17,7 +17,6 @@ import datafusion - # create a context ctx = datafusion.SessionContext() diff --git a/examples/ffi-table-provider/python/tests/_test_table_provider.py b/examples/ffi-table-provider/python/tests/_test_table_provider.py index 56c05e4fa..0db3ec561 100644 --- a/examples/ffi-table-provider/python/tests/_test_table_provider.py +++ b/examples/ffi-table-provider/python/tests/_test_table_provider.py @@ -15,9 +15,9 @@ # specific language governing permissions and limitations # under the License. +import pyarrow as pa from datafusion import SessionContext from ffi_table_provider import MyTableProvider -import pyarrow as pa def test_table_loading(): diff --git a/examples/import.py b/examples/import.py index c9d2e8cb6..7b5ab5082 100644 --- a/examples/import.py +++ b/examples/import.py @@ -16,10 +16,9 @@ # under the License. import datafusion -import pyarrow as pa import pandas as pd import polars as pl - +import pyarrow as pa # Create a context ctx = datafusion.SessionContext() diff --git a/examples/python-udaf.py b/examples/python-udaf.py index ed705f5a9..538f69571 100644 --- a/examples/python-udaf.py +++ b/examples/python-udaf.py @@ -15,11 +15,10 @@ # specific language governing permissions and limitations # under the License. +import datafusion import pyarrow import pyarrow.compute -import datafusion -from datafusion import udaf, Accumulator -from datafusion import col +from datafusion import Accumulator, col, udaf class MyAccumulator(Accumulator): diff --git a/examples/python-udf-comparisons.py b/examples/python-udf-comparisons.py index 9a84dd730..c5d5ec8dd 100644 --- a/examples/python-udf-comparisons.py +++ b/examples/python-udf-comparisons.py @@ -15,11 +15,13 @@ # specific language governing permissions and limitations # under the License. -from datafusion import SessionContext, col, lit, udf, functions as F import os +import time + import pyarrow as pa import pyarrow.compute as pc -import time +from datafusion import SessionContext, col, lit, udf +from datafusion import functions as F path = os.path.dirname(os.path.abspath(__file__)) filepath = os.path.join(path, "./tpch/data/lineitem.parquet") diff --git a/examples/python-udf.py b/examples/python-udf.py index 30edd4198..fb2bc253e 100644 --- a/examples/python-udf.py +++ b/examples/python-udf.py @@ -16,7 +16,8 @@ # under the License. import pyarrow -from datafusion import udf, SessionContext, functions as f +from datafusion import SessionContext, udf +from datafusion import functions as f def is_null(array: pyarrow.Array) -> pyarrow.Array: diff --git a/examples/python-udwf.py b/examples/python-udwf.py index 55de2bdc7..32f8fadaa 100644 --- a/examples/python-udwf.py +++ b/examples/python-udwf.py @@ -15,11 +15,12 @@ # specific language governing permissions and limitations # under the License. -import pyarrow as pa import datafusion -from datafusion import udwf, functions as f, col, lit -from datafusion.udf import WindowEvaluator +import pyarrow as pa +from datafusion import col, lit, udwf +from datafusion import functions as f from datafusion.expr import WindowFrame +from datafusion.udf import WindowEvaluator # This example creates five different examples of user defined window functions in order # to demonstrate the variety of ways a user may need to implement. diff --git a/examples/query-pyarrow-data.py b/examples/query-pyarrow-data.py index 83e6884a7..e3456fb5b 100644 --- a/examples/query-pyarrow-data.py +++ b/examples/query-pyarrow-data.py @@ -16,9 +16,8 @@ # under the License. import datafusion -from datafusion import col import pyarrow - +from datafusion import col # create a context ctx = datafusion.SessionContext() diff --git a/examples/sql-parquet-s3.py b/examples/sql-parquet-s3.py index 61f1e0c50..866e2ac68 100644 --- a/examples/sql-parquet-s3.py +++ b/examples/sql-parquet-s3.py @@ -16,6 +16,7 @@ # under the License. import os + import datafusion from datafusion.object_store import AmazonS3 diff --git a/examples/sql-to-pandas.py b/examples/sql-to-pandas.py index 3e99b22de..34f7bde1b 100644 --- a/examples/sql-to-pandas.py +++ b/examples/sql-to-pandas.py @@ -17,7 +17,6 @@ from datafusion import SessionContext - # Create a DataFusion context ctx = SessionContext() diff --git a/examples/sql-using-python-udaf.py b/examples/sql-using-python-udaf.py index 7ccf5d3cb..60ab8d134 100644 --- a/examples/sql-using-python-udaf.py +++ b/examples/sql-using-python-udaf.py @@ -15,8 +15,8 @@ # specific language governing permissions and limitations # under the License. -from datafusion import udaf, SessionContext, Accumulator import pyarrow as pa +from datafusion import Accumulator, SessionContext, udaf # Define a user-defined aggregation function (UDAF) diff --git a/examples/sql-using-python-udf.py b/examples/sql-using-python-udf.py index d6bbe3ab0..2f0a0b67d 100644 --- a/examples/sql-using-python-udf.py +++ b/examples/sql-using-python-udf.py @@ -15,8 +15,8 @@ # specific language governing permissions and limitations # under the License. -from datafusion import udf, SessionContext import pyarrow as pa +from datafusion import SessionContext, udf # Define a user-defined function (UDF) diff --git a/examples/tpch/_tests.py b/examples/tpch/_tests.py index 3ce9cdfe5..c4d872085 100644 --- a/examples/tpch/_tests.py +++ b/examples/tpch/_tests.py @@ -15,10 +15,12 @@ # specific language governing permissions and limitations # under the License. -import pytest from importlib import import_module + import pyarrow as pa -from datafusion import DataFrame, col, lit, functions as F +import pytest +from datafusion import DataFrame, col, lit +from datafusion import functions as F from util import get_answer_file diff --git a/examples/tpch/convert_data_to_parquet.py b/examples/tpch/convert_data_to_parquet.py index cb0b2f0bd..73097fac5 100644 --- a/examples/tpch/convert_data_to_parquet.py +++ b/examples/tpch/convert_data_to_parquet.py @@ -23,8 +23,9 @@ """ import os -import pyarrow + import datafusion +import pyarrow ctx = datafusion.SessionContext() diff --git a/examples/tpch/q01_pricing_summary_report.py b/examples/tpch/q01_pricing_summary_report.py index cb9485a7a..3f97f00dc 100644 --- a/examples/tpch/q01_pricing_summary_report.py +++ b/examples/tpch/q01_pricing_summary_report.py @@ -30,7 +30,8 @@ """ import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path ctx = SessionContext() diff --git a/examples/tpch/q02_minimum_cost_supplier.py b/examples/tpch/q02_minimum_cost_supplier.py index c4ccf8ad3..7390d0892 100644 --- a/examples/tpch/q02_minimum_cost_supplier.py +++ b/examples/tpch/q02_minimum_cost_supplier.py @@ -30,7 +30,8 @@ """ import datafusion -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path # This is the part we're looking for. Values selected here differ from the spec in order to run diff --git a/examples/tpch/q03_shipping_priority.py b/examples/tpch/q03_shipping_priority.py index 5ebab13c0..fc1231e0a 100644 --- a/examples/tpch/q03_shipping_priority.py +++ b/examples/tpch/q03_shipping_priority.py @@ -27,7 +27,8 @@ as part of their TPC Benchmark H Specification revision 2.18.0. """ -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path SEGMENT_OF_INTEREST = "BUILDING" diff --git a/examples/tpch/q04_order_priority_checking.py b/examples/tpch/q04_order_priority_checking.py index 8bf02cb83..426338aea 100644 --- a/examples/tpch/q04_order_priority_checking.py +++ b/examples/tpch/q04_order_priority_checking.py @@ -27,8 +27,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path # Ideally we could put 3 months into the interval. See note below. diff --git a/examples/tpch/q05_local_supplier_volume.py b/examples/tpch/q05_local_supplier_volume.py index 413a4acb9..fa2b01dea 100644 --- a/examples/tpch/q05_local_supplier_volume.py +++ b/examples/tpch/q05_local_supplier_volume.py @@ -30,11 +30,12 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path - DATE_OF_INTEREST = "1994-01-01" INTERVAL_DAYS = 365 REGION_OF_INTEREST = "ASIA" diff --git a/examples/tpch/q06_forecasting_revenue_change.py b/examples/tpch/q06_forecasting_revenue_change.py index eaf9b0c29..1de5848b1 100644 --- a/examples/tpch/q06_forecasting_revenue_change.py +++ b/examples/tpch/q06_forecasting_revenue_change.py @@ -30,8 +30,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path # Variables from the example query diff --git a/examples/tpch/q07_volume_shipping.py b/examples/tpch/q07_volume_shipping.py index 18c290d9c..a84cf728a 100644 --- a/examples/tpch/q07_volume_shipping.py +++ b/examples/tpch/q07_volume_shipping.py @@ -29,8 +29,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path # Variables of interest to query over diff --git a/examples/tpch/q08_market_share.py b/examples/tpch/q08_market_share.py index 7138ab65a..d46df30f2 100644 --- a/examples/tpch/q08_market_share.py +++ b/examples/tpch/q08_market_share.py @@ -28,8 +28,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path supplier_nation = lit("BRAZIL") diff --git a/examples/tpch/q09_product_type_profit_measure.py b/examples/tpch/q09_product_type_profit_measure.py index aa47d76c0..e2abbd095 100644 --- a/examples/tpch/q09_product_type_profit_measure.py +++ b/examples/tpch/q09_product_type_profit_measure.py @@ -30,7 +30,8 @@ """ import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path part_color = lit("green") diff --git a/examples/tpch/q10_returned_item_reporting.py b/examples/tpch/q10_returned_item_reporting.py index 94b398c1d..ed822e264 100644 --- a/examples/tpch/q10_returned_item_reporting.py +++ b/examples/tpch/q10_returned_item_reporting.py @@ -30,8 +30,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path DATE_START_OF_QUARTER = "1993-10-01" diff --git a/examples/tpch/q11_important_stock_identification.py b/examples/tpch/q11_important_stock_identification.py index 707265e16..22829ab7c 100644 --- a/examples/tpch/q11_important_stock_identification.py +++ b/examples/tpch/q11_important_stock_identification.py @@ -27,7 +27,8 @@ as part of their TPC Benchmark H Specification revision 2.18.0. """ -from datafusion import SessionContext, WindowFrame, col, lit, functions as F +from datafusion import SessionContext, WindowFrame, col, lit +from datafusion import functions as F from util import get_data_path NATION = "GERMANY" diff --git a/examples/tpch/q12_ship_mode_order_priority.py b/examples/tpch/q12_ship_mode_order_priority.py index def2a6c30..f1d894940 100644 --- a/examples/tpch/q12_ship_mode_order_priority.py +++ b/examples/tpch/q12_ship_mode_order_priority.py @@ -30,8 +30,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path SHIP_MODE_1 = "MAIL" diff --git a/examples/tpch/q13_customer_distribution.py b/examples/tpch/q13_customer_distribution.py index 67365a96a..93f082ea3 100644 --- a/examples/tpch/q13_customer_distribution.py +++ b/examples/tpch/q13_customer_distribution.py @@ -28,7 +28,8 @@ as part of their TPC Benchmark H Specification revision 2.18.0. """ -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path WORD_1 = "special" diff --git a/examples/tpch/q14_promotion_effect.py b/examples/tpch/q14_promotion_effect.py index cd26ee2bd..d62f76e3c 100644 --- a/examples/tpch/q14_promotion_effect.py +++ b/examples/tpch/q14_promotion_effect.py @@ -27,8 +27,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path DATE = "1995-09-01" diff --git a/examples/tpch/q15_top_supplier.py b/examples/tpch/q15_top_supplier.py index 0bc316f7a..c321048f2 100644 --- a/examples/tpch/q15_top_supplier.py +++ b/examples/tpch/q15_top_supplier.py @@ -27,8 +27,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, WindowFrame, col, lit, functions as F +from datafusion import SessionContext, WindowFrame, col, lit +from datafusion import functions as F from util import get_data_path DATE = "1996-01-01" diff --git a/examples/tpch/q16_part_supplier_relationship.py b/examples/tpch/q16_part_supplier_relationship.py index a6a0c43eb..65043ffda 100644 --- a/examples/tpch/q16_part_supplier_relationship.py +++ b/examples/tpch/q16_part_supplier_relationship.py @@ -29,7 +29,8 @@ """ import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path BRAND = "Brand#45" diff --git a/examples/tpch/q17_small_quantity_order.py b/examples/tpch/q17_small_quantity_order.py index d7b43d498..6d76fe506 100644 --- a/examples/tpch/q17_small_quantity_order.py +++ b/examples/tpch/q17_small_quantity_order.py @@ -28,7 +28,8 @@ as part of their TPC Benchmark H Specification revision 2.18.0. """ -from datafusion import SessionContext, WindowFrame, col, lit, functions as F +from datafusion import SessionContext, WindowFrame, col, lit +from datafusion import functions as F from util import get_data_path BRAND = "Brand#23" diff --git a/examples/tpch/q18_large_volume_customer.py b/examples/tpch/q18_large_volume_customer.py index 165fce033..834d181c9 100644 --- a/examples/tpch/q18_large_volume_customer.py +++ b/examples/tpch/q18_large_volume_customer.py @@ -26,7 +26,8 @@ as part of their TPC Benchmark H Specification revision 2.18.0. """ -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path QUANTITY = 300 diff --git a/examples/tpch/q19_discounted_revenue.py b/examples/tpch/q19_discounted_revenue.py index 4aed0cbae..2b87e1120 100644 --- a/examples/tpch/q19_discounted_revenue.py +++ b/examples/tpch/q19_discounted_revenue.py @@ -27,7 +27,8 @@ """ import pyarrow as pa -from datafusion import SessionContext, col, lit, udf, functions as F +from datafusion import SessionContext, col, lit, udf +from datafusion import functions as F from util import get_data_path items_of_interest = { diff --git a/examples/tpch/q20_potential_part_promotion.py b/examples/tpch/q20_potential_part_promotion.py index d720cdce6..a25188d31 100644 --- a/examples/tpch/q20_potential_part_promotion.py +++ b/examples/tpch/q20_potential_part_promotion.py @@ -28,8 +28,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path COLOR_OF_INTEREST = "forest" diff --git a/examples/tpch/q21_suppliers_kept_orders_waiting.py b/examples/tpch/q21_suppliers_kept_orders_waiting.py index 27cf816fa..9bbaad779 100644 --- a/examples/tpch/q21_suppliers_kept_orders_waiting.py +++ b/examples/tpch/q21_suppliers_kept_orders_waiting.py @@ -26,7 +26,8 @@ as part of their TPC Benchmark H Specification revision 2.18.0. """ -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path NATION_OF_INTEREST = "SAUDI ARABIA" diff --git a/examples/tpch/q22_global_sales_opportunity.py b/examples/tpch/q22_global_sales_opportunity.py index 72dce5289..c4d115b74 100644 --- a/examples/tpch/q22_global_sales_opportunity.py +++ b/examples/tpch/q22_global_sales_opportunity.py @@ -26,7 +26,8 @@ as part of their TPC Benchmark H Specification revision 2.18.0. """ -from datafusion import SessionContext, WindowFrame, col, lit, functions as F +from datafusion import SessionContext, WindowFrame, col, lit +from datafusion import functions as F from util import get_data_path NATION_CODES = [13, 31, 23, 29, 30, 18, 17] diff --git a/pyproject.toml b/pyproject.toml index 6e8acfe71..32bb28d21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ features = ["substrait"] # Enable docstring linting using the google style guide [tool.ruff.lint] -select = ["E4", "E7", "E9", "F", "D", "W"] +select = ["E4", "E7", "E9", "F", "D", "W", "I"] [tool.ruff.lint.pydocstyle] convention = "google" @@ -100,4 +100,4 @@ docs = [ "pickleshare>=0.7.5", "sphinx-autoapi>=3.4.0", "setuptools>=75.3.0", -] \ No newline at end of file +] diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 2d8db42c8..85aefcce7 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -26,36 +26,28 @@ except ImportError: import importlib_metadata -from .context import ( - SessionContext, - SessionConfig, - RuntimeEnvBuilder, - SQLOptions, -) - -from .catalog import Catalog, Database, Table +from . import functions, object_store, substrait # The following imports are okay to remain as opaque to the user. from ._internal import Config - -from .record_batch import RecordBatchStream, RecordBatch - -from .udf import ScalarUDF, AggregateUDF, Accumulator, WindowUDF - +from .catalog import Catalog, Database, Table from .common import ( DFSchema, ) - +from .context import ( + RuntimeEnvBuilder, + SessionConfig, + SessionContext, + SQLOptions, +) from .dataframe import DataFrame - from .expr import ( Expr, WindowFrame, ) - -from .plan import LogicalPlan, ExecutionPlan - -from . import functions, object_store, substrait +from .plan import ExecutionPlan, LogicalPlan +from .record_batch import RecordBatch, RecordBatchStream +from .udf import Accumulator, AggregateUDF, ScalarUDF, WindowUDF __version__ = importlib_metadata.version(__name__) diff --git a/python/datafusion/catalog.py b/python/datafusion/catalog.py index acd28f33d..703037665 100644 --- a/python/datafusion/catalog.py +++ b/python/datafusion/catalog.py @@ -19,10 +19,10 @@ from __future__ import annotations -import datafusion._internal as df_internal - from typing import TYPE_CHECKING +import datafusion._internal as df_internal + if TYPE_CHECKING: import pyarrow diff --git a/python/datafusion/common.py b/python/datafusion/common.py index 7db8333f2..a2298c634 100644 --- a/python/datafusion/common.py +++ b/python/datafusion/common.py @@ -16,9 +16,10 @@ # under the License. """Common data types used throughout the DataFusion project.""" -from ._internal import common as common_internal from enum import Enum +from ._internal import common as common_internal + # TODO these should all have proper wrapper classes DFSchema = common_internal.DFSchema diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 3c284c9f9..864ef1c8b 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -19,26 +19,29 @@ from __future__ import annotations -from ._internal import SessionConfig as SessionConfigInternal -from ._internal import RuntimeEnvBuilder as RuntimeEnvBuilderInternal -from ._internal import SQLOptions as SQLOptionsInternal -from ._internal import SessionContext as SessionContextInternal +from typing import TYPE_CHECKING, Any, Protocol + +from typing_extensions import deprecated from datafusion.catalog import Catalog, Table from datafusion.dataframe import DataFrame from datafusion.expr import Expr, SortExpr, sort_list_to_raw_sort_list from datafusion.record_batch import RecordBatchStream -from datafusion.udf import ScalarUDF, AggregateUDF, WindowUDF +from datafusion.udf import AggregateUDF, ScalarUDF, WindowUDF -from typing import Any, TYPE_CHECKING, Protocol -from typing_extensions import deprecated +from ._internal import RuntimeEnvBuilder as RuntimeEnvBuilderInternal +from ._internal import SessionConfig as SessionConfigInternal +from ._internal import SessionContext as SessionContextInternal +from ._internal import SQLOptions as SQLOptionsInternal if TYPE_CHECKING: - import pyarrow + import pathlib + import pandas import polars - import pathlib - from datafusion.plan import LogicalPlan, ExecutionPlan + import pyarrow + + from datafusion.plan import ExecutionPlan, LogicalPlan class ArrowStreamExportable(Protocol): diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index b0c1abdad..7413a5fa3 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -20,31 +20,36 @@ """ from __future__ import annotations + import warnings from typing import ( + TYPE_CHECKING, Any, Iterable, List, - TYPE_CHECKING, Literal, - overload, Optional, Union, + overload, ) -from datafusion.record_batch import RecordBatchStream + from typing_extensions import deprecated -from datafusion.plan import LogicalPlan, ExecutionPlan + +from datafusion.plan import ExecutionPlan, LogicalPlan +from datafusion.record_batch import RecordBatchStream if TYPE_CHECKING: - import pyarrow as pa - import pandas as pd - import polars as pl import pathlib from typing import Callable, Sequence + import pandas as pd + import polars as pl + import pyarrow as pa + +from enum import Enum + from datafusion._internal import DataFrame as DataFrameInternal from datafusion.expr import Expr, SortExpr, sort_or_default -from enum import Enum # excerpt from deltalake diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 16add16f4..68ddd7c9a 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -22,12 +22,13 @@ from __future__ import annotations -from typing import Any, Optional, Type, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Optional, Type import pyarrow as pa -from datafusion.common import DataTypeMap, NullTreatment, RexType from typing_extensions import deprecated +from datafusion.common import DataTypeMap, NullTreatment, RexType + from ._internal import expr as expr_internal from ._internal import functions as functions_internal diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index c0097c6ab..7c2fa9a8f 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -18,21 +18,21 @@ from __future__ import annotations +from typing import Any, Optional + +import pyarrow as pa + from datafusion._internal import functions as f +from datafusion.common import NullTreatment +from datafusion.context import SessionContext from datafusion.expr import ( CaseBuilder, Expr, - WindowFrame, SortExpr, - sort_list_to_raw_sort_list, + WindowFrame, expr_list_to_raw_expr_list, + sort_list_to_raw_sort_list, ) -from datafusion.context import SessionContext -from datafusion.common import NullTreatment - -from typing import Any, Optional - -import pyarrow as pa __all__ = [ "abs", diff --git a/python/datafusion/input/location.py b/python/datafusion/input/location.py index b274539fc..a8252b53c 100644 --- a/python/datafusion/input/location.py +++ b/python/datafusion/input/location.py @@ -17,8 +17,8 @@ """The default input source for DataFusion.""" -import os import glob +import os from typing import Any from datafusion.common import DataTypeMap, SqlTable diff --git a/python/datafusion/plan.py b/python/datafusion/plan.py index a71965f41..133fc446d 100644 --- a/python/datafusion/plan.py +++ b/python/datafusion/plan.py @@ -19,9 +19,9 @@ from __future__ import annotations -import datafusion._internal as df_internal +from typing import TYPE_CHECKING, Any, List -from typing import List, Any, TYPE_CHECKING +import datafusion._internal as df_internal if TYPE_CHECKING: from datafusion.context import SessionContext diff --git a/python/datafusion/record_batch.py b/python/datafusion/record_batch.py index 75e58998f..772cd9089 100644 --- a/python/datafusion/record_batch.py +++ b/python/datafusion/record_batch.py @@ -27,9 +27,10 @@ if TYPE_CHECKING: import pyarrow - import datafusion._internal as df_internal import typing_extensions + import datafusion._internal as df_internal + class RecordBatch: """This class is essentially a wrapper for :py:class:`pyarrow.RecordBatch`.""" diff --git a/python/datafusion/substrait.py b/python/datafusion/substrait.py index dea47acca..402184d3f 100644 --- a/python/datafusion/substrait.py +++ b/python/datafusion/substrait.py @@ -23,13 +23,15 @@ from __future__ import annotations -from ._internal import substrait as substrait_internal - +import pathlib from typing import TYPE_CHECKING + from typing_extensions import deprecated -import pathlib + from datafusion.plan import LogicalPlan +from ._internal import substrait as substrait_internal + if TYPE_CHECKING: from datafusion.context import SessionContext diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index d9d994b22..c97f453d0 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -19,14 +19,15 @@ from __future__ import annotations -import datafusion._internal as df_internal -from datafusion.expr import Expr -from typing import Callable, TYPE_CHECKING, TypeVar from abc import ABCMeta, abstractmethod -from typing import List, Optional from enum import Enum +from typing import TYPE_CHECKING, Callable, List, Optional, TypeVar + import pyarrow +import datafusion._internal as df_internal +from datafusion.expr import Expr + if TYPE_CHECKING: _R = TypeVar("_R", bound=pyarrow.DataType) diff --git a/python/tests/conftest.py b/python/tests/conftest.py index 1cc07e500..9548fbfe4 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -15,9 +15,9 @@ # specific language governing permissions and limitations # under the License. +import pyarrow as pa import pytest from datafusion import SessionContext -import pyarrow as pa from pyarrow.csv import write_csv diff --git a/python/tests/test_aggregation.py b/python/tests/test_aggregation.py index 243a8c3c9..5ef46131b 100644 --- a/python/tests/test_aggregation.py +++ b/python/tests/test_aggregation.py @@ -18,7 +18,6 @@ import numpy as np import pyarrow as pa import pytest - from datafusion import SessionContext, column, lit from datafusion import functions as f from datafusion.common import NullTreatment diff --git a/python/tests/test_config.py b/python/tests/test_config.py index 12d9fc3ff..c1d7f97e1 100644 --- a/python/tests/test_config.py +++ b/python/tests/test_config.py @@ -15,8 +15,8 @@ # specific language governing permissions and limitations # under the License. -from datafusion import Config import pytest +from datafusion import Config @pytest.fixture diff --git a/python/tests/test_context.py b/python/tests/test_context.py index 10e8ad0e9..91046e6b8 100644 --- a/python/tests/test_context.py +++ b/python/tests/test_context.py @@ -14,15 +14,14 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import datetime as dt import gzip import os -import datetime as dt import pathlib import pyarrow as pa import pyarrow.dataset as ds import pytest - from datafusion import ( DataFrame, RuntimeEnvBuilder, diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index a1a871e9a..5bc3fb094 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -18,11 +18,8 @@ from typing import Any import pyarrow as pa -from pyarrow.csv import write_csv import pyarrow.parquet as pq import pytest - -from datafusion import functions as f from datafusion import ( DataFrame, SessionContext, @@ -30,7 +27,9 @@ column, literal, ) +from datafusion import functions as f from datafusion.expr import Window +from pyarrow.csv import write_csv @pytest.fixture diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index add170c17..ad6aa7c0a 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -15,15 +15,13 @@ # specific language governing permissions and limitations # under the License. import math +from datetime import datetime import numpy as np import pyarrow as pa import pytest -from datetime import datetime - -from datafusion import SessionContext, column +from datafusion import SessionContext, column, literal, string_literal from datafusion import functions as f -from datafusion import literal, string_literal np.seterr(invalid="ignore") diff --git a/python/tests/test_imports.py b/python/tests/test_imports.py index 6ea77b15f..0c155cbde 100644 --- a/python/tests/test_imports.py +++ b/python/tests/test_imports.py @@ -15,72 +15,69 @@ # specific language governing permissions and limitations # under the License. -import pytest - import datafusion +import pytest from datafusion import ( AggregateUDF, DataFrame, - SessionContext, ScalarUDF, + SessionContext, functions, ) - from datafusion.common import ( DFSchema, ) - from datafusion.expr import ( - Expr, - Column, - Literal, - BinaryExpr, - AggregateFunction, - Projection, - TableScan, - Filter, - Limit, Aggregate, - Sort, - Analyze, - Join, - JoinType, - JoinConstraint, - Union, - Like, - ILike, - SimilarTo, - ScalarVariable, + AggregateFunction, Alias, - Not, - IsNotNull, - IsTrue, - IsFalse, - IsUnknown, - IsNotTrue, - IsNotFalse, - IsNotUnknown, - Negative, - InList, - Exists, - Subquery, - InSubquery, - ScalarSubquery, - GroupingSet, - Placeholder, + Analyze, + Between, + BinaryExpr, Case, Cast, - TryCast, - SubqueryAlias, - Between, - Explain, - Extension, + Column, CreateMemoryTable, CreateView, Distinct, DropTable, - Repartition, + Exists, + Explain, + Expr, + Extension, + Filter, + GroupingSet, + ILike, + InList, + InSubquery, + IsFalse, + IsNotFalse, + IsNotNull, + IsNotTrue, + IsNotUnknown, + IsTrue, + IsUnknown, + Join, + JoinConstraint, + JoinType, + Like, + Limit, + Literal, + Negative, + Not, Partitioning, + Placeholder, + Projection, + Repartition, + ScalarSubquery, + ScalarVariable, + SimilarTo, + Sort, + Subquery, + SubqueryAlias, + TableScan, + TryCast, + Union, ) diff --git a/python/tests/test_indexing.py b/python/tests/test_indexing.py index 8ca3eab19..5b0d08610 100644 --- a/python/tests/test_indexing.py +++ b/python/tests/test_indexing.py @@ -17,7 +17,6 @@ import pyarrow as pa import pytest - from datafusion import SessionContext diff --git a/python/tests/test_input.py b/python/tests/test_input.py index fb53d86e5..806471357 100644 --- a/python/tests/test_input.py +++ b/python/tests/test_input.py @@ -16,6 +16,7 @@ # under the License. import os + from datafusion.input.location import LocationInputPlugin diff --git a/python/tests/test_plans.py b/python/tests/test_plans.py index 0283a4e6a..396acbe97 100644 --- a/python/tests/test_plans.py +++ b/python/tests/test_plans.py @@ -15,8 +15,8 @@ # specific language governing permissions and limitations # under the License. -from datafusion import SessionContext, LogicalPlan, ExecutionPlan import pytest +from datafusion import ExecutionPlan, LogicalPlan, SessionContext # Note: We must use CSV because memory tables are currently not supported for diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py index a2521dd09..862f745bf 100644 --- a/python/tests/test_sql.py +++ b/python/tests/test_sql.py @@ -19,12 +19,11 @@ import numpy as np import pyarrow as pa -from pyarrow.csv import write_csv import pyarrow.dataset as ds import pytest +from datafusion import col, udf from datafusion.object_store import Http - -from datafusion import udf, col +from pyarrow.csv import write_csv from . import generic as helpers diff --git a/python/tests/test_store.py b/python/tests/test_store.py index f85b28311..53ffc3acf 100644 --- a/python/tests/test_store.py +++ b/python/tests/test_store.py @@ -18,7 +18,6 @@ import os import pytest - from datafusion import SessionContext diff --git a/python/tests/test_substrait.py b/python/tests/test_substrait.py index 2071c8f3b..feada7cde 100644 --- a/python/tests/test_substrait.py +++ b/python/tests/test_substrait.py @@ -16,10 +16,9 @@ # under the License. import pyarrow as pa - +import pytest from datafusion import SessionContext from datafusion import substrait as ss -import pytest @pytest.fixture diff --git a/python/tests/test_udaf.py b/python/tests/test_udaf.py index 8f31748e0..0005a3da8 100644 --- a/python/tests/test_udaf.py +++ b/python/tests/test_udaf.py @@ -20,7 +20,6 @@ import pyarrow as pa import pyarrow.compute as pc import pytest - from datafusion import Accumulator, column, udaf diff --git a/python/tests/test_udf.py b/python/tests/test_udf.py index 568a66dbb..3a5dce6d6 100644 --- a/python/tests/test_udf.py +++ b/python/tests/test_udf.py @@ -15,9 +15,9 @@ # specific language governing permissions and limitations # under the License. -from datafusion import udf, column import pyarrow as pa import pytest +from datafusion import column, udf @pytest.fixture diff --git a/python/tests/test_udwf.py b/python/tests/test_udwf.py index 2099ac9bc..0ffa04179 100644 --- a/python/tests/test_udwf.py +++ b/python/tests/test_udwf.py @@ -17,10 +17,10 @@ import pyarrow as pa import pytest - -from datafusion import SessionContext, column, udwf, lit, functions as f -from datafusion.udf import WindowEvaluator +from datafusion import SessionContext, column, lit, udwf +from datafusion import functions as f from datafusion.expr import WindowFrame +from datafusion.udf import WindowEvaluator class ExponentialSmoothDefault(WindowEvaluator):