diff --git a/pysparkling/sql/column.py b/pysparkling/sql/column.py index 7deca5bf..822212ee 100644 --- a/pysparkling/sql/column.py +++ b/pysparkling/sql/column.py @@ -688,6 +688,7 @@ def data_type(self, schema): try: return schema[self.expr].dataType except KeyError: + # pylint: disable=raise-missing-from raise AnalysisException( f"cannot resolve '`{self.expr}`' given input columns: {schema.fields};" ) diff --git a/pysparkling/sql/expressions/arrays.py b/pysparkling/sql/expressions/arrays.py index 1758202d..edf901c2 100644 --- a/pysparkling/sql/expressions/arrays.py +++ b/pysparkling/sql/expressions/arrays.py @@ -1,4 +1,5 @@ -from ..types import BooleanType, ArrayType, NullType, MapType, IntegerType, StringType, StructType +from ..column import Column +from ..types import ArrayType, BooleanType, IntegerType, MapType, NullType, StringType, StructType from ..utils import AnalysisException from .expressions import BinaryOperation, Expression, UnaryExpression @@ -337,7 +338,7 @@ def __init__(self, columns): def eval(self, row, schema): return [ - {i: v for i, v in enumerate(combination)} + dict(enumerate(combination)) for combination in zip( *(c.eval(row, schema) for c in self.columns) ) diff --git a/pysparkling/sql/expressions/dates.py b/pysparkling/sql/expressions/dates.py index 22407375..f68389b3 100644 --- a/pysparkling/sql/expressions/dates.py +++ b/pysparkling/sql/expressions/dates.py @@ -5,7 +5,7 @@ from ...utils import parse_tz from ..casts import get_time_formatter, get_unix_timestamp_parser -from ..types import DateType, FloatType, TimestampType, IntegerType, DoubleType, StringType, LongType +from ..types import DateType, DoubleType, FloatType, IntegerType, LongType, StringType, TimestampType from .expressions import Expression, UnaryExpression from .operators import Cast @@ -35,6 +35,7 @@ def args(self): def data_type(self, schema): return DateType() + class DateAdd(Expression): pretty_name = "date_add" @@ -56,6 +57,7 @@ def args(self): def data_type(self, schema): return DateType() + class DateSub(Expression): pretty_name = "date_sub" diff --git a/pysparkling/sql/expressions/explodes.py b/pysparkling/sql/expressions/explodes.py index c55ffdef..6612dd77 100644 --- a/pysparkling/sql/expressions/explodes.py +++ b/pysparkling/sql/expressions/explodes.py @@ -1,4 +1,4 @@ -from ..types import DataType, IntegerType, StructField, StructType +from ..types import IntegerType, StructField, StructType from .expressions import UnaryExpression diff --git a/pysparkling/sql/expressions/jsons.py b/pysparkling/sql/expressions/jsons.py index fa2ef94a..96de60d7 100644 --- a/pysparkling/sql/expressions/jsons.py +++ b/pysparkling/sql/expressions/jsons.py @@ -1,9 +1,9 @@ import json -from ..types import StringType from ...utils import get_json_encoder from ..internal_utils.options import Options from ..internal_utils.readers.jsonreader import JSONReader +from ..types import StringType from .expressions import Expression diff --git a/pysparkling/sql/expressions/mappers.py b/pysparkling/sql/expressions/mappers.py index bde7127d..657e9d5f 100644 --- a/pysparkling/sql/expressions/mappers.py +++ b/pysparkling/sql/expressions/mappers.py @@ -6,8 +6,10 @@ from ...utils import half_even_round, half_up_round, MonotonicallyIncreasingIDGenerator, XORShiftRandom from ..internal_utils.column import resolve_column -from ..types import create_row, StringType, NullType, BooleanType, DoubleType, LongType, IntegerType, ArrayType, \ - StructType, StructField, MapType, BinaryType +from ..types import ( + ArrayType, BinaryType, BooleanType, create_row, DoubleType, IntegerType, LongType, MapType, NullType, StringType, + StructField, StructType +) from ..utils import AnalysisException from .expressions import Expression, NullSafeColumnOperation, UnaryExpression from .operators import Cast diff --git a/pysparkling/sql/expressions/operators.py b/pysparkling/sql/expressions/operators.py index 2dc01229..70310b6a 100644 --- a/pysparkling/sql/expressions/operators.py +++ b/pysparkling/sql/expressions/operators.py @@ -1,5 +1,5 @@ from ..casts import get_caster -from ..types import Row, StructType, DoubleType, BooleanType, StringType, largest_numeric_type +from ..types import BooleanType, DoubleType, largest_numeric_type, Row, StringType, StructType from .expressions import BinaryOperation, Expression, NullSafeBinaryOperation, TypeSafeBinaryOperation, UnaryExpression diff --git a/pysparkling/sql/expressions/strings.py b/pysparkling/sql/expressions/strings.py index a9f7a97d..3f5311dd 100644 --- a/pysparkling/sql/expressions/strings.py +++ b/pysparkling/sql/expressions/strings.py @@ -1,7 +1,7 @@ import string from ...utils import levenshtein_distance -from ..types import StringType, IntegerType +from ..types import IntegerType, StringType from .expressions import Expression, UnaryExpression from .operators import Cast diff --git a/pysparkling/sql/types.py b/pysparkling/sql/types.py index 9cb4ac89..5447b52b 100644 --- a/pysparkling/sql/types.py +++ b/pysparkling/sql/types.py @@ -26,7 +26,7 @@ from sqlparser.internalparser import SqlParsingError -from .utils import require_minimum_pandas_version, AnalysisException +from .utils import AnalysisException, require_minimum_pandas_version __all__ = [ "DataType", "NullType", "StringType", "BinaryType", "BooleanType", "DateType", @@ -1144,20 +1144,19 @@ def merge_decimal_types(p1, s1, p2, s2, operation): if operation in ("add", "minus"): result_scale = max(s1, s2) return DecimalType.adjust_precision_scale(max(p1 - s1, p2 - s2) + result_scale + 1, result_scale) - elif operation == "multiply": + if operation == "multiply": return DecimalType.adjust_precision_scale(p1 + p2 + 1, s1 + s2) - elif operation == "divide": + if operation == "divide": result_scale = max(6, s1 + p2 + 1) return DecimalType.adjust_precision_scale(p1 - s1 + s2 + result_scale, result_scale) - elif operation == "mod": + if operation == "mod": result_scale = max(s1, s2) return DecimalType.adjust_precision_scale(min(p1 - s1, p2 - s2) + result_scale, result_scale) - elif operation in ("bitwise_or", "bitwise_and", "bitwise_xor"): + if operation in ("bitwise_or", "bitwise_and", "bitwise_xor"): if (p1, s1) != (p2, s2): raise AnalysisException("data type mismatch: differing types") return DecimalType.adjust_precision_scale(p1, s1) - else: - raise ValueError(f"Unknown operation {operation}") + raise ValueError(f"Unknown operation {operation}") def _need_converter(dataType):