Skip to content

Commit

Permalink
Merge pull request #239 from nijanthanvijayakumar/feature-issue-237-remove-extensions

Browse files Browse the repository at this point in the history

Remove the extensions functions
  • Loading branch information
SemyonSinchenko authored Jul 14, 2024
2 parents b67fc98 + d3d497a commit e1b66c8
Show file tree
Hide file tree
Showing 7 changed files with 16 additions and 255 deletions.
2 changes: 0 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,6 @@ ignore = [
extend-exclude = ["tests", "docs"]

[tool.ruff.per-file-ignores]
"quinn/extensions/column_ext.py" = ["FBT003", "N802"]
"quinn/extensions/__init__.py" = ["F401", "F403"]
"quinn/__init__.py" = ["F401", "F403"]
"quinn/functions.py" = ["FBT003"]
"quinn/keyword_finder.py" = ["A002"]
32 changes: 16 additions & 16 deletions quinn/dataframe_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ def column_to_list(df: DataFrame, col_name: str) -> list[Any]:
return [row[0] for row in df.select(col_name).collect()]

pyarrow_enabled = (
spark_session.conf.get(
"spark.sql.execution.arrow.pyspark.enabled",
)
== "true"
spark_session.conf.get(
"spark.sql.execution.arrow.pyspark.enabled",
)
== "true"
)

pyarrow_valid = pyarrow_enabled and sys.modules["pyarrow"].__version__ >= "0.17.0"
Expand All @@ -63,9 +63,9 @@ def column_to_list(df: DataFrame, col_name: str) -> list[Any]:


def two_columns_to_dictionary(
df: DataFrame,
key_col_name: str,
value_col_name: str,
df: DataFrame,
key_col_name: str,
value_col_name: str,
) -> dict[str, Any]:
"""Collect two columns as dictionary when first column is key and second is value.
Expand Down Expand Up @@ -114,18 +114,18 @@ def show_output_to_df(show_output: str, spark: SparkSession) -> DataFrame:
return spark.createDataFrame(pretty_data, pretty_column_names)


def create_df(spark: SparkSession, rows_data, col_specs) -> DataFrame: # noqa: ANN001
"""Create a new DataFrame from the given data and column specs.
def create_df(spark: SparkSession, rows_data: list[tuple], col_specs: list[tuple]) -> DataFrame:
"""Creates a new DataFrame from the given data and column specifications.
The returned DataFrame s created using the StructType and StructField classes provided by PySpark.
The returned DataFrame created using the StructType and StructField classes provided by PySpark.
:param spark: SparkSession object
:param spark: SparkSession object to create the DataFrame
:type spark: SparkSession
:param rows_data: the data used to create the DataFrame
:type rows_data: array-like
:param col_specs: list of tuples containing the name and type of the field
:type col_specs: list of tuples
:return: a new DataFrame
:param rows_data: The data used to populate the DataFrame, where each tuple represents a row.
:type rows_data: list[tuple]
:param col_specs: list of tuples containing the name and type of the field, i.e., specifications for the columns.
:type col_specs: list[tuple]
:return: A new DataFrame constructed from the provided rows and column specifications.
:rtype: DataFrame
"""
struct_fields = list(map(lambda x: StructField(*x), col_specs)) # noqa: C417
Expand Down
17 changes: 0 additions & 17 deletions quinn/extensions/__init__.py

This file was deleted.

29 changes: 0 additions & 29 deletions quinn/extensions/dataframe_ext.py

This file was deleted.

48 changes: 0 additions & 48 deletions quinn/extensions/spark_session_ext.py

This file was deleted.

118 changes: 0 additions & 118 deletions tests/extensions/test_dataframe_ext.py

This file was deleted.

25 changes: 0 additions & 25 deletions tests/extensions/test_spark_session_ext.py

This file was deleted.

0 comments on commit e1b66c8

Please sign in to comment.