Skip to content

Commit

Permalink
Merge pull request #239 from nijanthanvijayakumar/feature-issue-237-remove-extensions

Browse files Browse the repository at this point in the history

Remove the extensions functions
  • Loading branch information
SemyonSinchenko authored Jul 14, 2024
2 parents b67fc98 + d3d497a commit e1b66c8
Show file tree
Hide file tree
Showing 7 changed files with 16 additions and 255 deletions.
2 changes: 0 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,6 @@ ignore = [
extend-exclude = ["tests", "docs"]

[tool.ruff.per-file-ignores]
"quinn/extensions/column_ext.py" = ["FBT003", "N802"]
"quinn/extensions/__init__.py" = ["F401", "F403"]
"quinn/__init__.py" = ["F401", "F403"]
"quinn/functions.py" = ["FBT003"]
"quinn/keyword_finder.py" = ["A002"]
32 changes: 16 additions & 16 deletions quinn/dataframe_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ def column_to_list(df: DataFrame, col_name: str) -> list[Any]:
return [row[0] for row in df.select(col_name).collect()]

pyarrow_enabled = (
spark_session.conf.get(
"spark.sql.execution.arrow.pyspark.enabled",
)
== "true"
spark_session.conf.get(
"spark.sql.execution.arrow.pyspark.enabled",
)
== "true"
)

pyarrow_valid = pyarrow_enabled and sys.modules["pyarrow"].__version__ >= "0.17.0"
Expand All @@ -63,9 +63,9 @@ def column_to_list(df: DataFrame, col_name: str) -> list[Any]:


def two_columns_to_dictionary(
df: DataFrame,
key_col_name: str,
value_col_name: str,
df: DataFrame,
key_col_name: str,
value_col_name: str,
) -> dict[str, Any]:
"""Collect two columns as dictionary when first column is key and second is value.
Expand Down Expand Up @@ -114,18 +114,18 @@ def show_output_to_df(show_output: str, spark: SparkSession) -> DataFrame:
return spark.createDataFrame(pretty_data, pretty_column_names)


def create_df(spark: SparkSession, rows_data, col_specs) -> DataFrame: # noqa: ANN001
"""Create a new DataFrame from the given data and column specs.
def create_df(spark: SparkSession, rows_data: list[tuple], col_specs: list[tuple]) -> DataFrame:
"""Creates a new DataFrame from the given data and column specifications.
The returned DataFrame s created using the StructType and StructField classes provided by PySpark.
The returned DataFrame created using the StructType and StructField classes provided by PySpark.
:param spark: SparkSession object
:param spark: SparkSession object to create the DataFrame
:type spark: SparkSession
:param rows_data: the data used to create the DataFrame
:type rows_data: array-like
:param col_specs: list of tuples containing the name and type of the field
:type col_specs: list of tuples
:return: a new DataFrame
:param rows_data: The data used to populate the DataFrame, where each tuple represents a row.
:type rows_data: list[tuple]
:param col_specs: list of tuples containing the name and type of the field, i.e., specifications for the columns.
:type col_specs: list[tuple]
:return: A new DataFrame constructed from the provided rows and column specifications.
:rtype: DataFrame
"""
struct_fields = list(map(lambda x: StructField(*x), col_specs)) # noqa: C417
Expand Down
17 changes: 0 additions & 17 deletions quinn/extensions/__init__.py

This file was deleted.

29 changes: 0 additions & 29 deletions quinn/extensions/dataframe_ext.py

This file was deleted.

48 changes: 0 additions & 48 deletions quinn/extensions/spark_session_ext.py

This file was deleted.

118 changes: 0 additions & 118 deletions tests/extensions/test_dataframe_ext.py

This file was deleted.

25 changes: 0 additions & 25 deletions tests/extensions/test_spark_session_ext.py

This file was deleted.

0 comments on commit e1b66c8

Please sign in to comment.