fix: simplify test cases
rajc242 committed Nov 13, 2024
1 parent 0cbe95e commit f765e0d
Showing 1 changed file with 1 addition and 75 deletions.
76 changes: 1 addition & 75 deletions python/test/gcs/test_gcs_to_bigtable.py
@@ -41,78 +41,4 @@ def test_parse_args(self):
         assert parsed_args["gcs.bigtable.input.location"] == "gs://test"
         assert parsed_args["spark.bigtable.project.id"] == "GCP_PROJECT"
         assert parsed_args["spark.bigtable.instance.id"] == "BIGTABLE_INSTANCE_ID"
-        assert parsed_args["gcs.bigtable.catalog.json"] == 'gs://catalog/catalog.json'
-
-    @mock.patch.object(pyspark.sql, 'SparkSession')
-    def test_run(self, mock_spark_session):
-        """Tests GCSToBigTableTemplate runs"""
-
-        gcs_to_bigtable_template = GCSToBigTableTemplate()
-        mock_parsed_args = gcs_to_bigtable_template.parse_args(
-            ["--gcs.bigtable.input.format=parquet",
-             "--gcs.bigtable.input.location=gs://test",
-             "--spark.bigtable.project.id=GCP_PROJECT",
-             "--spark.bigtable.instance.id=BIGTABLE_INSTANCE_ID",
-             "--gcs.bigtable.catalog.json=gs://catalog/catalog.json"])
-        mock_spark_session.read.parquet.return_value = mock_spark_session.dataframe.DataFrame
-        gcs_to_bigtable_template.run(mock_spark_session, mock_parsed_args)
-
-        mock_spark_session.read.parquet.assert_called_once_with("gs://test")
-        mock_spark_session.dataframe.DataFrame.write.format. \
-            assert_called_once_with(constants.FORMAT_BIGTABLE)
-
-    @mock.patch.object(pyspark.sql, 'SparkSession')
-    def test_run_csv1(self, mock_spark_session):
-        """Tests GCSToBigTableTemplate runs with csv format"""
-
-        gcs_to_bigtable_template = GCSToBigTableTemplate()
-        mock_parsed_args = gcs_to_bigtable_template.parse_args(
-            ["--gcs.bigtable.input.format=csv",
-             "--gcs.bigtable.input.location=gs://test",
-             "--gcs.bigtable.input.header=false",
-             "--spark.bigtable.project.id=GCP_PROJECT",
-             "--spark.bigtable.instance.id=BIGTABLE_INSTANCE_ID",
-             "--gcs.bigtable.catalog.json=gs://catalog/catalog.json"])
-        mock_spark_session.read.format().options().load.return_value = mock_spark_session.dataframe.DataFrame
-        gcs_to_bigtable_template.run(mock_spark_session, mock_parsed_args)
-
-        mock_spark_session.read.format.assert_called_with(
-            constants.FORMAT_CSV)
-        mock_spark_session.read.format().options.assert_called_with(**{
-            constants.CSV_HEADER: 'false',
-            constants.CSV_INFER_SCHEMA: 'true',
-        })
-        mock_spark_session.read.format().options().load.assert_called_once_with("gs://test")
-        mock_spark_session.dataframe.DataFrame.write.format. \
-            assert_called_once_with(constants.FORMAT_BIGTABLE)
-
-    @mock.patch.object(pyspark.sql, 'SparkSession')
-    def test_run_csv2(self, mock_spark_session):
-        """Tests GCSToBigTableTemplate runs with csv format and some optional csv options"""
-
-        gcs_to_bigtable_template = GCSToBigTableTemplate()
-        mock_parsed_args = gcs_to_bigtable_template.parse_args(
-            ["--gcs.bigtable.input.format=csv",
-             "--gcs.bigtable.input.location=gs://test",
-             "--gcs.bigtable.input.inferschema=false",
-             "--gcs.bigtable.input.sep=|",
-             "--gcs.bigtable.input.comment=#",
-             "--gcs.bigtable.input.timestampntzformat=yyyy-MM-dd'T'HH:mm:ss",
-             "--spark.bigtable.project.id=GCP_PROJECT",
-             "--spark.bigtable.instance.id=BIGTABLE_INSTANCE_ID",
-             "--gcs.bigtable.catalog.json=gs://catalog/catalog.json"])
-        mock_spark_session.read.format().options().load.return_value = mock_spark_session.dataframe.DataFrame
-        gcs_to_bigtable_template.run(mock_spark_session, mock_parsed_args)
-
-        mock_spark_session.read.format.assert_called_with(
-            constants.FORMAT_CSV)
-        mock_spark_session.read.format().options.assert_called_with(**{
-            constants.CSV_HEADER: 'true',
-            constants.CSV_INFER_SCHEMA: 'false',
-            constants.CSV_SEP: "|",
-            constants.CSV_COMMENT: "#",
-            constants.CSV_TIMESTAMPNTZFORMAT: "yyyy-MM-dd'T'HH:mm:ss",
-        })
-        mock_spark_session.read.format().options().load.assert_called_once_with("gs://test")
-        mock_spark_session.dataframe.DataFrame.write.format. \
-            assert_called_once_with(constants.FORMAT_BIGTABLE)
+        assert parsed_args["gcs.bigtable.catalog.json"] == 'gs://dataproc-templates/conf/employeecatalog.json'

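Note on the pattern used by the deleted tests: unittest.mock's MagicMock returns the same child mock for every attribute access and every call, which is what lets a chain such as read.format().options().load be asserted step by step after run() has executed. The standalone sketch below isolates that pattern; it is not repository code, and the names read_csv and fake_spark are invented for illustration.

# Minimal sketch of mock call-chain assertions (illustrative, not from dataproc-templates).
from unittest import mock


def read_csv(spark, path):
    # Assumed shape of a template-style CSV read path, for illustration only.
    return spark.read.format("csv").options(header="false", inferSchema="true").load(path)


def test_read_csv_chain():
    fake_spark = mock.MagicMock()
    read_csv(fake_spark, "gs://test")

    # Each attribute access and call on a MagicMock yields the same child mock,
    # so format() below returns the very object whose .options was called inside read_csv.
    fake_spark.read.format.assert_called_once_with("csv")
    fake_spark.read.format().options.assert_called_with(header="false", inferSchema="true")
    fake_spark.read.format().options().load.assert_called_once_with("gs://test")


test_read_csv_chain()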