diff --git a/.gitignore b/.gitignore
index 802ee54..dd48c61 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,8 @@
 .databricks/
 .venv/
+.pytest_cache/
 *.pyc
 __pycache__/
-.pytest_cache/
 dist/
 build/
 covid_analysis.egg-info/
\ No newline at end of file
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..ef7806b
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,16 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "type": "databricks",
+            "request": "launch",
+            "name": "Unit Tests (on Databricks)",
+            "program": "${workspaceFolder}/jobs/pytest_databricks.py",
+            "args": ["./tests", "-p", "no:cacheprovider"],
+            "env": {}
+        }
+    ]
+}
diff --git a/jobs/pytest_databricks.py b/jobs/pytest_databricks.py
new file mode 100644
index 0000000..f2e7893
--- /dev/null
+++ b/jobs/pytest_databricks.py
@@ -0,0 +1,18 @@
+import pytest
+import os
+import sys
+
+
+def main():
+    # Run all tests in the repository root.
+    repo_root = os.path.dirname(os.path.dirname(__file__))
+    os.chdir(repo_root)
+
+    # Skip writing pyc files on a readonly filesystem.
+    sys.dont_write_bytecode = True
+
+    raise SystemExit(pytest.main(sys.argv[1:]))  # propagate pytest's exit status so the job fails when tests fail
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/spark_test.py b/tests/spark_test.py
new file mode 100644
index 0000000..81c1075
--- /dev/null
+++ b/tests/spark_test.py
@@ -0,0 +1,18 @@
+from pyspark.sql import SparkSession
+import pytest
+
+
+@pytest.fixture
+def spark() -> SparkSession:
+    """
+    Create a spark session. Unit tests don't have access to the spark global
+    """
+    return SparkSession.builder.getOrCreate()
+
+
+def test_spark(spark):
+    """
+    Example test that needs to run on the cluster to work
+    """
+    data = spark.sql("select 1").collect()
+    assert data[0][0] == 1