From 019e03bdf4fe8241741a19c11709b9be8cbb58d8 Mon Sep 17 00:00:00 2001 From: Atreya Tadepalli Date: Fri, 22 Nov 2024 01:06:15 -0500 Subject: [PATCH] test --- mylib/extract.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mylib/extract.py b/mylib/extract.py index e3fb1e1..b6d5ab3 100644 --- a/mylib/extract.py +++ b/mylib/extract.py @@ -14,12 +14,18 @@ def extract_load(url="https://shorturl.at/5YexG", file_path="data/heart_failure.csv", directory="data"): - os.makedirs(os.path.dirname(file_path), exist_ok=True) + if os.path.dirname(file_path): + os.makedirs(os.path.dirname(file_path), exist_ok=True) with requests.get(url) as r: with open(file_path, 'wb') as f: f.write(r.content) - spark = SparkSession.builder.appName("Extract_Load").getOrCreate() + spark = SparkSession.builder \ + .appName("Extract_Load") \ + .config("spark.jars.packages", "io.delta:delta-core_2.12:2.3.0") \ + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \ + .getOrCreate() #hf_df=pd.read_csv(url) #print(hf_df.head()) #heart_failure_df=spark.createDataFrame(hf_df)