Skip to content

Commit

Permalink
test
Browse files (browse the repository at this point in the history)
  • Loading branch information
atreyat12 committed Nov 22, 2024
1 parent 0589262 commit 019e03b
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions mylib/extract.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,12 +14,18 @@ def extract_load(url="https://shorturl.at/5YexG",
file_path="data/heart_failure.csv",
directory="data"):

os.makedirs(os.path.dirname(file_path), exist_ok=True)
if os.path.dirname(file_path):
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with requests.get(url) as r:
with open(file_path, 'wb') as f:
f.write(r.content)

spark = SparkSession.builder.appName("Extract_Load").getOrCreate()
spark = SparkSession.builder \
.appName("Extract_Load") \
.config("spark.jars.packages", "io.delta:delta-core_2.12:2.3.0") \
.config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
.config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \
.getOrCreate()
#hf_df=pd.read_csv(url)
#print(hf_df.head())
#heart_failure_df=spark.createDataFrame(hf_df)
Expand Down

0 comments on commit 019e03b

Please sign in to comment.