# docker-compose.yml
services:
  # Spark Master service
  spark-master:
    image: bitnami/spark:latest
    container_name: spark-master
    environment:
      - SPARK_MODE=master
    ports:
      - "7077:7077" # Spark master port
      - "8080:8080" # Spark Web UI
    volumes:
      - "./SparkProcessing.py:/opt/bitnami/spark/SparkProcessing.py" # Mount the script
      - "./large_sales_data.csv:/opt/bitnami/spark/large_sales_data.csv" # Mount sales data
      - "C:/Users/hmdkr/spark/spark-3.5.3-bin-hadoop3/jars/postgresql-42.7.4.jar:/opt/bitnami/spark/jars/postgresql-42.7.4.jar" # Mount PostgreSQL JDBC driver
    networks:
      - spark-network
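
  # Note: the JDBC driver above is mounted from an absolute Windows path specific
  # to this machine; on another host, point this bind mount at your local copy of
  # postgresql-42.7.4.jar (for example, a relative ./jars/ directory).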

  # Spark Worker service
  spark-worker:
    image: bitnami/spark:latest
    container_name: spark-worker
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark-master:7077
    depends_on:
      - spark-master
    networks:
      - spark-network
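
  # Note: the PostgreSQL driver jar is not mounted on this worker. If executor
  # tasks running here need JDBC access, either mount the jar as done on
  # spark-master or have the job ship it via the spark.jars configuration.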

  # PostgreSQL service
  postgres:
    image: postgres:latest
    container_name: postgres
    environment:
      POSTGRES_USER: hk3
      POSTGRES_PASSWORD: admin123
      POSTGRES_DB: salesdb
    ports:
      - "5432:5432" # PostgreSQL port
    networks:
      - spark-network
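
  # On the spark-network, this database is reachable by its service name, e.g.
  # jdbc:postgresql://postgres:5432/salesdb (assuming SparkProcessing.py
  # connects over JDBC with the credentials above).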

  # Python service for running the job
  python:
    build: .
    container_name: python-container
    depends_on:
      - spark-master
      - postgres
    volumes:
      - ./SparkProcessing.py:/app/SparkProcessing.py # Mount the script
      - ./large_sales_data.csv:/app/large_sales_data.csv # Mount sales data (if needed)
    entrypoint: python /app/SparkProcessing.py # Automatically run the Spark job
    environment:
      - PYTHONUNBUFFERED=1
    networks:
      - spark-network
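
  # This service is built from the local Dockerfile (not shown here); it is
  # assumed to install Python and pyspark so the job can submit work to
  # spark-master.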

networks:
  spark-network:
    driver: bridge
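
# Usage (a minimal sketch, assuming Docker Compose v2 and a Dockerfile in this
# directory):
#   docker compose up --build -d   # build the python image and start all services
#   docker compose logs -f python  # follow the Spark job's output
#   docker compose down            # stop and remove the containers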