-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcompose.yaml
More file actions
122 lines (115 loc) · 3.3 KB
/
compose.yaml
File metadata and controls
122 lines (115 loc) · 3.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
---
# Docker Compose stack for the supply-chain pipeline: a single-node Kafka
# broker, a PostgreSQL database, a data generator, a Flink job/task manager
# pair, a Spark API service and a Streamlit dashboard.
name: supply_chain_big_data

services:
  # Single-node Kafka acting as both broker and controller.
  kafka:
    image: apache/kafka:3.8.1
    ports:
      # Host port 9092 -> container port 9091 (the EXTERNAL listener), which
      # is advertised to host clients as localhost:9092.
      - "9092:9091"
    healthcheck:
      # Probes the in-container PLAINTEXT listener on 9092.
      test:
        - CMD-SHELL
        - >-
          /opt/kafka/bin/kafka-cluster.sh cluster-id
          --bootstrap-server localhost:9092 || exit 1
      interval: 1s
      timeout: 60s
      retries: 60
    environment:
      KAFKA_NODE_ID: "1"
      KAFKA_PROCESS_ROLES: broker,controller
      # PLAINTEXT: other containers; EXTERNAL: host clients; CONTROLLER: quorum.
      KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:9092,EXTERNAL://0.0.0.0:9091,CONTROLLER://0.0.0.0:9093
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,EXTERNAL://localhost:9092
      KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,EXTERNAL:PLAINTEXT
      KAFKA_CONTROLLER_QUORUM_VOTERS: "1@localhost:9093"
      # Replication settings of 1 because there is only one broker.
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: "1"
      KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: "1"
      KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: "1"
      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: "0"
      KAFKA_NUM_PARTITIONS: "3"

  sql-database:
    image: postgres:17.0
    healthcheck:
      # -h 127.0.0.1 forces a TCP probe. While the image runs the scripts in
      # /docker-entrypoint-initdb.d it uses a temporary server that is only
      # reachable through the Unix socket, so a socket probe could report
      # "healthy" before initialisation has finished.
      test: ["CMD-SHELL", "pg_isready -h 127.0.0.1 -U postgres -d postgres"]
      interval: 10s
      start_period: 30s
      timeout: 10s
    ports:
      - "5432:5432"
    shm_size: 128mb
    volumes:  # set up shared memory for speed
      - type: tmpfs
        target: /dev/shm
        tmpfs:
          size: 134217728  # bytes (128 MiB)
      # Uncomment to persist the database locally. If the database starts with
      # existing data, the postgres startup scripts are not run.
      # - ./postgres_data/:/var/lib/postgresql/data
      - ./postgres_startup:/docker-entrypoint-initdb.d:z
    environment:
      # NOTE(review): hard-coded credential, also embedded in the dashboard's
      # STREAMLIT_DATABASE_URL below; keep the two in sync and do not reuse
      # outside local development.
      POSTGRES_PASSWORD: supersecret

  data-gen:
    build: data_gen
    healthcheck:
      # Healthy once the marker file /run/produce.ready exists in the container.
      test: 'bash -c "[ -f /run/produce.ready ]"'
      interval: 2s
      timeout: 5s
      retries: 480
    depends_on:
      kafka:
        condition: service_healthy
      sql-database:
        condition: service_healthy

  jobmanager:
    build: processing
    depends_on:
      data-gen:
        condition: service_healthy
    ports:
      - "8081:8081"
    command: standalone-job --python /opt/__main__.py
    environment:
      # Single multi-line variable: everything after "FLINK_PROPERTIES=" is
      # the value passed to the container.
      - |
        FLINK_PROPERTIES=
        jobmanager.rpc.address: jobmanager
        execution.shutdown-on-application-finish: false

  taskmanager:
    build: processing
    depends_on:
      - jobmanager
    command: taskmanager
    scale: 1
    environment:
      - |
        FLINK_PROPERTIES=
        jobmanager.rpc.address: jobmanager
        taskmanager.numberOfTaskSlots: 2

  spark:
    build:
      context: ./ml
      dockerfile: Dockerfile
    depends_on:
      data-gen:
        condition: service_healthy
      kafka:
        condition: service_healthy
      sql-database:
        condition: service_healthy
    command: python /app/spark_api.py
    ports:
      - "9003:9003"
    environment:
      - SPARK_OPTS=--driver-java-options=-Duser.timezone=UTC
      - PYSPARK_PYTHON=python3

  dashboard:
    build:
      context: ./dashboard
      dockerfile: Dockerfile
    ports:
      - "8501:8501"
    depends_on:
      data-gen:
        condition: service_healthy
      sql-database:
        condition: service_healthy
      kafka:
        condition: service_healthy
    environment:
      - STREAMLIT_SERVER_PORT=8501
      # Password must match POSTGRES_PASSWORD of the sql-database service.
      - STREAMLIT_DATABASE_URL=postgresql://postgres:supersecret@sql-database:5432/postgres