-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdocker-compose.yml
114 lines (102 loc) · 2.87 KB
/
docker-compose.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
---
# Compose file format version; quoted so it is read as a string.
version: '3'

# Named volumes managed by Docker for this stack.
volumes:
  # Mounted by the atlas service at /opt/apache-atlas-2.0.0.
  # `null` means "use the default volume driver and options".
  atlasdata: null

# Every container of the stack is declared under this key.
services:
# <------------ ZOOKEEPER ------------>
  # ZooKeeper instance; the kafka and kafka-schema-registry services are
  # configured to reach it at zookeeper:2181.
  zookeeper:
    image: wurstmeister/zookeeper:3.4.6
    ports:
      # host port 2181 -> container port 2181
      - "2181:2181"
# <------------ KAFKA ------------>
  # Kafka broker (broker id 1) that connects to the zookeeper service.
  kafka:
    image: confluentinc/cp-kafka:5.4.0
    hostname: kafka
    ports:
      # Quoted like the other services' port mappings so the values are
      # always parsed as strings, never as numbers.
      - "29092:29092"
      - "9092:9092"
    environment:
      - KAFKA_BROKER_ID=1
      - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
      # Two listener names are declared; both use the PLAINTEXT protocol.
      - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
      # PLAINTEXT is advertised as kafka:29092 (the address pipeline_worker
      # is configured with); PLAINTEXT_HOST is advertised as ${HOST_IP}:9092.
      # NOTE(review): HOST_IP is not defined in this file; it must be
      # supplied through the shell environment or an .env file.
      - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://kafka:29092,PLAINTEXT_HOST://${HOST_IP}:9092
      - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
      - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
      # Fixed 256 MiB JVM heap (initial size equals maximum size).
      - KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
    depends_on:
      - zookeeper
# <------------ SCHEMA REGISTRY ------------>
  # Confluent Schema Registry, published on host port 8081; pipeline_worker
  # is configured to reach it at http://kafka-schema-registry:8081.
  kafka-schema-registry:
    # NOTE(review): this image is tagged 5.3.6 while the kafka service uses
    # 5.4.0 - confirm the version difference is intentional.
    image: confluentinc/cp-schema-registry:5.3.6
    hostname: kafka-schema-registry
    ports:
      - "8081:8081"
    environment:
      SCHEMA_REGISTRY_HOST_NAME: kafka-schema-registry
      # Points the registry's Kafka store at the zookeeper service.
      SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: "zookeeper:2181"
    depends_on:
      - zookeeper
      - kafka
# <------------ SPARK ------------>
  # Spark master, built from the local images/pyspark build context.
  spark-master:
    build: images/pyspark
    hostname: "spark-master"
    # Port 7077 is exposed to other containers only, not published on the
    # host; spark-slave connects to spark://spark-master:7077.
    expose:
      - "7077"
    environment:
      - SPARK_MODE=master
      - ATLAS_HOST=atlas:21000
      # Atlas credentials are substituted from the environment by Compose.
      - ATLAS_USER=${ATLAS_USER}
      - ATLAS_PASSWORD=${ATLAS_PASSWORD}
    volumes:
      # Host directories bind-mounted under /opt/app in the container.
      - ./app/pii_schema_inf:/opt/app/pii_schema_inf
      - ./app/custom_entities:/opt/app/custom_entities
      - ./keys:/opt/app/keys
# <------------ SPARK WORKER ------------>
  # Spark worker, built from the same images/pyspark context as the master
  # and registered with it through SPARK_MASTER_URL.
  spark-slave:
    build: images/pyspark
    hostname: "spark-slave"
    # NOTE(review): 7077 is exposed here, but the worker is configured
    # below with port 7075 and web UI port 8082 - confirm which ports
    # were meant to be exposed.
    expose:
      - "7077"
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark-master:7077
      - SPARK_WORKER_PORT=7075
      - SPARK_WORKER_WEBUI_PORT=8082
      - ATLAS_HOST=atlas:21000
      # Atlas credentials are substituted from the environment by Compose.
      - ATLAS_USER=${ATLAS_USER}
      - ATLAS_PASSWORD=${ATLAS_PASSWORD}
    volumes:
      # Same bind mounts as spark-master.
      - ./app/pii_schema_inf:/opt/app/pii_schema_inf
      - ./app/custom_entities:/opt/app/custom_entities
      - ./keys:/opt/app/keys
    depends_on:
      - spark-master
# <------------ ATLAS ---------------->
  # Apache Atlas; the Spark services and pipeline_worker are configured to
  # reach it at atlas:21000.
  atlas:
    # NOTE(review): no tag is given, so the image resolves to `latest`;
    # consider pinning a specific tag for reproducible deployments.
    image: sansarip/apache-atlas
    hostname: "atlas"
    # Port 21000 is both exposed to other containers and published on the
    # host (the `ports` entry below already makes it reachable).
    expose:
      - "21000"
    ports:
      - "21000:21000"
    volumes:
      # Named volume `atlasdata` mounted at /opt/apache-atlas-2.0.0
      # (presumably the Atlas install directory - verify against the image).
      - atlasdata:/opt/apache-atlas-2.0.0
# <------------ PIPELINE WORKER ---------->
  # Worker container built from images/pipeline_worker; it is configured
  # with the addresses of Atlas, the schema registry and the Kafka broker.
  pipeline_worker:
    build: "images/pipeline_worker"
    hostname: "pipeline_worker"
    # Allocate a pseudo-TTY for the container.
    tty: true
    environment:
      - ATLAS_HOST=atlas:21000
      # Atlas credentials are substituted from the environment by Compose.
      - ATLAS_USER=${ATLAS_USER}
      - ATLAS_PASSWORD=${ATLAS_PASSWORD}
      - SCHEMA_SERVER=http://kafka-schema-registry:8081
      # Matches the broker's PLAINTEXT advertised listener (kafka:29092).
      - KAFKA_SERVER=kafka:29092
    volumes:
      # Host directories bind-mounted under /opt/app in the container.
      - ./app/deid:/opt/app/deid
      - ./app/reid:/opt/app/reid
      - ./app/custom_entities:/opt/app/custom_entities
      - ./keys:/opt/app/keys
    depends_on:
      - kafka
      - atlas
      - kafka-schema-registry