Skip to content

Commit

Permalink
Merge pull request #1 from projecte-aina/docker-compose
Browse files Browse the repository at this point in the history
Docker compose
  • Loading branch information
PaulNdrei authored May 6, 2024
2 parents 3427089 + f89abb2 commit 5329359
Show file tree
Hide file tree
Showing 18 changed files with 428 additions and 186 deletions.
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ k8s
*.webm
.gitignore
.dockerignore
.env
.github
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ __pycache__
media
*.webm
.idea
.env
18 changes: 9 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.9.7-slim-buster
FROM python:3.9-slim

RUN apt-get update && apt-get install --no-install-recommends -y ffmpeg bash curl git && \
rm -rf /var/lib/{apt,dpkg,cache,log}
Expand All @@ -7,17 +7,17 @@ RUN curl -fsSL https://deb.nodesource.com/setup_16.x | bash -
RUN apt-get install -y nodejs
RUN npm install --location=global nodemon

RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# RUN python -m venv /opt/venv
# ENV PATH="/opt/venv/bin:$PATH"

RUN mkdir -p /app
# RUN mkdir -p /app

RUN groupadd --gid 1001 --system datapipe && \
useradd --system --uid 1001 --gid 1001 datapipe && \
chown -R 1001:1001 /opt/venv && \
chown -R 1001:1001 /app
# RUN groupadd --gid 1001 --system datapipe && \
# useradd --system --uid 1001 --gid 1001 datapipe && \
# chown -R 1001:1001 /opt/venv && \
# chown -R 1001:1001 /app

USER 1001:1001
# USER 1001:1001

WORKDIR /app

Expand Down
15 changes: 12 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,22 @@ clean:
rm $(GRPC_SOURCES)

build:
docker build . -t paulandrei/datapipe:${VERSION}
docker build . -t projecteaina/datapipe:${VERSION}

push: build
docker push paulandrei/datapipe:${VERSION}
docker push projecteaina/datapipe:${VERSION}

deploy:
kustomize build k8s | kubectl apply -f -

undeploy:
kustomize build k8s | kubectl delete -f -
kustomize build k8s | kubectl delete -f -

deploy-docker:
docker compose --env-file .env up

undeploy-docker:
docker compose down

stop-docker:
docker compose stop
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ make deploy

## Start using datapipe

Access to any pod that was set up using paulandrei/datapipe image (example: converter-, fetcher-.. )
Access any pod that was set up using the projecteaina/datapipe image (for example: converter-, fetcher-, ...):
```bash
kubectl -n assistent exec -it fetcher-YOUR_POD_ID bash
```
Expand Down
228 changes: 228 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
services:

# PostgreSQL database used by the pipeline services. Database name, user and
# password come from the POSTGRES_* variables that Compose interpolates.
datapipe-db:
  container_name: datapipe-db
  image: postgres:13.1
  restart: unless-stopped
  networks:
    - db-network
  # Port 5432 is only declared for other containers; nothing is published to
  # the host from this service.
  expose:
    - "5432"
  environment:
    - POSTGRES_DB=${POSTGRES_DB}
    - POSTGRES_USER=${POSTGRES_USER}
    - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
  volumes:
    # Table definitions, mounted into the image's init-script directory.
    - ./k8s/postgresql/sql/tables.sql:/docker-entrypoint-initdb.d/db.sql
    # Named volume that holds the database files.
    - datapipe-db-data:/var/lib/postgresql/data
  # Services that declare `condition: service_healthy` wait for this probe.
  healthcheck:
    test:
      - "CMD-SHELL"
      - "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"
    interval: 5s
    timeout: 5s
    retries: 5

# datapipe-db-backup:
# image: eeshugerman/postgres-backup-s3:13
# environment:
# SCHEDULE: ${SCHEDULE-@every 1h}
# BACKUP_KEEP_DAYS: ${BACKUP_KEEP_DAYS-30}
# S3_REGION: us-east-1
# S3_ENDPOINT: ${S3_ENDPOINT}
# S3_ACCESS_KEY_ID: ${S3_ACCESS_KEY_ID}
# S3_SECRET_ACCESS_KEY: ${S3_SECRET_ACCESS_KEY}
# S3_BUCKET: ${S3_BUCKET}
# S3_PREFIX: ${S3_PREFIX-datapipe_db_backups}
# POSTGRES_HOST: datapipe-db
# POSTGRES_DATABASE: ${POSTGRES_DB}
# POSTGRES_USER: ${POSTGRES_USER}
# POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
# networks:
# - db-network
# depends_on:
# datapipe-db:
# condition: service_healthy

# Grafana instance with the pie-chart and world-map panel plugins installed
# at start-up. Port 3000 is only declared with `expose`; it is not published
# to the host by this file.
datapipe-grafana:
  image: grafana/grafana:7.5.4
  container_name: datapipe-grafana
  restart: unless-stopped
  environment:
    - GF_INSTALL_PLUGINS=grafana-piechart-panel,grafana-worldmap-panel
    - GF_LOG_MODE=console
    - GF_LOG_LEVEL=info
  expose:
    - "3000"
  networks:
    # Keep the implicit default-network attachment this service had before.
    - default
    # datapipe-db is attached to db-network only, so without this entry the
    # host name `datapipe-db` cannot be resolved from Grafana.
    # NOTE(review): assumes the dashboards query that database - confirm.
    - db-network
  volumes:
    - grafana-data:/var/lib/grafana

# text2lang HTTP service. The preprocessor's API_URL points at this service's
# host name (`text2lang`).
text2lang:
  container_name: datapipe-text2lang
  image: projecteaina/text2lang:latest
  restart: unless-stopped
  # Declared container port; not published to the host.
  expose:
    - "8000"

# Runs the `preprocessor` module of the datapipe image. It talks to the
# database (PG_HOST) and to the text2lang HTTP service (API_URL).
preprocessor:
  image: projecteaina/datapipe:latest
  container_name: datapipe-preprocessor
  restart: unless-stopped
  command: "python -m preprocessor"
  volumes:
    - datapipe-data:/datapipe
  environment:
    - PYTHONUNBUFFERED=1
    - PG_HOST=datapipe-db
    - PG_PASSWORD=${POSTGRES_PASSWORD}
    # Containers are addressed on their own port in Compose (there is no
    # service-level port remapping), and text2lang declares `expose: "8000"`.
    # NOTE(review): confirm the text2lang image really listens on 8000.
    - API_URL=http://text2lang:8000/text2lang
  networks:
    # Reaches datapipe-db.
    - db-network
    # text2lang declares no networks, so it lives on the implicit default
    # network; join it so the API_URL host name can be resolved.
    - default
  depends_on:
    datapipe-db:
      condition: service_healthy
    # Start the language service before the preprocessor begins calling it.
    text2lang:
      condition: service_started

# Runs the `fetcher` module of the datapipe image as two replicas. No
# container_name is set: a fixed name cannot be shared by several replicas.
fetcher:
  image: projecteaina/datapipe:latest
  restart: unless-stopped
  volumes:
    - datapipe-data:/datapipe
  # Launch the module through the interpreter, as the other datapipe services
  # in this file do; a bare "fetcher" would be looked up as an executable.
  command: "python -m fetcher"
  deploy:
    mode: replicated
    replicas: 2
  environment:
    - PYTHONUNBUFFERED=1
    - PG_HOST=datapipe-db
    - PG_PASSWORD=${POSTGRES_PASSWORD}
    # Download locations, all inside the shared datapipe-data volume.
    - YT_AUDIO_DOWNLOAD_PATH=/datapipe/audio/youtube
    - YT_CAPTION_DOWNLOAD_PATH=/datapipe/caption/youtube
    - CCMA_AUDIO_DOWNLOAD_PATH=/datapipe/audio/ccma
    - CCMA_VIDEO_DOWNLOAD_PATH=/datapipe/tmp/video/ccma
    - CCMA_CAPTION_DOWNLOAD_PATH=/datapipe/caption/ccma
  networks:
    - db-network
  depends_on:
    datapipe-db:
      condition: service_healthy

# Runs the `converter` module of the datapipe image against the shared
# datapipe-data volume, once the database reports healthy.
converter:
  container_name: datapipe-converter
  image: projecteaina/datapipe:latest
  command: "python -m converter"
  restart: unless-stopped
  depends_on:
    datapipe-db:
      condition: service_healthy
  networks:
    - db-network
  volumes:
    - datapipe-data:/datapipe
  environment:
    - PYTHONUNBUFFERED=1
    # Database connection.
    - PG_HOST=datapipe-db
    - PG_PASSWORD=${POSTGRES_PASSWORD}
    # Paths inside the shared volume.
    - AUDIO_16_PATH=/datapipe/audio16
    - CCMA_AUDIO_DOWNLOAD_PATH=/datapipe/audio/ccma
    - CCMA_VIDEO_DOWNLOAD_PATH=/datapipe/tmp/video/ccma

# vad-transcriber:
# image: projecteaina/datapipe:latest
# container_name: datapipe-vad-transcriber
# restart: unless-stopped
# volumes:
# - datapipe-data:/datapipe
# command: "python -m vosk_vad_transcriber"
# environment:
# - PYTHONUNBUFFERED=1
# - PG_HOST=datapipe-db
# - PG_PASSWORD=${POSTGRES_PASSWORD}
# - VOSK_SERVER_HOST=ona-vosk
# networks:
# - db-network
# depends_on:
# datapipe-db:
# condition: service_healthy

# Runs the `gender` module of the datapipe image against the shared
# datapipe-data volume, once the database reports healthy.
gender:
  image: projecteaina/datapipe:latest
  container_name: datapipe-gender
  restart: unless-stopped
  volumes:
    - datapipe-data:/datapipe
  command: "python -m gender"
  environment:
    - PYTHONUNBUFFERED=1
    - PG_HOST=datapipe-db
    - PG_PASSWORD=${POSTGRES_PASSWORD}
    # No space after `=`: in list form everything after the first `=` is the
    # value, so a space would make the path start with a blank character.
    - CLIPS_PATH=/datapipe/clips
  networks:
    - db-network
  depends_on:
    datapipe-db:
      condition: service_healthy

# splitter:
# image: projecteaina/datapipe:latest
# container_name: datapipe-splitter
# restart: unless-stopped
# volumes:
# - datapipe-data:/datapipe
# command: "python -m splitter"
# environment:
# - PYTHONUNBUFFERED=1
# - PG_HOST=datapipe-db
# - PG_PASSWORD=${POSTGRES_PASSWORD}
# - CLIPS_PATH=/datapipe/clips
# networks:
# - db-network
# depends_on:
# datapipe-db:
# condition: service_healthy

# ona-vosk:
# image: assistent/kaldi-catala:0.0.4
# container_name: ona-vosk
# restart: unless-stopped
# environment:
# - VOSK_SAMPLE_RATE=16000
# expose:
# - "5001"

# wav2vec2-catala:
# image: ghcr.io/ccoreilly/wav2vec2-catala-onnx:0.1.1
# container_name: wav2vec2-catala
# restart: unless-stopped
# expose:
# - "8000"

# wav2vec2-transcriber:
# image: projecteaina/datapipe:latest
# container_name: datapipe-wav2vec2-transcriber
# restart: unless-stopped
# volumes:
# - datapipe-data:/datapipe
# command: "python -m wav2vec2_transcriber"
# environment:
# - PYTHONUNBUFFERED=1
# - PG_HOST=datapipe-db
# - PG_PASSWORD=${POSTGRES_PASSWORD}
# - API_URL=http://wav2vec2-catala/recognize
# networks:
# - db-network
# depends_on:
# datapipe-db:
# condition: service_healthy


# Named volumes referenced by the services in this file; each one uses the
# default settings (empty mapping).
volumes:
  datapipe-data: {}
  datapipe-db-data: {}
  grafana-data: {}

# Network joined by the database and by the services that connect to it.
networks:
  db-network:
    # Optional MTU override, currently disabled. To enable it, uncomment both
    # lines below and provide NETWORK_MTU in the environment file.
    # driver_opts:
    #   com.docker.network.driver.mtu: ${NETWORK_MTU}
2 changes: 1 addition & 1 deletion k8s/converter/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ spec:
claimName: datapipe-storage-local
containers:
- name: converter
image: paulandrei/datapipe:0.8.0
image: projecteaina/datapipe:latest
imagePullPolicy: IfNotPresent
command: [ "python", "-m", "converter" ]
volumeMounts:
Expand Down
2 changes: 1 addition & 1 deletion k8s/debug.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ spec:
claimName: datapipe-storage
containers:
- name: debug
image: paulandrei/datapipe:0.8.0
image: projecteaina/datapipe:latest
imagePullPolicy: IfNotPresent
command: [ "sleep" ]
args: ["infinity"]
Expand Down
2 changes: 1 addition & 1 deletion k8s/fetcher/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ spec:
claimName: datapipe-storage-local
containers:
- name: fetcher
image: paulandrei/datapipe:0.8.0
image: projecteaina/datapipe:latest
imagePullPolicy: IfNotPresent
command: [ "python", "-m", "fetcher" ]
volumeMounts:
Expand Down
2 changes: 1 addition & 1 deletion k8s/gender/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ spec:
claimName: datapipe-storage-local
containers:
- name: gender
image: paulandrei/datapipe:0.8.0
image: projecteaina/datapipe:latest
imagePullPolicy: IfNotPresent
command: [ "python", "-m", "gender" ]
volumeMounts:
Expand Down
Loading

0 comments on commit 5329359

Please sign in to comment.