Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Docker compose #1

Merged
merged 2 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ k8s
*.webm
.gitignore
.dockerignore
.env
.github
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ __pycache__
media
*.webm
.idea
.env
18 changes: 9 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.9.7-slim-buster
FROM python:3.9-slim

RUN apt-get update && apt-get install --no-install-recommends -y ffmpeg bash curl git && \
rm -rf /var/lib/{apt,dpkg,cache,log}
Expand All @@ -7,17 +7,17 @@ RUN curl -fsSL https://deb.nodesource.com/setup_16.x | bash -
RUN apt-get install -y nodejs
RUN npm install --location=global nodemon

RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# RUN python -m venv /opt/venv
# ENV PATH="/opt/venv/bin:$PATH"

RUN mkdir -p /app
# RUN mkdir -p /app

RUN groupadd --gid 1001 --system datapipe && \
useradd --system --uid 1001 --gid 1001 datapipe && \
chown -R 1001:1001 /opt/venv && \
chown -R 1001:1001 /app
# RUN groupadd --gid 1001 --system datapipe && \
# useradd --system --uid 1001 --gid 1001 datapipe && \
# chown -R 1001:1001 /opt/venv && \
# chown -R 1001:1001 /app

USER 1001:1001
# USER 1001:1001

WORKDIR /app

Expand Down
15 changes: 12 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,22 @@ clean:
rm $(GRPC_SOURCES)

build:
docker build . -t paulandrei/datapipe:${VERSION}
docker build . -t projecteaina/datapipe:${VERSION}

push: build
docker push paulandrei/datapipe:${VERSION}
docker push projecteaina/datapipe:${VERSION}

deploy:
kustomize build k8s | kubectl apply -f -

undeploy:
kustomize build k8s | kubectl delete -f -
kustomize build k8s | kubectl delete -f -

deploy-docker:
docker compose --env-file .env up

undeploy-docker:
docker compose down

stop-docker:
docker compose stop
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ make deploy

## Start using datapipe

Access to any pod that was set up using paulandrei/datapipe image (example: converter-, fetcher-.. )
Access any pod that was set up using the projecteaina/datapipe image (for example: converter-, fetcher-, ...):
```bash
kubectl -n assistent exec -it fetcher-YOUR_POD_ID bash
```
Expand Down
228 changes: 228 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
services:

# PostgreSQL database shared by the pipeline services on db-network.
# Database name, user and password are supplied through Compose variable
# interpolation (POSTGRES_DB / POSTGRES_USER / POSTGRES_PASSWORD).
datapipe-db:
  container_name: datapipe-db
  image: postgres:13.1
  restart: unless-stopped
  networks:
    - db-network
  expose:
    - "5432"
  environment:
    POSTGRES_DB: "${POSTGRES_DB}"
    POSTGRES_USER: "${POSTGRES_USER}"
    POSTGRES_PASSWORD: "${POSTGRES_PASSWORD}"
  volumes:
    # Table definitions mounted into the image's init-script directory.
    - ./k8s/postgresql/sql/tables.sql:/docker-entrypoint-initdb.d/db.sql
    # Named volume holding the database files.
    - datapipe-db-data:/var/lib/postgresql/data
  # Dependent services wait on this check via `condition: service_healthy`.
  healthcheck:
    test:
      - "CMD-SHELL"
      - "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"
    interval: 5s
    timeout: 5s
    retries: 5

# datapipe-db-backup:
# image: eeshugerman/postgres-backup-s3:13
# environment:
# SCHEDULE: ${SCHEDULE-@every 1h}
# BACKUP_KEEP_DAYS: ${BACKUP_KEEP_DAYS-30}
# S3_REGION: us-east-1
# S3_ENDPOINT: ${S3_ENDPOINT}
# S3_ACCESS_KEY_ID: ${S3_ACCESS_KEY_ID}
# S3_SECRET_ACCESS_KEY: ${S3_SECRET_ACCESS_KEY}
# S3_BUCKET: ${S3_BUCKET}
# S3_PREFIX: ${S3_PREFIX-datapipe_db_backups}
# POSTGRES_HOST: datapipe-db
# POSTGRES_DATABASE: ${POSTGRES_DB}
# POSTGRES_USER: ${POSTGRES_USER}
# POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
# networks:
# - db-network
# depends_on:
# datapipe-db:
# condition: service_healthy

# Grafana dashboards for the pipeline.
# The service is attached to db-network in addition to the default network:
# datapipe-db lives only on db-network, so without this attachment Grafana
# cannot resolve or reach the database.
# NOTE(review): presumably Grafana uses datapipe-db as a data source - confirm.
datapipe-grafana:
  image: grafana/grafana:7.5.4
  container_name: datapipe-grafana
  restart: unless-stopped
  environment:
    - GF_INSTALL_PLUGINS=grafana-piechart-panel,grafana-worldmap-panel
    - GF_LOG_MODE=console
    - GF_LOG_LEVEL=info
  # Port 3000 is only exposed to other containers, not published on the host.
  expose:
    - "3000"
  networks:
    # Listing `default` explicitly keeps the attachment the service had before.
    - default
    - db-network
  volumes:
    - grafana-data:/var/lib/grafana

# Language-identification HTTP service called by the preprocessor.
# It must share a network with the preprocessor: the preprocessor is attached
# only to db-network, so text2lang joins db-network as well (the default
# network is kept so the previous attachment is unchanged).
text2lang:
  image: projecteaina/text2lang:latest
  container_name: datapipe-text2lang
  restart: unless-stopped
  expose:
    - "8000"
  networks:
    - default
    - db-network

# Runs the `preprocessor` module of the datapipe image once the database
# reports healthy.
preprocessor:
  image: projecteaina/datapipe:latest
  container_name: datapipe-preprocessor
  restart: unless-stopped
  volumes:
    - datapipe-data:/datapipe
  command: "python -m preprocessor"
  environment:
    - PYTHONUNBUFFERED=1
    - PG_HOST=datapipe-db
    - PG_PASSWORD=${POSTGRES_PASSWORD}
    # Compose has no Service-style port remapping: the URL must use the port
    # the text2lang container exposes (8000), not 80.
    # NOTE(review): confirm the text2lang image listens on 8000.
    - API_URL=http://text2lang:8000/text2lang
  networks:
    - db-network
  depends_on:
    datapipe-db:
      condition: service_healthy

# Runs the `fetcher` module of the datapipe image with two replicas.
# No container_name is set on purpose: a fixed container name cannot be
# combined with more than one replica.
fetcher:
  image: projecteaina/datapipe:latest
  restart: unless-stopped
  volumes:
    - datapipe-data:/datapipe
  # Start the module the same way as every other datapipe service (and the
  # Kubernetes fetcher deployment); a bare "fetcher" is not a module launch.
  command: "python -m fetcher"
  deploy:
    mode: replicated
    replicas: 2
  environment:
    - PYTHONUNBUFFERED=1
    - PG_HOST=datapipe-db
    - PG_PASSWORD=${POSTGRES_PASSWORD}
    # Download locations, all inside the shared datapipe-data volume.
    - YT_AUDIO_DOWNLOAD_PATH=/datapipe/audio/youtube
    - YT_CAPTION_DOWNLOAD_PATH=/datapipe/caption/youtube
    - CCMA_AUDIO_DOWNLOAD_PATH=/datapipe/audio/ccma
    - CCMA_VIDEO_DOWNLOAD_PATH=/datapipe/tmp/video/ccma
    - CCMA_CAPTION_DOWNLOAD_PATH=/datapipe/caption/ccma
  networks:
    - db-network
  depends_on:
    datapipe-db:
      condition: service_healthy

# Runs the `converter` module of the datapipe image once the database
# reports healthy. All paths point inside the shared datapipe-data volume.
converter:
  container_name: datapipe-converter
  image: projecteaina/datapipe:latest
  command: ["python", "-m", "converter"]
  restart: unless-stopped
  depends_on:
    datapipe-db:
      condition: service_healthy
  networks:
    - db-network
  volumes:
    - datapipe-data:/datapipe
  environment:
    PYTHONUNBUFFERED: "1"
    PG_HOST: "datapipe-db"
    PG_PASSWORD: "${POSTGRES_PASSWORD}"
    AUDIO_16_PATH: "/datapipe/audio16"
    CCMA_AUDIO_DOWNLOAD_PATH: "/datapipe/audio/ccma"
    CCMA_VIDEO_DOWNLOAD_PATH: "/datapipe/tmp/video/ccma"

# vad-transcriber:
# image: projecteaina/datapipe:latest
# container_name: datapipe-vad-transcriber
# restart: unless-stopped
# volumes:
# - datapipe-data:/datapipe
# command: "python -m vosk_vad_transcriber"
# environment:
# - PYTHONUNBUFFERED=1
# - PG_HOST=datapipe-db
# - PG_PASSWORD=${POSTGRES_PASSWORD}
# - VOSK_SERVER_HOST=ona-vosk
# networks:
# - db-network
# depends_on:
# datapipe-db:
# condition: service_healthy

# Runs the `gender` module of the datapipe image once the database reports
# healthy.
gender:
  image: projecteaina/datapipe:latest
  container_name: datapipe-gender
  restart: unless-stopped
  volumes:
    - datapipe-data:/datapipe
  command: "python -m gender"
  environment:
    - PYTHONUNBUFFERED=1
    - PG_HOST=datapipe-db
    - PG_PASSWORD=${POSTGRES_PASSWORD}
    # No whitespace after "=": in list-form entries everything after the
    # equals sign, including a leading space, becomes part of the value.
    - CLIPS_PATH=/datapipe/clips
  networks:
    - db-network
  depends_on:
    datapipe-db:
      condition: service_healthy

# splitter:
# image: projecteaina/datapipe:latest
# container_name: datapipe-splitter
# restart: unless-stopped
# volumes:
# - datapipe-data:/datapipe
# command: "python -m splitter"
# environment:
# - PYTHONUNBUFFERED=1
# - PG_HOST=datapipe-db
# - PG_PASSWORD=${POSTGRES_PASSWORD}
# - CLIPS_PATH=/datapipe/clips
# networks:
# - db-network
# depends_on:
# datapipe-db:
# condition: service_healthy

# ona-vosk:
# image: assistent/kaldi-catala:0.0.4
# container_name: ona-vosk
# restart: unless-stopped
# environment:
# - VOSK_SAMPLE_RATE=16000
# expose:
# - "5001"

# wav2vec2-catala:
# image: ghcr.io/ccoreilly/wav2vec2-catala-onnx:0.1.1
# container_name: wav2vec2-catala
# restart: unless-stopped
# expose:
# - "8000"

# wav2vec2-transcriber:
# image: projecteaina/datapipe:latest
# container_name: datapipe-wav2vec2-transcriber
# restart: unless-stopped
# volumes:
# - datapipe-data:/datapipe
# command: "python -m wav2vec2_transcriber"
# environment:
# - PYTHONUNBUFFERED=1
# - PG_HOST=datapipe-db
# - PG_PASSWORD=${POSTGRES_PASSWORD}
# - API_URL=http://wav2vec2-catala/recognize
# networks:
# - db-network
# depends_on:
# datapipe-db:
# condition: service_healthy


# Named volumes, all with default settings.
volumes:
  # Shared working directory mounted at /datapipe by the datapipe services.
  datapipe-data: {}
  # PostgreSQL data directory.
  datapipe-db-data: {}
  # Grafana state (/var/lib/grafana).
  grafana-data: {}

# Network joining the database and the services that connect to it.
networks:
  db-network: {}
    # Optional MTU override; to enable it, replace `{}` with this mapping.
    # driver_opts:
    #   com.docker.network.driver.mtu: ${NETWORK_MTU}
2 changes: 1 addition & 1 deletion k8s/converter/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ spec:
claimName: datapipe-storage-local
containers:
- name: converter
image: paulandrei/datapipe:0.8.0
image: projecteaina/datapipe:latest
imagePullPolicy: IfNotPresent
command: [ "python", "-m", "converter" ]
volumeMounts:
Expand Down
2 changes: 1 addition & 1 deletion k8s/debug.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ spec:
claimName: datapipe-storage
containers:
- name: debug
image: paulandrei/datapipe:0.8.0
image: projecteaina/datapipe:latest
imagePullPolicy: IfNotPresent
command: [ "sleep" ]
args: ["infinity"]
Expand Down
2 changes: 1 addition & 1 deletion k8s/fetcher/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ spec:
claimName: datapipe-storage-local
containers:
- name: fetcher
image: paulandrei/datapipe:0.8.0
image: projecteaina/datapipe:latest
imagePullPolicy: IfNotPresent
command: [ "python", "-m", "fetcher" ]
volumeMounts:
Expand Down
2 changes: 1 addition & 1 deletion k8s/gender/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ spec:
claimName: datapipe-storage-local
containers:
- name: gender
image: paulandrei/datapipe:0.8.0
image: projecteaina/datapipe:latest
imagePullPolicy: IfNotPresent
command: [ "python", "-m", "gender" ]
volumeMounts:
Expand Down
Loading