From 4eaa5548626102438a5e819551717dfec6860f6b Mon Sep 17 00:00:00 2001 From: PaulNdrei Date: Mon, 6 May 2024 17:25:19 +0200 Subject: [PATCH 1/2] Add docker-compose --- .dockerignore | 2 + .gitignore | 1 + Dockerfile | 18 +- Makefile | 15 +- README.md | 2 +- docker-compose.yml | 228 ++++++++++++++++ k8s/converter/deployment.yaml | 2 +- k8s/debug.yaml | 2 +- k8s/fetcher/deployment.yaml | 2 +- k8s/gender/deployment.yaml | 2 +- k8s/postgresql/sql/tables.sql | 326 ++++++++++++----------- k8s/preprocessor/deployment.yaml | 2 +- k8s/splitter/deployment.yaml | 2 +- k8s/text2lang/deployment.yaml | 2 +- k8s/vosk_vad_transcriber/deployment.yaml | 2 +- k8s/wav2vec2_transcriber/deployment.yaml | 2 +- okteto.yml | 2 +- requirements.txt | 2 +- 18 files changed, 428 insertions(+), 186 deletions(-) create mode 100644 docker-compose.yml diff --git a/.dockerignore b/.dockerignore index 91a4433..fa5b81e 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,3 +9,5 @@ k8s *.webm .gitignore .dockerignore +.env +.github \ No newline at end of file diff --git a/.gitignore b/.gitignore index 8d42c05..0134e56 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ __pycache__ media *.webm .idea +.env \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index b562487..1d32bdb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.9.7-slim-buster +FROM python:3.9-slim RUN apt-get update && apt-get install --no-install-recommends -y ffmpeg bash curl git && \ rm -rf /var/lib/{apt,dpkg,cache,log} @@ -7,17 +7,17 @@ RUN curl -fsSL https://deb.nodesource.com/setup_16.x | bash - RUN apt-get install -y nodejs RUN npm install --location=global nodemon -RUN python -m venv /opt/venv -ENV PATH="/opt/venv/bin:$PATH" +# RUN python -m venv /opt/venv +# ENV PATH="/opt/venv/bin:$PATH" -RUN mkdir -p /app +# RUN mkdir -p /app -RUN groupadd --gid 1001 --system datapipe && \ - useradd --system --uid 1001 --gid 1001 datapipe && \ - chown -R 1001:1001 /opt/venv && \ - chown -R 1001:1001 /app +# RUN groupadd --gid 1001 --system datapipe && \ +# useradd --system --uid 1001 --gid 1001 datapipe && \ +# chown -R 1001:1001 /opt/venv && \ +# chown -R 1001:1001 /app -USER 1001:1001 +# USER 1001:1001 WORKDIR /app diff --git a/Makefile b/Makefile index 76eb3a6..12338f6 100644 --- a/Makefile +++ b/Makefile @@ -10,13 +10,22 @@ clean: rm $(GRPC_SOURCES) build: - docker build . -t paulandrei/datapipe:${VERSION} + docker build . -t projecteaina/datapipe:${VERSION} push: build - docker push paulandrei/datapipe:${VERSION} + docker push projecteaina/datapipe:${VERSION} deploy: kustomize build k8s | kubectl apply -f - undeploy: - kustomize build k8s | kubectl delete -f - \ No newline at end of file + kustomize build k8s | kubectl delete -f - + +deploy-docker: + docker compose --env-file .env up + +undeploy-docker: + docker compose down + +stop-docker: + docker compose stop diff --git a/README.md b/README.md index 58dca1d..07ae87e 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ make deploy ## Start using datapipe -Access to any pod that was set up using paulandrei/datapipe image (example: converter-, fetcher-.. ) +Access to any pod that was set up using projecteaina/datapipe image (example: converter-, fetcher-.. ) ```bash kubectl -n assistent exec -it fetcher-YOUR_POD_ID bash ``` diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..1625915 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,228 @@ +services: + + datapipe-db: + image: postgres:13.1 + container_name: datapipe-db + restart: unless-stopped + environment: + - POSTGRES_DB=${POSTGRES_DB} + - POSTGRES_USER=${POSTGRES_USER} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} + expose: + - "5432" + networks: + - db-network + volumes: + - ./k8s/postgresql/sql/tables.sql:/docker-entrypoint-initdb.d/db.sql + - datapipe-db-data:/var/lib/postgresql/data + healthcheck: + test: + [ + "CMD-SHELL", + "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}", + ] + interval: 5s + timeout: 5s + retries: 5 + + # datapipe-db-backup: + # image: eeshugerman/postgres-backup-s3:16 + # environment: + # SCHEDULE: ${SCHEDULE-@every 1h} + # BACKUP_KEEP_DAYS: ${BACKUP_KEEP_DAYS-30} + # S3_REGION: us-east-1 + # S3_ENDPOINT: ${S3_ENDPOINT} + # S3_ACCESS_KEY_ID: ${S3_ACCESS_KEY_ID} + # S3_SECRET_ACCESS_KEY: ${S3_SECRET_ACCESS_KEY} + # S3_BUCKET: ${S3_BUCKET} + # S3_PREFIX: ${S3_PREFIX-datapipe_db_backups} + # POSTGRES_HOST: datapipe-db + # POSTGRES_DATABASE: ${POSTGRES_DB} + # POSTGRES_USER: ${POSTGRES_USER} + # POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + # networks: + # - db-network + # depends_on: + # datapipe-db: + # condition: service_healthy + + # datapipe-grafana: + # image: grafana/grafana:7.5.4 + # container_name: datapipe-grafana + # restart: unless-stopped + # environment: + # - GF_INSTALL_PLUGINS=grafana-piechart-panel,grafana-worldmap-panel + # - GF_LOG_MODE=console + # - GF_LOG_LEVEL=info + # expose: + # - "3000" + # volumes: + # - grafana-data:/var/lib/grafana + + # text2lang: + # image: projecteaina/text2lang:latest + # container_name: datapipe-text2lang + # restart: unless-stopped + # expose: + # - "8000" + + # preprocessor: + # image: projecteaina/datapipe:latest + # container_name: datapipe-preprocessor + # restart: unless-stopped + # volumes: + # - datapipe-data:/datapipe + # command: "python -m preprocessor" + # environment: + # - PYTHONUNBUFFERED=1 + # - PG_HOST=datapipe-db + # - PG_PASSWORD=${POSTGRES_PASSWORD} + # - API_URL=http://text2lang:80/text2lang + # networks: + # - db-network + # depends_on: + # datapipe-db: + # condition: service_healthy + + fetcher: + image: projecteaina/datapipe:latest + restart: unless-stopped + volumes: + - datapipe-data:/datapipe + command: "fetcher" + deploy: + mode: replicated + replicas: 2 + environment: + - PYTHONUNBUFFERED=1 + - PG_HOST=datapipe-db + - PG_PASSWORD=${POSTGRES_PASSWORD} + - YT_AUDIO_DOWNLOAD_PATH=/datapipe/audio/youtube + - YT_CAPTION_DOWNLOAD_PATH=/datapipe/caption/youtube + - CCMA_AUDIO_DOWNLOAD_PATH=/datapipe/audio/ccma + - CCMA_VIDEO_DOWNLOAD_PATH=/datapipe/tmp/video/ccma + - CCMA_CAPTION_DOWNLOAD_PATH=/datapipe/caption/ccma + networks: + - db-network + depends_on: + datapipe-db: + condition: service_healthy + + # converter: + # image: projecteaina/datapipe:latest + # container_name: datapipe-converter + # restart: unless-stopped + # volumes: + # - datapipe-data:/datapipe + # command: "python -m converter" + # environment: + # - PYTHONUNBUFFERED=1 + # - PG_HOST=datapipe-db + # - AUDIO_16_PATH=/datapipe/audio16 + # - CCMA_AUDIO_DOWNLOAD_PATH=/datapipe/audio/ccma + # - CCMA_VIDEO_DOWNLOAD_PATH=/datapipe/tmp/video/ccma + # - PG_PASSWORD=${POSTGRES_PASSWORD} + # networks: + # - db-network + # depends_on: + # datapipe-db: + # condition: service_healthy + + # vad-transcriber: + # image: projecteaina/datapipe:latest + # container_name: datapipe-vad-transcriber + # restart: unless-stopped + # volumes: + # - datapipe-data:/datapipe + # command: "python -m vosk_vad_transcriber" + # environment: + # - PYTHONUNBUFFERED=1 + # - PG_HOST=datapipe-db + # - PG_PASSWORD=${POSTGRES_PASSWORD} + # - VOSK_SERVER_HOST=ona-vosk + # networks: + # - db-network + # depends_on: + # datapipe-db: + # condition: service_healthy + + # gender: + # image: projecteaina/datapipe:latest + # container_name: datapipe-gender + # restart: unless-stopped + # volumes: + # - datapipe-data:/datapipe + # command: "python -m gender" + # environment: + # - PYTHONUNBUFFERED=1 + # - PG_HOST=datapipe-db + # - PG_PASSWORD=${POSTGRES_PASSWORD} + # - CLIPS_PATH= /datapipe/clips + # networks: + # - db-network + # depends_on: + # datapipe-db: + # condition: service_healthy + + # splitter: + # image: projecteaina/datapipe:latest + # container_name: datapipe-splitter + # restart: unless-stopped + # volumes: + # - datapipe-data:/datapipe + # command: "python -m splitter" + # environment: + # - PYTHONUNBUFFERED=1 + # - PG_HOST=datapipe-db + # - PG_PASSWORD=${POSTGRES_PASSWORD} + # - CLIPS_PATH= /datapipe/clips + # networks: + # - db-network + # depends_on: + # datapipe-db: + # condition: service_healthy + + # ona-vosk: + # image: assistent/kaldi-catala:0.0.4 + # container_name: ona-vosk + # restart: unless-stopped + # environment: + # - VOSK_SAMPLE_RATE=16000 + # expose: + # - "5001" + + # wav2vec2-catala: + # image: ghcr.io/ccoreilly/wav2vec2-catala-onnx:0.1.1 + # container_name: wav2vec2-catala + # restart: unless-stopped + # expose: + # - "8000" + + # wav2vec2-transcriber: + # image: projecteaina/datapipe:latest + # container_name: datapipe-wav2vec2-transcriber + # restart: unless-stopped + # volumes: + # - datapipe-data:/datapipe + # command: "python -m wav2vec2_transcriber" + # environment: + # - PYTHONUNBUFFERED=1 + # - PG_HOST=datapipe-db + # - PG_PASSWORD=${POSTGRES_PASSWORD} + # - API_URL=http://wav2vec2-catala/recognize + # networks: + # - db-network + # depends_on: + # datapipe-db: + # condition: service_healthy + + +volumes: + datapipe-data: + datapipe-db-data: + grafana-data: + +networks: + db-network: + # driver_opts: + # com.docker.network.driver.mtu: ${NETWORK_MTU} diff --git a/k8s/converter/deployment.yaml b/k8s/converter/deployment.yaml index 5eb10ad..40893a3 100644 --- a/k8s/converter/deployment.yaml +++ b/k8s/converter/deployment.yaml @@ -26,7 +26,7 @@ spec: claimName: datapipe-storage-local containers: - name: converter - image: paulandrei/datapipe:0.8.0 + image: projecteaina/datapipe:latest imagePullPolicy: IfNotPresent command: [ "python", "-m", "converter" ] volumeMounts: diff --git a/k8s/debug.yaml b/k8s/debug.yaml index 1aaffd1..17c5f5e 100644 --- a/k8s/debug.yaml +++ b/k8s/debug.yaml @@ -26,7 +26,7 @@ spec: claimName: datapipe-storage containers: - name: debug - image: paulandrei/datapipe:0.8.0 + image: projecteaina/datapipe:latest imagePullPolicy: IfNotPresent command: [ "sleep" ] args: ["infinity"] diff --git a/k8s/fetcher/deployment.yaml b/k8s/fetcher/deployment.yaml index b0aef99..aae9816 100644 --- a/k8s/fetcher/deployment.yaml +++ b/k8s/fetcher/deployment.yaml @@ -26,7 +26,7 @@ spec: claimName: datapipe-storage-local containers: - name: fetcher - image: paulandrei/datapipe:0.8.0 + image: projecteaina/datapipe:latest imagePullPolicy: IfNotPresent command: [ "python", "-m", "fetcher" ] volumeMounts: diff --git a/k8s/gender/deployment.yaml b/k8s/gender/deployment.yaml index a39a741..bf07f0f 100644 --- a/k8s/gender/deployment.yaml +++ b/k8s/gender/deployment.yaml @@ -26,7 +26,7 @@ spec: claimName: datapipe-storage-local containers: - name: gender - image: paulandrei/datapipe:0.8.0 + image: projecteaina/datapipe:latest imagePullPolicy: IfNotPresent command: [ "python", "-m", "gender" ] volumeMounts: diff --git a/k8s/postgresql/sql/tables.sql b/k8s/postgresql/sql/tables.sql index 1f82695..88ce2c8 100644 --- a/k8s/postgresql/sql/tables.sql +++ b/k8s/postgresql/sql/tables.sql @@ -1,162 +1,164 @@ --- DROP TYPE public."clip_status"; - -CREATE TYPE public."clip_status" AS ENUM ( - 'new', - 'split', - 'validated', - 'splitting'); - --- DROP TYPE public."gender_type"; - -CREATE TYPE public."gender_type" AS ENUM ( - 'male', - 'female', - 'other', - 'unknown'); - --- DROP TYPE public."source_status"; - -CREATE TYPE public."source_status" AS ENUM ( - 'new', - 'downloading', - 'downloaded', - 'audio_extracting', - 'audio_extracted', - 'audio_converting', - 'audio_converted', - 'error', - 'checking_language', - 'bad_language', - 'ready_for_download', - 'vad_running', - 'vad_done', - 'splitting', - 'splitting_done', - 'age_restricted'); - --- DROP TYPE public."source_type"; - -CREATE TYPE public."source_type" AS ENUM ( - 'youtube'); - --- DROP TYPE public."variant_type"; - -CREATE TYPE public."variant_type" AS ENUM ( - 'balear', - 'central', - 'nord-occidental', - 'septentrional', - 'valencià', - 'alguerès', - 'unknown'); - --- public.sources definition - --- Drop table - --- DROP TABLE public.sources; - -CREATE TABLE public.sources ( - source_id uuid NOT NULL DEFAULT gen_random_uuid(), - url text NULL, - "type" public."source_type" NULL, - videopath text NULL, - audiopath text NULL, - audiopath_16 text NULL, - metadata jsonb NULL, - status public."source_status" NOT NULL DEFAULT 'new'::source_status, - duration float4 NULL, - sr int4 NULL, - license varchar NULL, - status_update timestamp NULL, - has_captions bool NULL DEFAULT false, - CONSTRAINT sources_pkey PRIMARY KEY (source_id) -); -CREATE INDEX idx_sources_url ON public.sources USING btree (url); - - --- public.clips definition - --- Drop table - --- DROP TABLE public.clips; - -CREATE TABLE public.clips ( - clip_id uuid NOT NULL DEFAULT gen_random_uuid(), - source_id uuid NOT NULL, - filepath text NULL, - transcript_id uuid NULL, - "start" float4 NULL, - "end" float4 NULL, - "language" varchar NULL, - duration float4 NULL, - status public."clip_status" NOT NULL DEFAULT 'new'::clip_status, - status_update timestamp NULL, - CONSTRAINT clips_pkey PRIMARY KEY (clip_id) -); - - --- public.genders definition - --- Drop table - --- DROP TABLE public.genders; - -CREATE TABLE public.genders ( - gender_id uuid NOT NULL DEFAULT gen_random_uuid(), - gender public."gender_type" NOT NULL DEFAULT 'unknown'::gender_type, - origin text NULL, - clip_id uuid NULL, - CONSTRAINT genders_pkey PRIMARY KEY (gender_id) -); - - --- public.transcripts definition - --- Drop table - --- DROP TABLE public.transcripts; - -CREATE TABLE public.transcripts ( - transcript_id uuid NOT NULL DEFAULT gen_random_uuid(), - "text" text NOT NULL, - transcriber text NOT NULL, - clip_id uuid NOT NULL, - CONSTRAINT transcripts_pkey PRIMARY KEY (transcript_id) -); - - --- public.variants definition - --- Drop table - --- DROP TABLE public.variants; - -CREATE TABLE public.variants ( - variant_id uuid NOT NULL DEFAULT gen_random_uuid(), - variant public."variant_type" NOT NULL DEFAULT 'unknown'::variant_type, - origin text NULL, - clip_id uuid NULL, - CONSTRAINT variants_pkey PRIMARY KEY (variant_id) -); - - --- public.clips foreign keys - -ALTER TABLE public.clips ADD CONSTRAINT clips_source_id_fkey FOREIGN KEY (source_id) REFERENCES public.sources(source_id); -ALTER TABLE public.clips ADD CONSTRAINT clips_transcript_id_fkey FOREIGN KEY (transcript_id) REFERENCES public.transcripts(transcript_id); - - --- public.genders foreign keys - -ALTER TABLE public.genders ADD CONSTRAINT genders_clip_id_fkey FOREIGN KEY (clip_id) REFERENCES public.clips(clip_id); - - --- public.transcripts foreign keys - -ALTER TABLE public.transcripts ADD CONSTRAINT transcripts_clip_id_fkey FOREIGN KEY (clip_id) REFERENCES public.clips(clip_id); - - --- public.variants foreign keys - -ALTER TABLE public.variants ADD CONSTRAINT variants_clip_id_fkey FOREIGN KEY (clip_id) REFERENCES public.clips(clip_id); \ No newline at end of file + -- DROP TYPE public."clip_status"; + + CREATE TYPE public."clip_status" AS ENUM ( + 'new', + 'split', + 'validated', + 'splitting'); + + -- DROP TYPE public."gender_type"; + + CREATE TYPE public."gender_type" AS ENUM ( + 'male', + 'female', + 'other', + 'unknown'); + + -- DROP TYPE public."source_status"; + + CREATE TYPE public."source_status" AS ENUM ( + 'new', + 'downloading', + 'downloaded', + 'audio_extracting', + 'audio_extracted', + 'audio_converting', + 'audio_converted', + 'error', + 'checking_language', + 'bad_language', + 'bad_license', + 'ready_for_download', + 'vad_running', + 'vad_done', + 'splitting', + 'splitting_done', + 'age_restricted'); + + -- DROP TYPE public."source_type"; + + CREATE TYPE public."source_type" AS ENUM ( + 'youtube', 'ccma'); + + -- DROP TYPE public."variant_type"; + + CREATE TYPE public."variant_type" AS ENUM ( + 'balear', + 'central', + 'nord-occidental', + 'septentrional', + 'valencià', + 'alguerès', + 'unknown'); + + -- public.sources definition + + -- Drop table + + -- DROP TABLE public.sources; + + CREATE TABLE public.sources ( + source_id uuid NOT NULL DEFAULT gen_random_uuid(), + url text NULL, + "type" public."source_type" NULL, + videopath text NULL, + audiopath text NULL, + audiopath_16 text NULL, + subtitlepath text NULL, + metadata jsonb NULL, + status public."source_status" NOT NULL DEFAULT 'new'::source_status, + duration float4 NULL, + sr int4 NULL, + license varchar NULL, + status_update timestamp NULL, + has_captions bool NULL DEFAULT false, + CONSTRAINT sources_pkey PRIMARY KEY (source_id) + ); + CREATE INDEX idx_sources_url ON public.sources USING btree (url); + + + -- public.clips definition + + -- Drop table + + -- DROP TABLE public.clips; + + CREATE TABLE public.clips ( + clip_id uuid NOT NULL DEFAULT gen_random_uuid(), + source_id uuid NOT NULL, + filepath text NULL, + transcript_id uuid NULL, + "start" float4 NULL, + "end" float4 NULL, + "language" varchar NULL, + duration float4 NULL, + status public."clip_status" NOT NULL DEFAULT 'new'::clip_status, + status_update timestamp NULL, + CONSTRAINT clips_pkey PRIMARY KEY (clip_id) + ); + + + -- public.genders definition + + -- Drop table + + -- DROP TABLE public.genders; + + CREATE TABLE public.genders ( + gender_id uuid NOT NULL DEFAULT gen_random_uuid(), + gender public."gender_type" NOT NULL DEFAULT 'unknown'::gender_type, + origin text NULL, + clip_id uuid NULL, + CONSTRAINT genders_pkey PRIMARY KEY (gender_id) + ); + + + -- public.transcripts definition + + -- Drop table + + -- DROP TABLE public.transcripts; + + CREATE TABLE public.transcripts ( + transcript_id uuid NOT NULL DEFAULT gen_random_uuid(), + "text" text NOT NULL, + transcriber text NOT NULL, + clip_id uuid NOT NULL, + CONSTRAINT transcripts_pkey PRIMARY KEY (transcript_id) + ); + + + -- public.variants definition + + -- Drop table + + -- DROP TABLE public.variants; + + CREATE TABLE public.variants ( + variant_id uuid NOT NULL DEFAULT gen_random_uuid(), + variant public."variant_type" NOT NULL DEFAULT 'unknown'::variant_type, + origin text NULL, + clip_id uuid NULL, + CONSTRAINT variants_pkey PRIMARY KEY (variant_id) + ); + + + -- public.clips foreign keys + + ALTER TABLE public.clips ADD CONSTRAINT clips_source_id_fkey FOREIGN KEY (source_id) REFERENCES public.sources(source_id); + ALTER TABLE public.clips ADD CONSTRAINT clips_transcript_id_fkey FOREIGN KEY (transcript_id) REFERENCES public.transcripts(transcript_id); + + + -- public.genders foreign keys + + ALTER TABLE public.genders ADD CONSTRAINT genders_clip_id_fkey FOREIGN KEY (clip_id) REFERENCES public.clips(clip_id); + + + -- public.transcripts foreign keys + + ALTER TABLE public.transcripts ADD CONSTRAINT transcripts_clip_id_fkey FOREIGN KEY (clip_id) REFERENCES public.clips(clip_id); + + + -- public.variants foreign keys + + ALTER TABLE public.variants ADD CONSTRAINT variants_clip_id_fkey FOREIGN KEY (clip_id) REFERENCES public.clips(clip_id); \ No newline at end of file diff --git a/k8s/preprocessor/deployment.yaml b/k8s/preprocessor/deployment.yaml index 15f6b61..43caef5 100644 --- a/k8s/preprocessor/deployment.yaml +++ b/k8s/preprocessor/deployment.yaml @@ -22,7 +22,7 @@ spec: spec: containers: - name: preprocessor - image: paulandrei/datapipe:0.8.0 + image: projecteaina/datapipe:latest imagePullPolicy: IfNotPresent command: [ "python", "-m", "preprocessor" ] env: diff --git a/k8s/splitter/deployment.yaml b/k8s/splitter/deployment.yaml index 3a8a977..4ffb4b5 100644 --- a/k8s/splitter/deployment.yaml +++ b/k8s/splitter/deployment.yaml @@ -26,7 +26,7 @@ spec: claimName: datapipe-storage-local containers: - name: splitter - image: paulandrei/datapipe:0.8.0 + image: projecteaina/datapipe:latest imagePullPolicy: IfNotPresent command: [ "python", "-m", "splitter" ] volumeMounts: diff --git a/k8s/text2lang/deployment.yaml b/k8s/text2lang/deployment.yaml index 4feaf77..44a52cb 100644 --- a/k8s/text2lang/deployment.yaml +++ b/k8s/text2lang/deployment.yaml @@ -20,7 +20,7 @@ spec: spec: containers: - name: text2lang - image: paulandrei/text2lang:0.1.2 + image: projecteaina/text2lang:08f6f8bb4c9d4680db967c59cf608c8318733969 imagePullPolicy: IfNotPresent livenessProbe: httpGet: diff --git a/k8s/vosk_vad_transcriber/deployment.yaml b/k8s/vosk_vad_transcriber/deployment.yaml index 9420c6f..359002f 100644 --- a/k8s/vosk_vad_transcriber/deployment.yaml +++ b/k8s/vosk_vad_transcriber/deployment.yaml @@ -26,7 +26,7 @@ spec: claimName: datapipe-storage-local containers: - name: vad-transcriber - image: paulandrei/datapipe:0.8.0 + image: projecteaina/datapipe:latest imagePullPolicy: IfNotPresent command: [ "python", "-m", "vosk_vad_transcriber" ] volumeMounts: diff --git a/k8s/wav2vec2_transcriber/deployment.yaml b/k8s/wav2vec2_transcriber/deployment.yaml index ae89338..5395ea7 100644 --- a/k8s/wav2vec2_transcriber/deployment.yaml +++ b/k8s/wav2vec2_transcriber/deployment.yaml @@ -26,7 +26,7 @@ spec: claimName: datapipe-storage-local containers: - name: wav2vec2-transcriber - image: paulandrei/datapipe:0.8.0 + image: projecteaina/datapipe:latest imagePullPolicy: IfNotPresent command: [ "python", "-m", "wav2vec2_transcriber" ] volumeMounts: diff --git a/okteto.yml b/okteto.yml index 3c39d8e..218e3de 100644 --- a/okteto.yml +++ b/okteto.yml @@ -13,7 +13,7 @@ build: datapipe: context: . dockerfile: Dockerfile - image: paulandrei/datapipe:0.8.0 + image: projecteaina/datapipe:latest # The deploy section defines how to deploy your development environment # More info: https://www.okteto.com/docs/reference/manifest/#deploy diff --git a/requirements.txt b/requirements.txt index 8611760..1244275 100644 --- a/requirements.txt +++ b/requirements.txt @@ -68,7 +68,7 @@ prettytable==3.0.0 primePy==1.3 protobuf==3.19.1 psycopg2-binary==2.9.2 -pyannote.audio @ https://github.com/pyannote/pyannote-audio/archive/develop.zip +pyannote.audio==2.1.1 pyannote.core==4.4 pyannote.database==4.1.1 pyannote.metrics==3.2 From f89abb226ad3e143548c1c44838bea5123936205 Mon Sep 17 00:00:00 2001 From: PaulNdrei Date: Mon, 6 May 2024 17:27:25 +0200 Subject: [PATCH 2/2] Add docker-compose --- docker-compose.yml | 144 ++++++++++++++++++++++----------------------- 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 1625915..9ae64a0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -26,7 +26,7 @@ services: retries: 5 # datapipe-db-backup: - # image: eeshugerman/postgres-backup-s3:16 + # image: eeshugerman/postgres-backup-s3:13 # environment: # SCHEDULE: ${SCHEDULE-@every 1h} # BACKUP_KEEP_DAYS: ${BACKUP_KEEP_DAYS-30} @@ -46,43 +46,43 @@ services: # datapipe-db: # condition: service_healthy - # datapipe-grafana: - # image: grafana/grafana:7.5.4 - # container_name: datapipe-grafana - # restart: unless-stopped - # environment: - # - GF_INSTALL_PLUGINS=grafana-piechart-panel,grafana-worldmap-panel - # - GF_LOG_MODE=console - # - GF_LOG_LEVEL=info - # expose: - # - "3000" - # volumes: - # - grafana-data:/var/lib/grafana + datapipe-grafana: + image: grafana/grafana:7.5.4 + container_name: datapipe-grafana + restart: unless-stopped + environment: + - GF_INSTALL_PLUGINS=grafana-piechart-panel,grafana-worldmap-panel + - GF_LOG_MODE=console + - GF_LOG_LEVEL=info + expose: + - "3000" + volumes: + - grafana-data:/var/lib/grafana - # text2lang: - # image: projecteaina/text2lang:latest - # container_name: datapipe-text2lang - # restart: unless-stopped - # expose: - # - "8000" + text2lang: + image: projecteaina/text2lang:latest + container_name: datapipe-text2lang + restart: unless-stopped + expose: + - "8000" - # preprocessor: - # image: projecteaina/datapipe:latest - # container_name: datapipe-preprocessor - # restart: unless-stopped - # volumes: - # - datapipe-data:/datapipe - # command: "python -m preprocessor" - # environment: - # - PYTHONUNBUFFERED=1 - # - PG_HOST=datapipe-db - # - PG_PASSWORD=${POSTGRES_PASSWORD} - # - API_URL=http://text2lang:80/text2lang - # networks: - # - db-network - # depends_on: - # datapipe-db: - # condition: service_healthy + preprocessor: + image: projecteaina/datapipe:latest + container_name: datapipe-preprocessor + restart: unless-stopped + volumes: + - datapipe-data:/datapipe + command: "python -m preprocessor" + environment: + - PYTHONUNBUFFERED=1 + - PG_HOST=datapipe-db + - PG_PASSWORD=${POSTGRES_PASSWORD} + - API_URL=http://text2lang:80/text2lang + networks: + - db-network + depends_on: + datapipe-db: + condition: service_healthy fetcher: image: projecteaina/datapipe:latest @@ -108,25 +108,25 @@ services: datapipe-db: condition: service_healthy - # converter: - # image: projecteaina/datapipe:latest - # container_name: datapipe-converter - # restart: unless-stopped - # volumes: - # - datapipe-data:/datapipe - # command: "python -m converter" - # environment: - # - PYTHONUNBUFFERED=1 - # - PG_HOST=datapipe-db - # - AUDIO_16_PATH=/datapipe/audio16 - # - CCMA_AUDIO_DOWNLOAD_PATH=/datapipe/audio/ccma - # - CCMA_VIDEO_DOWNLOAD_PATH=/datapipe/tmp/video/ccma - # - PG_PASSWORD=${POSTGRES_PASSWORD} - # networks: - # - db-network - # depends_on: - # datapipe-db: - # condition: service_healthy + converter: + image: projecteaina/datapipe:latest + container_name: datapipe-converter + restart: unless-stopped + volumes: + - datapipe-data:/datapipe + command: "python -m converter" + environment: + - PYTHONUNBUFFERED=1 + - PG_HOST=datapipe-db + - AUDIO_16_PATH=/datapipe/audio16 + - CCMA_AUDIO_DOWNLOAD_PATH=/datapipe/audio/ccma + - CCMA_VIDEO_DOWNLOAD_PATH=/datapipe/tmp/video/ccma + - PG_PASSWORD=${POSTGRES_PASSWORD} + networks: + - db-network + depends_on: + datapipe-db: + condition: service_healthy # vad-transcriber: # image: projecteaina/datapipe:latest @@ -146,23 +146,23 @@ services: # datapipe-db: # condition: service_healthy - # gender: - # image: projecteaina/datapipe:latest - # container_name: datapipe-gender - # restart: unless-stopped - # volumes: - # - datapipe-data:/datapipe - # command: "python -m gender" - # environment: - # - PYTHONUNBUFFERED=1 - # - PG_HOST=datapipe-db - # - PG_PASSWORD=${POSTGRES_PASSWORD} - # - CLIPS_PATH= /datapipe/clips - # networks: - # - db-network - # depends_on: - # datapipe-db: - # condition: service_healthy + gender: + image: projecteaina/datapipe:latest + container_name: datapipe-gender + restart: unless-stopped + volumes: + - datapipe-data:/datapipe + command: "python -m gender" + environment: + - PYTHONUNBUFFERED=1 + - PG_HOST=datapipe-db + - PG_PASSWORD=${POSTGRES_PASSWORD} + - CLIPS_PATH= /datapipe/clips + networks: + - db-network + depends_on: + datapipe-db: + condition: service_healthy # splitter: # image: projecteaina/datapipe:latest