From 511e3182592657f5ce51e8946b92bbea0a31133d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Bournhonesque?=
Date: Thu, 3 Oct 2024 13:31:54 +0200
Subject: [PATCH 1/3] chore: upload data dump to AWS S3

---
 scripts/gen_feeds_daily_off.sh | 10 ++++++++++
 scripts/mongodb_dump.sh        |  7 +++++++
 2 files changed, 17 insertions(+)

diff --git a/scripts/gen_feeds_daily_off.sh b/scripts/gen_feeds_daily_off.sh
index b2401a4af01e9..a55b4c0956e72 100755
--- a/scripts/gen_feeds_daily_off.sh
+++ b/scripts/gen_feeds_daily_off.sh
@@ -26,6 +26,16 @@ cd /srv/off/scripts
 
 ./mongodb_dump.sh /srv/off/html openfoodfacts 10.1.0.102 off
 
+# Copy CSV and RDF files to AWS S3 using MinIO client
+mc cp \
+    en.openfoodfacts.org.products.csv \
+    en.openfoodfacts.org.products.csv.gz \
+    en.openfoodfacts.org.products.rdf \
+    fr.openfoodfacts.org.products.csv \
+    fr.openfoodfacts.org.products.csv.gz \
+    fr.openfoodfacts.org.products.rdf \
+    s3/openfoodfacts-ds
+
 # Small products data and images export for Docker dev environments
 # for about 1/10000th of the products contained in production.
 ./export_products_data_and_images.pl --sample-mod 10000,0 \
diff --git a/scripts/mongodb_dump.sh b/scripts/mongodb_dump.sh
index fe206172f08b2..dc7f43afeadf2 100755
--- a/scripts/mongodb_dump.sh
+++ b/scripts/mongodb_dump.sh
@@ -60,4 +60,11 @@ popd > /dev/null # data/delta
 mongoexport --collection recent_changes --host $HOST --db $DB --fields=_id,comment,code,userid,rev,countries_tags,t,diffs | gzip -9 > "new.${PREFIX}_recent_changes.jsonl.gz" && \
 mv new.${PREFIX}_recent_changes.jsonl.gz ${PREFIX}_recent_changes.jsonl.gz
 
+# Copy files to AWS S3 using MinIO client
+mc cp \
+    ${PREFIX}-products.jsonl.gz \
+    ${PREFIX}_recent_changes.jsonl.gz \
+    ${PREFIX}-mongodbdump.gz \
+    s3/openfoodfacts-ds
+
 popd > /dev/null # data

From 4cbeda41004815e097868743dc48862077ccb2c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Bournhonesque?=
Date: Thu, 3 Oct 2024 14:03:33 +0200
Subject: [PATCH 2/3] chore: redirect (HTTP 302) some dump files to AWS S3

---
 conf/nginx/sites-available/off | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/conf/nginx/sites-available/off b/conf/nginx/sites-available/off
index dcbfebeef7666..0abce759dc10f 100644
--- a/conf/nginx/sites-available/off
+++ b/conf/nginx/sites-available/off
@@ -53,6 +53,29 @@ server {
 		gunzip on;
 	}
 
+	# Add an HTTP 302 redirect to AWS S3 bucket for specific dump files
+	location = /data/openfoodfacts_recent_changes.jsonl.gz {
+		return 302 https://openfoodfacts-ds.s3.eu-west-3.amazonaws.com/openfoodfacts_recent_changes.jsonl.gz;
+	}
+	location = /data/openfoodfacts-mongodbdump.gz {
+		return 302 https://openfoodfacts-ds.s3.eu-west-3.amazonaws.com/openfoodfacts-mongodbdump.gz;
+	}
+	location = /data/openfoodfacts-products.jsonl.gz {
+		return 302 https://openfoodfacts-ds.s3.eu-west-3.amazonaws.com/openfoodfacts-products.jsonl.gz;
+	}
+	location = /data/en.openfoodfacts.org.products.csv {
+		return 302 https://openfoodfacts-ds.s3.eu-west-3.amazonaws.com/en.openfoodfacts.org.products.csv;
+	}
+	location = /data/en.openfoodfacts.org.products.csv.gz {
+		return 302 https://openfoodfacts-ds.s3.eu-west-3.amazonaws.com/en.openfoodfacts.org.products.csv.gz;
+	}
+	location = /data/fr.openfoodfacts.org.products.csv {
+		return 302 https://openfoodfacts-ds.s3.eu-west-3.amazonaws.com/fr.openfoodfacts.org.products.csv;
+	}
+	location = /data/fr.openfoodfacts.org.products.csv.gz {
+		return 302 https://openfoodfacts-ds.s3.eu-west-3.amazonaws.com/fr.openfoodfacts.org.products.csv.gz;
+	}
+
 	if ($http_referer ~* (jobothoniel.com) ) { return 403; } # blocked since 2021-07-13
 
 	# the app requests /1.json to get the product count...

From 4ff2a7e58f5074c8745f6067ecf75e0c3e584a36 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Bournhonesque?=
Date: Thu, 3 Oct 2024 14:17:10 +0200
Subject: [PATCH 3/3] fix: upload CSV and RDF right after creation

---
 scripts/gen_feeds_daily_off.sh | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/scripts/gen_feeds_daily_off.sh b/scripts/gen_feeds_daily_off.sh
index a55b4c0956e72..aabd82ba49911 100755
--- a/scripts/gen_feeds_daily_off.sh
+++ b/scripts/gen_feeds_daily_off.sh
@@ -21,11 +21,6 @@ for export in en.openfoodfacts.org.products.csv fr.openfoodfacts.org.products.cs
     mv -f new.$export.gz $export.gz
 done
 
-# Generate the MongoDB dumps and jsonl export
-cd /srv/off/scripts
-
-./mongodb_dump.sh /srv/off/html openfoodfacts 10.1.0.102 off
-
 # Copy CSV and RDF files to AWS S3 using MinIO client
 mc cp \
     en.openfoodfacts.org.products.csv \
@@ -36,6 +31,11 @@ mc cp \
     fr.openfoodfacts.org.products.rdf \
     s3/openfoodfacts-ds
 
+# Generate the MongoDB dumps and jsonl export
+cd /srv/off/scripts
+
+./mongodb_dump.sh /srv/off/html openfoodfacts 10.1.0.102 off
+
 # Small products data and images export for Docker dev environments
 # for about 1/10000th of the products contained in production.
 ./export_products_data_and_images.pl --sample-mod 10000,0 \