Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Freeze to master #337

Merged
merged 20 commits into from
Jul 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit. Hold shift + click to select a range.
82edce7
disable xpack security with 8.1 upgrade
rhanka May 2, 2022
c9fb8a8
Merge pull request #312 from matchID-project/feat/upgrade-elastic-8
rhanka May 2, 2022
74ec7eb
add repository s3 for snapshots in elasticsearch
rhanka Dec 11, 2023
feb4c0d
Merge pull request #317 from matchID-project/feat/es-respository
rhanka Dec 11, 2023
8b1746d
⬆️ 🔒️ update Elasticsearch to 7.16.3 CVE-2021-22144
rhanka Jan 18, 2022
e99e490
disable xpack security with 8.1 upgrade
rhanka May 2, 2022
c61f49c
add repository s3 for snapshots in elasticsearch
rhanka Dec 11, 2023
84fcd1a
add elasticsearch repository snapshot method on s3
rhanka Dec 17, 2023
946412d
Merge pull request #318 from matchID-project/feat/es-respository
rhanka Dec 17, 2023
a55988c
fix repository plugin config
rhanka Dec 17, 2023
f0c2373
freeze index before backup
rhanka Jan 6, 2024
6cfa833
speed up elasticsearch init
rhanka Jan 6, 2024
bc84e4a
extend backend backup timeout
rhanka Apr 16, 2024
aa2cff4
Merge pull request #324 from matchID-project/fix/backend-timeout
rhanka Apr 16, 2024
9e93841
upgrade to flask_restx, werkzeug 3
rhanka Jul 18, 2024
f6fff7b
remove useless imports
rhanka Jul 18, 2024
230a552
remove obsolete methode prefix for pandas.read_csv
rhanka Jul 21, 2024
a42bf0e
Merge pull request #335 from matchID-project/fix/upgrade-flask
rhanka Jul 21, 2024
a012f37
fix: requirements.txt to reduce vulnerabilities
snyk-bot Jul 21, 2024
6099586
Merge pull request #336 from matchID-project/snyk-fix-a774df6c2207f34…
rhanka Jul 21, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 124 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export API_TEST_JSON_PATH=swagger
export PORT=8081
export BACKEND_PORT=8081
export TIMEOUT=30
export BACKUP_TIMEOUT=1800
# auth method - do not use auth by default (auth can be both passwords and OAuth)
export NO_AUTH=True
export TWITTER_OAUTH_ID=None
Expand Down Expand Up @@ -78,21 +79,25 @@ export MATCHID_DATA_BUCKET=$(shell echo ${APP_GROUP} | tr '[:upper:]' '[:lower:]
export MATCHID_CONFIG_BUCKET=$(shell echo ${APP_GROUP} | tr '[:upper:]' '[:lower:]')

# elasticsearch defaut configuration
export ES_INDEX=${APP_GROUP}
export ES_NODES = 1 # elasticsearch number of nodes
export ES_SWARM_NODE_NUMBER = 2 # elasticsearch number of nodes
export ES_MEM = 1024m # elasticsearch : memory of each node
export ES_VERSION = 7.16.3
export ES_MEM := 1024m # elasticsearch : memory of each node
export ES_JAVA_OPTS := -Xms${ES_MEM} -Xmx${ES_MEM} # elasticsearch : java options
export ES_VERSION = 8.6.1
export ES_DATA = ${BACKEND}/esdata
export ES_THREADS = 2
export ES_MAX_TRIES = 3
export ES_CHUNK = 500
export ES_BACKUP_FILE := $(shell echo esdata_`date +"%Y%m%d"`.tar)
export ES_BACKUP_FILE_SNAR = esdata.snar
export ES_BACKUP_NAME := $(shell echo esdata_`date +"%Y%m%d"`)
export ES_BACKUP_FILE := ${ES_BACKUP_NAME}.tar
export ES_BACKUP_FILE_SNAR = ${ES_BACKUP_NAME}.snar

export DB_SERVICES=elasticsearch postgres

export SERVICES=${DB_SERVICES} backend frontend

-include ${APP_PATH}/${GIT_TOOLS}/artifacts.SCW
dummy := $(shell touch artifacts)
include ./artifacts

Expand Down Expand Up @@ -189,6 +194,106 @@ endif
elasticsearch2-stop:
@${DC} -f ${DC_FILE}-elasticsearch-huge-remote.yml down

# elasticsearch-repository-plugin: one-shot setup of the S3 snapshot credentials.
# Loads STORAGE_ACCESS_KEY / STORAGE_SECRET_KEY into the elasticsearch keystore
# (--force overwrites any previous value), restarts the container so the
# keystore is re-read, then polls the REST API for up to TIMEOUT seconds until
# it answers again. On success a stamp file named after the target is touched,
# so the setup runs only once (file-as-stamp pattern, hence no .PHONY).
# NOTE(review): assumes STORAGE_ACCESS_KEY / STORAGE_SECRET_KEY are exported
# elsewhere in the Makefile or environment — confirm.
# NOTE(review): ((timeout--)) is a bashism — assumes SHELL is bash, not POSIX sh.
elasticsearch-repository-plugin: elasticsearch-start
@if [ ! -f "elasticsearch-repository-plugin" ]; then\
echo installing elasticsearch repository plugin;\
docker exec -i ${USE_TTY} ${DC_PREFIX}-elasticsearch sh -c \
"echo ${STORAGE_ACCESS_KEY} | bin/elasticsearch-keystore add --stdin --force s3.client.default.access_key";\
docker exec -i ${USE_TTY} ${DC_PREFIX}-elasticsearch sh -c \
"echo ${STORAGE_SECRET_KEY} | bin/elasticsearch-keystore add --stdin --force s3.client.default.secret_key";\
docker restart ${DC_PREFIX}-elasticsearch;\
timeout=${TIMEOUT} ; ret=1 ; until [ "$$timeout" -le 0 -o "$$ret" -eq "0" ] ; do (docker exec -i ${USE_TTY} ${DC_PREFIX}-elasticsearch curl -s --fail -XGET localhost:9200/ > /dev/null) ; ret=$$? ; if [ "$$ret" -ne "0" ] ; then echo -en "\rwaiting for elasticsearch API to start $$timeout" ; fi ; ((timeout--)); sleep 1 ; done ;\
echo; touch elasticsearch-repository-plugin ; exit $$ret;\
fi;

# elasticsearch-repository-config: register the S3 snapshot repository named
# ${APP_GROUP} (bucket ${REPOSITORY_BUCKET}) through the elasticsearch
# _snapshot API. A stamp file named after the target is touched only when
# elasticsearch answers with "acknowledged":true, so registration happens once.
# NOTE(review): assumes REPOSITORY_BUCKET, SCW_REGION and SCW_ENDPOINT are set
# elsewhere in the Makefile or environment — confirm.
elasticsearch-repository-config: elasticsearch-repository-plugin
@if [ ! -f "elasticsearch-repository-config" ]; then\
echo creating elasticsearch repository ${APP_GROUP} in s3 bucket ${REPOSITORY_BUCKET} && \
docker exec -i ${USE_TTY} ${DC_PREFIX}-elasticsearch \
curl -s -XPUT "localhost:9200/_snapshot/${APP_GROUP}" -H 'Content-Type: application/json' \
-d '{"type": "s3","settings": {"bucket": "${REPOSITORY_BUCKET}","client": "default","region": "${SCW_REGION}","endpoint": "${SCW_ENDPOINT}","path_style_access": true,"protocol": "https"}}' \
| grep -q '"acknowledged":true' && touch elasticsearch-repository-config;\
fi

# elasticsearch-freeze: set the write block on ${ES_INDEX} so its content can
# no longer change (used right before taking a snapshot). Always exits 0:
# a failed freeze only prints a message, it does not abort the build.
elasticsearch-freeze:
@if docker exec -i ${USE_TTY} ${DC_PREFIX}-elasticsearch \
curl -s -XPUT "localhost:9200/${ES_INDEX}/_settings" -H 'Content-Type: application/json' \
-d '{"index":{"blocks.write": true}}' | grep -q '"acknowledged":true'; then\
echo "index ${ES_INDEX} frozen";\
else\
echo "index ${ES_INDEX} freeze failed";\
fi

# elasticsearch-repository-backup: synchronous snapshot of ${ES_INDEX} into the
# S3-backed repository ${APP_GROUP}.
# Triggers the snapshot, then polls its state every ~2s (one dot per poll,
# refreshing the countdown line every 10 dots) until elasticsearch reports
# "state":"SUCCESS" or BACKUP_TIMEOUT seconds elapse. Decrementing timeout
# twice per iteration keeps the countdown in seconds while sleeping 2s.
# A stamp file named after the target is touched on success.
# Fix: added the missing space before the backslash-newline after the -H
# header value — without it the shell joined 'application/json' and -d into a
# single word, corrupting the curl invocation.
# NOTE(review): ((timeout--)) / ((dot_count++)) are bashisms — assumes SHELL is bash.
elasticsearch-repository-backup: elasticsearch-repository-config elasticsearch-freeze
@\
docker exec -i ${USE_TTY} ${DC_PREFIX}-elasticsearch \
curl -s -XPUT "localhost:9200/_snapshot/${APP_GROUP}/${ES_BACKUP_NAME}" -H 'Content-Type: application/json' \
-d '{"indices": "${ES_INDEX}", "ignore_unavailable": true, "include_global_state": false}' \
| grep -q '{"accepted":true}';\
if [ "$$?" -ne "0" ]; then\
echo "snapshot ${ES_BACKUP_NAME} creation failed";\
exit 1;\
fi;\
echo -n creating snapshot ${ES_BACKUP_NAME} in elasticsearch repository;\
timeout=${BACKUP_TIMEOUT} ; ret=1 ; dot_count=0 ;\
until [ "$$timeout" -le 0 -o "$$ret" -eq "0" ] ; do\
docker exec -i ${USE_TTY} ${DC_PREFIX}-elasticsearch \
curl -s -XGET "localhost:9200/_snapshot/${APP_GROUP}/${ES_BACKUP_NAME}"\
| grep -q '"state":"SUCCESS"';\
ret=$$? ; \
if [ "$$ret" -ne "0" ] ; then\
echo -en "." ; \
((dot_count++));\
if [ "$$dot_count" -gt "10" ]; then\
echo -en "\rwaiting for snapshot ${ES_BACKUP_NAME} to complete $$timeout" ;\
dot_count=0;\
fi;\
fi ;\
((timeout--));((timeout--)); sleep 2 ; \
done ; echo ;\
if [ "$$ret" -ne "0" ]; then\
echo "snapshot ${ES_BACKUP_NAME} creation failed";\
exit $$ret;\
fi;\
echo "snapshot ${ES_BACKUP_NAME} created in elasticsearch repository" && touch elasticsearch-repository-backup


# elasticsearch-repository-backup-async: fire-and-forget variant of
# elasticsearch-repository-backup — submits the snapshot request and returns
# immediately without polling for completion (the raw JSON answer is printed).
# Fix: added the missing space before the backslash-newline after the -H
# header value so the header value and -d stay separate shell words.
elasticsearch-repository-backup-async: elasticsearch-repository-config
@docker exec -i ${USE_TTY} ${DC_PREFIX}-elasticsearch \
curl -s -XPUT "localhost:9200/_snapshot/${APP_GROUP}/${ES_BACKUP_NAME}" -H 'Content-Type: application/json' \
-d '{"indices": "${ES_INDEX}", "ignore_unavailable": true, "include_global_state": false}'

# elasticsearch-repository-delete: remove snapshot ${ES_BACKUP_NAME} from the
# ${APP_GROUP} repository. The confirmation message is printed only when the
# DELETE call succeeded; curl output is discarded.
elasticsearch-repository-delete: elasticsearch-repository-config
@docker exec -i ${USE_TTY} ${DC_PREFIX}-elasticsearch \
curl -s -XDELETE "localhost:9200/_snapshot/${APP_GROUP}/${ES_BACKUP_NAME}" \
> /dev/null 2>&1 \
&& echo "snapshot ${ES_BACKUP_NAME} deleted from elasticsearch repository"

# elasticsearch-repository-list: print the name of every snapshot stored in
# the ${APP_GROUP} repository, one per line.
# Note: jq runs on the HOST (the pipe is outside docker exec), so jq must be
# installed locally.
elasticsearch-repository-list: elasticsearch-repository-config
@docker exec -i ${USE_TTY} ${DC_PREFIX}-elasticsearch \
curl -s -XGET "localhost:9200/_snapshot/${APP_GROUP}/_all"\
| jq -r '.snapshots[].snapshot'

# elasticsearch-repository-restore: restore ${ES_INDEX} from snapshot
# ${ES_BACKUP_NAME}, blocking until done (wait_for_completion=true). Touches a
# stamp file named after the target on success; curl output is discarded.
# Fix: added the missing space before the backslash-newline after the -H
# header value — without it the shell joined 'application/json' and -d into a
# single word, corrupting the curl invocation.
elasticsearch-repository-restore: elasticsearch-repository-config
@echo restoring snapshot ${ES_BACKUP_NAME} from elasticsearch repository;\
(\
docker exec -i ${USE_TTY} ${DC_PREFIX}-elasticsearch \
curl -s -XPOST localhost:9200/_snapshot/${APP_GROUP}/${ES_BACKUP_NAME}/_restore?wait_for_completion=true -H 'Content-Type: application/json' \
-d '{"indices": "${ES_INDEX}","ignore_unavailable": true,"include_global_state": false}' \
> /dev/null 2>&1\
) && echo "snapshot ${ES_BACKUP_NAME} restored from elasticsearch repository" && touch elasticsearch-repository-restore

# elasticsearch-repository-check: verify that a snapshot named
# ${ES_BACKUP_NAME} exists in the ${APP_GROUP} repository; on success a
# ${ES_BACKUP_NAME}.check stamp file is created in ${BACKUP_DIR} so the check
# is not repeated. Always exits 0 — a missing snapshot only prints a message.
# Fixes: "mkdir -p touch <file>" created a directory named after the stamp
# file (and a literal "touch" dir) instead of touching it; "found for or"
# message typo; missing ";" before the closing "fi" — a shell syntax error
# once the backslash-continued lines are joined.
elasticsearch-repository-check: elasticsearch-repository-config backup-dir
@if [ ! -f "${BACKUP_DIR}/${ES_BACKUP_NAME}.check" ]; then\
(\
docker exec -i ${USE_TTY} ${DC_PREFIX}-elasticsearch \
curl -s -XGET "localhost:9200/_snapshot/${APP_GROUP}/_all" \
| jq -r '.snapshots[].snapshot' | grep -q "${ES_BACKUP_NAME}" \
) > /dev/null 2>&1 \
&& echo "snapshot found for ${ES_BACKUP_NAME} in elasticsearch repository" && touch "${BACKUP_DIR}/${ES_BACKUP_NAME}.check" \
|| (echo "no snapshot found for ${ES_BACKUP_NAME} in elasticsearch repository");\
fi

elasticsearch-backup: elasticsearch-stop backup-dir
@echo taring ${ES_DATA} to ${BACKUP_DIR}/${ES_BACKUP_FILE}
@cd $$(dirname ${ES_DATA}) && sudo tar --create --file=${BACKUP_DIR}/${ES_BACKUP_FILE} --listed-incremental=${BACKUP_DIR}/${ES_BACKUP_FILE_SNAR} $$(basename ${ES_DATA})
Expand Down Expand Up @@ -227,9 +332,9 @@ endif

elasticsearch-dev: elasticsearch

elasticsearch: network vm_max
elasticsearch-cluster: network vm_max
@echo docker-compose up matchID elasticsearch with ${ES_NODES} nodes
@cat ${DC_FILE}-elasticsearch.yml | sed "s/%M/${ES_MEM}/g" > ${DC_FILE}-elasticsearch-huge.yml
@cat ${DC_FILE}-elasticsearch.yml > ${DC_FILE}-elasticsearch-huge.yml
@(if [ ! -d ${ES_DATA}/node1 ]; then sudo mkdir -p ${ES_DATA}/node1 ; sudo chmod g+rw ${ES_DATA}/node1/.; sudo chown 1000:1000 ${ES_DATA}/node1/.; fi)
@(i=$(ES_NODES); while [ $${i} -gt 1 ]; \
do \
Expand All @@ -241,15 +346,23 @@ elasticsearch: network vm_max
${DC} -f ${DC_FILE}-elasticsearch-huge.yml up -d
@timeout=${TIMEOUT} ; ret=1 ; until [ "$$timeout" -le 0 -o "$$ret" -eq "0" ] ; do (docker exec -i ${USE_TTY} ${DC_PREFIX}-elasticsearch curl -s --fail -XGET localhost:9200/_cat/indices > /dev/null) ; ret=$$? ; if [ "$$ret" -ne "0" ] ; then echo -en "\rwaiting for elasticsearch to start $$timeout" ; fi ; ((timeout--)); sleep 1 ; done ; echo ; exit $$ret

# elasticsearch-start: create the first data-node directory (owned by uid/gid
# 1000, the in-container elasticsearch user) when missing, then bring the
# compose stack up detached. Does not wait for the API — see the
# "elasticsearch" target for the readiness loop.
elasticsearch-start: network vm_max
@echo docker-compose up matchID elasticsearch with ${ES_NODES} nodes
@if [ ! -d ${ES_DATA}/node1 ]; then \
sudo mkdir -p ${ES_DATA}/node1; \
sudo chmod g+rw ${ES_DATA}/node1/.; \
sudo chown 1000:1000 ${ES_DATA}/node1/.; \
fi
${DC} -f ${DC_FILE}-elasticsearch.yml up -d

# elasticsearch: bring the single-node stack up (via elasticsearch-start) and
# block until the REST API answers or TIMEOUT seconds elapse; exits with the
# last curl status, so the target fails when the API never came up.
# NOTE(review): ((timeout--)) is a bashism — assumes SHELL is bash, not POSIX sh.
elasticsearch: elasticsearch-start
@timeout=${TIMEOUT} ; ret=1 ; until [ "$$timeout" -le 0 -o "$$ret" -eq "0" ] ; do (docker exec -i ${USE_TTY} ${DC_PREFIX}-elasticsearch curl -s --fail -XGET localhost:9200/ > /dev/null) ; ret=$$? ; if [ "$$ret" -ne "0" ] ; then echo -en "\rwaiting for elasticsearch API to start $$timeout" ; fi ; ((timeout--)); sleep 1 ; done ; echo ; exit $$ret

elasticsearch2:
@echo docker-compose up matchID elasticsearch with ${ES_NODES} nodes
@cat ${DC_FILE}-elasticsearch.yml | head -8 > ${DC_FILE}-elasticsearch-huge-remote.yml
@(i=$$(( $(ES_NODES) * $(ES_SWARM_NODE_NUMBER) ));j=$$(( $(ES_NODES) * $(ES_SWARM_NODE_NUMBER) - $(ES_NODES))); while [ $${i} -gt $${j} ]; \
do \
if [ ! -d ${ES_DATA}/node$$i ]; then (echo ${ES_DATA}/node$$i && sudo mkdir -p ${ES_DATA}/node$$i && sudo chmod g+rw ${ES_DATA}/node$$i/. && sudo chown 1000:1000 ${ES_DATA}/node$$i/.); fi; \
cat ${DC_FILE}-elasticsearch-node.yml | sed "s/%N/$$i/g;s/%MM/${ES_MEM}/g;s/%M/${ES_MEM}/g" | egrep -v 'depends_on|- elasticsearch' >> ${DC_FILE}-elasticsearch-huge-remote.yml; \
i=`expr $$i - 1`; \
done;\
do \
if [ ! -d ${ES_DATA}/node$$i ]; then (echo ${ES_DATA}/node$$i && sudo mkdir -p ${ES_DATA}/node$$i && sudo chmod g+rw ${ES_DATA}/node$$i/. && sudo chown 1000:1000 ${ES_DATA}/node$$i/.); fi; \
cat ${DC_FILE}-elasticsearch-node.yml | sed "s/%N/$$i/g;s/%MM/${ES_MEM}/g;s/%M/${ES_MEM}/g" | egrep -v 'depends_on|- elasticsearch' >> ${DC_FILE}-elasticsearch-huge-remote.yml; \
i=`expr $$i - 1`; \
done;\
true)
${DC} -f ${DC_FILE}-elasticsearch-huge-remote.yml up -d

Expand Down
3 changes: 1 addition & 2 deletions code/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import simplejson
from collections.abc import Iterable
from collections import OrderedDict
from pandas.io.json import json_normalize
from collections import deque


Expand All @@ -44,7 +43,7 @@
from flask import Flask, current_app, jsonify, Response, abort, request, g, stream_with_context
from flask.sessions import SecureCookieSessionInterface
from flask_login import LoginManager, login_user, login_required, logout_user, current_user
from flask_restplus import Resource, Api, reqparse
from flask_restx import Resource, Api
from werkzeug.utils import secure_filename
from werkzeug.serving import run_simple
from werkzeug.middleware.dispatcher import DispatcherMiddleware
Expand Down
2 changes: 1 addition & 1 deletion code/parsers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import werkzeug
from flask_restplus import reqparse
from flask_restx import reqparse
from werkzeug.datastructures import FileStorage


Expand Down
3 changes: 1 addition & 2 deletions code/recipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import simplejson
from collections import Iterable
from collections import OrderedDict
from pandas.io.json import json_normalize
from collections import deque


Expand Down Expand Up @@ -537,7 +536,7 @@ def iterator_from_files(self):
self.open(file),
sep=self.sep, usecols=self.select, chunksize=self.chunk,
encoding=self.encoding,dtype=object, header=self.header, names=self.names, skiprows=self.skiprows,
prefix=self.prefix, iterator=True, index_col=False, keep_default_na=False
iterator=True, index_col=False, keep_default_na=False
)
elif (self.type == "fwf"):
reader = pd.read_fwf(
Expand Down
3 changes: 2 additions & 1 deletion docker-components/docker-compose-elasticsearch-node.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
environment:
- cluster.name=matchid-cluster
- bootstrap.memory_lock=true
- "ES_JAVA_OPTS=-Xms%M -Xmx%M"
- ES_JAVA_OPTS
- "discovery.zen.ping.unicast.hosts=elasticsearch"
- xpack.security.enabled=false
ulimits:
memlock:
soft: -1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ services:
container_name: ${DC_PREFIX}-elasticsearch-phonetic
environment:
- cluster.name=docker-cluster
# - xpack.security.enabled=false
- xpack.security.enabled=false
- "ES_JAVA_OPTS=-Xms3072m -Xmx3072m"
ulimits:
memlock:
Expand Down
17 changes: 16 additions & 1 deletion docker-components/docker-compose-elasticsearch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,27 @@ networks:
services:
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:${ES_VERSION}
command:
- /bin/bash
- -c
- |
if [ ! -z "$http_proxy" ]; then
host=$${http_proxy##*//};
host=$${host%%:*};
port=$${http_proxy##*:};
(echo -e "cluster.name: \"docker-cluster\"\nnetwork.host: 0.0.0.0\ns3.client.default.proxy.host: $${host}\ns3.client.default.proxy.port: $${port}" > /usr/share/elasticsearch/config/elasticsearch.yml);
fi;
exec /usr/local/bin/docker-entrypoint.sh elasticsearch
container_name: ${DC_PREFIX}-elasticsearch
environment:
- cluster.name=matchid-cluster
- bootstrap.memory_lock=true
- "ES_JAVA_OPTS=-Xms%M -Xmx%M"
- ES_JAVA_OPTS
- http_proxy
- https_proxy
- no_proxy
- discovery.type=single-node
- xpack.security.enabled=false
ulimits:
memlock:
soft: -1
Expand Down
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ elasticsearch
enum34
Flask
flask-login
flask_restplus
flask_restx
geopy
fuzzywuzzy[speedup]
jellyfish
Expand All @@ -21,5 +21,6 @@ smart-open==1.10.0
sqlalchemy
tslib
typing
Werkzeug==0.16.1
Werkzeug==3.0.3
rsa>=4.7 # not directly required, pinned by Snyk to avoid a vulnerability
zipp>=3.19.1 # not directly required, pinned by Snyk to avoid a vulnerability
Loading