-
Notifications
You must be signed in to change notification settings - Fork 0
/
create_german_addresses_linux_yarn.sh
50 lines (43 loc) · 1.36 KB
/
create_german_addresses_linux_yarn.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/bin/bash
#
# Downloads an OSM file with wget, copies it to HDFS and submits
# create_german_addresses.py to YARN with a packed Python virtualenv.
#
# Options:
#   -p, --path <url>      Location of the OSM file; stored in INPUT_PATH.
#   -r, --result <path>   Result location; stored in RESULT_PATH.

# Virtualenv that is packed and shipped with the Spark job.
VENV_PATH='/home/ec2-user/.virtualenvs/address_list_from_osm'
# HDFS directory of the invoking user; the input file is uploaded below it.
HDFS_BASE_PATH="hdfs:///user/$USER"

#######################################
# Parse the command-line options of this script.
# Globals:   INPUT_PATH (written), RESULT_PATH (written)
# Arguments: the script's command-line arguments
# Outputs:   an error message on stderr for invalid input
# Returns:   0 on success; exits the script with status 1 on an unknown
#            option, a stray positional argument or a missing option value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -p|--path)
        # Without this check a trailing "-p" would silently yield an empty path.
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires a value" >&2
          exit 1
        fi
        INPUT_PATH="$2"
        shift 2 # past argument and value
        ;;
      -r|--result)
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires a value" >&2
          exit 1
        fi
        RESULT_PATH="$2"
        shift 2 # past argument and value
        ;;
      -*)
        # "-*" also matches long options such as "--foo".
        echo "Unknown option $1" >&2
        exit 1
        ;;
      *)
        echo "Unknown argument $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# Stage the OSM input: download it to /tmp, copy it to HDFS, then remove the
# local copy again.

# Without an input path the commands below would operate on "/tmp/" itself.
if [[ -z "${INPUT_PATH:-}" ]]; then
  echo "Missing required option -p|--path" >&2
  exit 1
fi

base_osm_name=$(basename -- "${INPUT_PATH}")
local_osm_file="/tmp/${base_osm_name}"

rm -f -- "${local_osm_file}"
# wget -O creates/truncates the target even when the download fails, so a
# failure must stop the script before the file replaces the copy on HDFS.
if ! wget "${INPUT_PATH}" -O "${local_osm_file}"; then
  echo "Download of ${INPUT_PATH} failed" >&2
  rm -f -- "${local_osm_file}"
  exit 1
fi

# -f overwrites a file of the same name that already exists on HDFS.
if ! hadoop fs -put -f "${local_osm_file}" "${HDFS_BASE_PATH}/${base_osm_name}"; then
  echo "Upload of ${local_osm_file} to ${HDFS_BASE_PATH}/${base_osm_name} failed" >&2
  rm -f -- "${local_osm_file}"
  exit 1
fi
rm -f -- "${local_osm_file}"
# Pack the virtualenv into an archive that is shipped with the Spark job.
venv_archive='/tmp/venv_address_list_from_osm.tar.gz'
rm -f -- "${venv_archive}"
if ! venv-pack -o "${venv_archive}" -p "${VENV_PATH}"; then
  echo "Packing virtualenv ${VENV_PATH} failed" >&2
  exit 1
fi
# The "#environment" suffix is the alias under which the archive is unpacked;
# it has to match the ./environment/bin/python3 interpreter path used below.
archives="${venv_archive}#environment"

# Honor -r/--result when it was given; otherwise keep the previous default
# location below the user's HDFS directory.
result_path="${RESULT_PATH:-${HDFS_BASE_PATH}/result/${base_osm_name}}"

# Submit the job in YARN cluster mode. -p points the job at the file uploaded
# to HDFS, -r at the result location. This is the last command, so its exit
# status becomes the exit status of the script.
spark-submit \
  --conf spark.yarn.appMasterEnv.PYSPARK_PYTHON=./environment/bin/python3 \
  --conf spark.yarn.appMasterEnv.PYTHONPATH=. \
  --master yarn \
  --deploy-mode cluster \
  --archives "${archives}" \
  --packages com.acervera.osm4scala:osm4scala-spark3-shaded_2.12:1.0.11,org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.5.0,org.datasyslab:geotools-wrapper:1.4.0-28.2 \
  create_german_addresses.py \
  -p "${HDFS_BASE_PATH}/${base_osm_name}" \
  -r "${result_path}"