From 1b025ca40accbd4f72fb4643ba41f3eb8fdcd09b Mon Sep 17 00:00:00 2001 From: Gibson Chikafa Date: Tue, 1 Nov 2022 19:43:21 +0100 Subject: [PATCH] RStudio --- attributes/default.rb | 32 +++- .../hopsworks_templates/config_template.json | 1 + .../rstudio_logging_config_template | 9 + .../rstudio_rserver_config_template | 5 + .../rstudio_rsession_config_template | 2 + .../hopsworks_templates/sparklyr_config.yml | 10 ++ .../default/sql/ddl/3.1.0__initial_tables.sql | 46 ++--- files/default/sql/ddl/updates/3.1.0.sql | 24 ++- recipes/install.rb | 47 ++++- templates/default/rstudio-kill.sh.erb | 26 +++ templates/default/rstudio-launch.sh.erb | 130 ++++++++++++++ .../default/rstudio-project-cleanup.sh.erb | 30 ++++ templates/default/rstudio.sh.erb | 164 ++++++++++++++++++ templates/default/sql/dml/3.1.0.sql.erb | 13 ++ .../default/sql/dml/undo/3.1.0__undo.sql.erb | 13 ++ 15 files changed, 519 insertions(+), 33 deletions(-) create mode 100644 files/default/hopsworks_templates/rstudio_logging_config_template create mode 100644 files/default/hopsworks_templates/rstudio_rserver_config_template create mode 100644 files/default/hopsworks_templates/rstudio_rsession_config_template create mode 100644 files/default/hopsworks_templates/sparklyr_config.yml create mode 100644 templates/default/rstudio-kill.sh.erb create mode 100644 templates/default/rstudio-launch.sh.erb create mode 100644 templates/default/rstudio-project-cleanup.sh.erb create mode 100644 templates/default/rstudio.sh.erb diff --git a/attributes/default.rb b/attributes/default.rb index 25f015e2e..9cbbd17cb 100644 --- a/attributes/default.rb +++ b/attributes/default.rb @@ -446,4 +446,34 @@ default['hops']['cadvisor']['dir'] = "#{node['hops']['dir']}/cadvisor" default['hops']['cadvisor']['download-url'] = "#{node['download_url']}/docker/cadvisor" -default['hops']['cadvisor']['port'] = "4194" \ No newline at end of file +default['hops']['cadvisor']['port'] = "4194" + + +default['hopsworks']['rstudio_dir'] = 
node['hopsworks']['dir'] + "/rstudio" +#rstudio configuration variables +default["hopsworks"]['rstudio_host'] = "localhost" +default["hopsworks"]['rstudio_origin_scheme'] = "https" +default["hopsworks"]["rstudio_www_address"] = "0.0.0.0" +default["hopsworks"]["rstudio_session_timeout_minutes"] = 360 +default["hopsworks"]["rstudio_logging_level"] = "info" +default["hopsworks"]["rstudio_logger_type"] = "file" +default["hopsworks"]["rstudio_log_file_max_size"] = 512 +default["hopsworks"]["rstudio_default_cran_repo"] = "https://cloud.r-project.org/" + +default['rstudio']['base_dir'] = node['install']['dir'].empty? ? node['hopsworks']['dir'] + "/rstudio" : node['install']['dir'] + "/rstudio" +default['rstudio']['shutdown_timer_interval'] = "30m" + +# CRAN +default['rstudio']['cran']['mirror'] = 'http://cran.rstudio.com/' + +# APT configuration for Ubuntu or Debian installs. +case node["platform"].downcase +when "ubuntu" + default['rstudio']['apt']['key'] = 'E084DAB9' + default['rstudio']['apt']['keyserver'] = 'keyserver.ubuntu.com' + default['rstudio']['apt']['uri'] = 'http://cran.stat.ucla.edu/bin/linux/ubuntu' +when "debian" + default['rstudio']['apt']['key'] = '381BA480' + default['rstudio']['apt']['keyserver'] = 'subkeys.pgp.net' + default['rstudio']['apt']['uri'] = 'http://cran.stat.ucla.edu/bin/linux/debian' +end \ No newline at end of file diff --git a/files/default/hopsworks_templates/config_template.json b/files/default/hopsworks_templates/config_template.json index 19fca1939..21a73e4ed 100644 --- a/files/default/hopsworks_templates/config_template.json +++ b/files/default/hopsworks_templates/config_template.json @@ -50,6 +50,7 @@ "executorCores": ${conf.executorCores?c}, "executorMemory": "${conf.executorMemory}", "proxyUser": "${conf.hdfsUser}", + "name": "${conf.livySessionName}", "queue": "${conf.yarnQueue}", "conf": { ${conf.sparkConfiguration} diff --git a/files/default/hopsworks_templates/rstudio_logging_config_template 
b/files/default/hopsworks_templates/rstudio_logging_config_template new file mode 100644 index 000000000..d89a89a82 --- /dev/null +++ b/files/default/hopsworks_templates/rstudio_logging_config_template @@ -0,0 +1,9 @@ +[*] +log-level=warn +logger-type=syslog + +[@rserver] +log-level=${conf.logLevel} +logger-type=${conf.loggerType} +max-size-mb=${conf.maxSizeMb} +log-file-include-pid=${conf.includePid} \ No newline at end of file diff --git a/files/default/hopsworks_templates/rstudio_rserver_config_template b/files/default/hopsworks_templates/rstudio_rserver_config_template new file mode 100644 index 000000000..e4c6a7856 --- /dev/null +++ b/files/default/hopsworks_templates/rstudio_rserver_config_template @@ -0,0 +1,5 @@ +rsession-which-r=${conf.versionPath} +www-address=${conf.ipAddress} +www-port=${conf.port} +www-root-path=${conf.rootPath} +server-user=${conf.serverUser} \ No newline at end of file diff --git a/files/default/hopsworks_templates/rstudio_rsession_config_template b/files/default/hopsworks_templates/rstudio_rsession_config_template new file mode 100644 index 000000000..42cc2e1d4 --- /dev/null +++ b/files/default/hopsworks_templates/rstudio_rsession_config_template @@ -0,0 +1,2 @@ +session-timeout-minutes=${conf.sessionTimeoutMinutes} +r-cran-repos=${conf.cranRepo} \ No newline at end of file diff --git a/files/default/hopsworks_templates/sparklyr_config.yml b/files/default/hopsworks_templates/sparklyr_config.yml new file mode 100644 index 000000000..2d3b4e3d7 --- /dev/null +++ b/files/default/hopsworks_templates/sparklyr_config.yml @@ -0,0 +1,10 @@ +default: + livy.driverCores: ${conf.driverCores} + livy.driverMemory: "${conf.driverMemory}" + livy.numExecutors: ${conf.numExecutors} + livy.executorCores: ${conf.executorCores} + livy.executorMemory: "${conf.executorMemory}" + livy.proxyUser: "${conf.proxyUser}" + livy.queue: "${conf.yarnQueue}" + livy.name: "${conf.livyAppName}" +${conf.sparkConfiguration} \ No newline at end of file diff --git 
a/files/default/sql/ddl/3.1.0__initial_tables.sql b/files/default/sql/ddl/3.1.0__initial_tables.sql index 050a22851..f1d9896f9 100644 --- a/files/default/sql/ddl/3.1.0__initial_tables.sql +++ b/files/default/sql/ddl/3.1.0__initial_tables.sql @@ -1047,20 +1047,21 @@ CREATE TABLE `rstudio_interpreter` ( /*!40101 SET @saved_cs_client = @@character_set_client */; /*!40101 SET character_set_client = utf8 */; CREATE TABLE `rstudio_project` ( - `port` int(11) NOT NULL, - `hdfs_user_id` int(11) NOT NULL, + `port` int NOT NULL, + `hdfs_user_id` int NOT NULL, `created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, + `expires` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, `last_accessed` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, - `host_ip` varchar(255) COLLATE latin1_general_cs NOT NULL, - `token` varchar(255) COLLATE latin1_general_cs NOT NULL, - `secret` varchar(64) COLLATE latin1_general_cs NOT NULL, - `pid` bigint(20) NOT NULL, - `project_id` int(11) NOT NULL, + `secret` varchar(64) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL, + `pid` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL, + `project_id` int NOT NULL, + `login_password` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs DEFAULT NULL, + `login_username` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs DEFAULT NULL, PRIMARY KEY (`port`), KEY `hdfs_user_idx` (`hdfs_user_id`), KEY `project_id` (`project_id`), - CONSTRAINT `FK_103_577` FOREIGN KEY (`hdfs_user_id`) REFERENCES `hops`.`hdfs_users` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION, - CONSTRAINT `FK_284_578` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION + CONSTRAINT `FK_103_577` FOREIGN KEY (`hdfs_user_id`) REFERENCES `hops`.`hdfs_users` (`id`) ON DELETE CASCADE, + CONSTRAINT `FK_284_578` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE ) ENGINE=ndbcluster DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs; 
/*!40101 SET character_set_client = @saved_cs_client */; @@ -1073,34 +1074,19 @@ CREATE TABLE `rstudio_project` ( CREATE TABLE `rstudio_settings` ( `project_id` int(11) NOT NULL, `team_member` varchar(150) COLLATE latin1_general_cs NOT NULL, - `num_tf_ps` int(11) DEFAULT '1', - `num_tf_gpus` int(11) DEFAULT '0', - `num_mpi_np` int(11) DEFAULT '1', - `appmaster_cores` int(11) DEFAULT '1', - `appmaster_memory` int(11) DEFAULT '1024', - `num_executors` int(11) DEFAULT '1', - `num_executor_cores` int(11) DEFAULT '1', - `executor_memory` int(11) DEFAULT '1024', - `dynamic_initial_executors` int(11) DEFAULT '1', - `dynamic_min_executors` int(11) DEFAULT '1', - `dynamic_max_executors` int(11) DEFAULT '1', `secret` varchar(255) COLLATE latin1_general_cs NOT NULL, - `log_level` varchar(32) COLLATE latin1_general_cs DEFAULT 'INFO', - `mode` varchar(32) COLLATE latin1_general_cs NOT NULL, - `umask` varchar(32) COLLATE latin1_general_cs DEFAULT '022', `advanced` tinyint(1) DEFAULT '0', - `archives` varchar(1500) COLLATE latin1_general_cs DEFAULT '', - `jars` varchar(1500) COLLATE latin1_general_cs DEFAULT '', - `files` varchar(1500) COLLATE latin1_general_cs DEFAULT '', - `py_files` varchar(1500) COLLATE latin1_general_cs DEFAULT '', - `spark_params` varchar(6500) COLLATE latin1_general_cs DEFAULT '', `shutdown_level` int(11) NOT NULL DEFAULT '6', + `base_dir` varchar(255) COLLATE latin1_general_cs DEFAULT NULL, + `job_config` varchar(11000) COLLATE latin1_general_cs DEFAULT NULL, + `docker_config` varchar(1000) COLLATE latin1_general_cs DEFAULT NULL, PRIMARY KEY (`project_id`,`team_member`), KEY `team_member` (`team_member`), KEY `secret_idx` (`secret`), - CONSTRAINT `RS_FK_USERS` FOREIGN KEY (`team_member`) REFERENCES `users` (`email`) ON DELETE CASCADE ON UPDATE NO ACTION, + CONSTRAINT `RS_FK_USERS` FOREIGN KEY (`team_member`) REFERENCES `users` (`email`) ON DELETE CASCADE ON UPDATE NO + ACTION, CONSTRAINT `RS_FK_PROJS` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) 
ON DELETE CASCADE ON UPDATE NO ACTION -) ENGINE=ndbcluster DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs; + ) ENGINE=ndbcluster DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs; /*!40101 SET character_set_client = @saved_cs_client */; /*!40101 SET character_set_client = utf8 */; diff --git a/files/default/sql/ddl/updates/3.1.0.sql b/files/default/sql/ddl/updates/3.1.0.sql index 756bc5443..30e404f3b 100644 --- a/files/default/sql/ddl/updates/3.1.0.sql +++ b/files/default/sql/ddl/updates/3.1.0.sql @@ -123,5 +123,27 @@ ALTER TABLE `hopsworks`.`feature_store_activity` ADD CONSTRAINT `fs_act_validati ALTER TABLE `hopsworks`.`feature_store_activity` ADD COLUMN `expectation_suite_id` Int(11) NULL; ALTER TABLE `hopsworks`.`feature_store_activity` ADD CONSTRAINT `fs_act_expectationsuite_fk` FOREIGN KEY (`expectation_suite_id`) REFERENCES `expectation_suite` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION; +ALTER TABLE `hopsworks`.`project` ADD COLUMN `creation_status` TINYINT(1) NOT NULL DEFAULT '0'; -ALTER TABLE `hopsworks`.`project` ADD COLUMN `creation_status` TINYINT(1) NOT NULL DEFAULT '0'; \ No newline at end of file +ALTER TABLE `hopsworks`.`rstudio_settings` DROP `num_tf_ps`, DROP `num_tf_gpus`, DROP `num_mpi_np`, +DROP `appmaster_cores`, DROP `appmaster_memory`, DROP `num_executors`, DROP `num_executor_cores`, + DROP `executor_memory`, DROP `dynamic_initial_executors`,DROP `dynamic_min_executors`, DROP `dynamic_max_executors`, + DROP `log_level`, DROP `mode`, DROP `umask`, DROP `archives`, DROP `jars`, DROP `files`,DROP `py_files`, DROP `spark_params`; + +ALTER TABLE `hopsworks`.`rstudio_project` DROP `host_ip`, DROP `token`; + +ALTER TABLE `hopsworks`.`rstudio_project` ADD COLUMN `expires` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP; + +ALTER TABLE `hopsworks`.`rstudio_project` ADD COLUMN `login_username` varchar(255) COLLATE latin1_general_cs DEFAULT + NULL; + +ALTER TABLE `hopsworks`.`rstudio_project` ADD COLUMN `login_password` varchar(255) COLLATE 
latin1_general_cs DEFAULT + NULL; + +ALTER TABLE `hopsworks`.`rstudio_project` MODIFY COLUMN `pid` varchar(255) COLLATE latin1_general_cs NOT NULL; + +ALTER TABLE `hopsworks`.`rstudio_settings` ADD COLUMN `job_config` varchar(11000) COLLATE latin1_general_cs DEFAULT + NULL; + +ALTER TABLE `hopsworks`.`rstudio_settings` ADD COLUMN `docker_config` varchar(1000) COLLATE latin1_general_cs DEFAULT + NULL; diff --git a/recipes/install.rb b/recipes/install.rb index 9438a8c35..276171788 100644 --- a/recipes/install.rb +++ b/recipes/install.rb @@ -125,6 +125,14 @@ not_if { node['install']['external_users'].casecmp("true") == 0 } end +#update permissions of base_dir for rstudio to 770 +directory node['rstudio']['base_dir'] do + owner node['hops']['yarnapp']['user'] + group node['hops']['group'] + mode "770" + action :create +end + #update permissions of base_dir to 770 directory node['jupyter']['base_dir'] do owner node['hops']['yarnapp']['user'] @@ -578,6 +586,14 @@ not_if { node['install']['kubernetes'].casecmp("true") == 0 } end +kagent_sudoers "rstudio" do + user node['glassfish']['user'] + group "root" + script_name "rstudio.sh" + template "rstudio.sh.erb" + run_as "ALL" # run this as root - inside we change to different users +end + kagent_sudoers "convert-ipython-notebook" do user node['glassfish']['user'] group "root" @@ -665,9 +681,19 @@ action :create end +kagent_sudoers "rstudio-project-cleanup" do + user node['glassfish']['user'] + group "root" + script_name "rstudio-project-cleanup.sh" + template "rstudio-project-cleanup.sh.erb" + run_as "ALL" + not_if { node['install']['kubernetes'].casecmp("true") == 0 } +end + ["zip-hdfs-files.sh", "zip-background.sh", "unzip-background.sh", "tensorboard-launch.sh", "tensorboard-cleanup.sh", "condasearch.sh", "list_environment.sh", "jupyter-kill.sh", - "jupyter-launch.sh", "tfserving-kill.sh", "sklearn_serving-launch.sh", "sklearn_serving-kill.sh", "git-container-kill.sh"].each do |script| + "jupyter-launch.sh", 
"tfserving-kill.sh", "sklearn_serving-launch.sh", "sklearn_serving-kill.sh", "git-container-kill.sh", "rstudio-kill.sh", + "rstudio-launch.sh"].each do |script| template "#{theDomain}/bin/#{script}" do source "#{script}.erb" owner node['glassfish']['user'] @@ -677,6 +703,25 @@ end end +#update permissions of base_dir to 770 +directory node["rstudio"]["base_dir"] do + owner node["rstudio"]["user"] + group node["rstudio"]["group"] + mode "770" + action :create +end + +template "#{theDomain}/bin/rstudio-launch.sh" do + source "rstudio-launch.sh.erb" + owner node['glassfish']['user'] + group node['glassfish']['group'] + mode "500" + action :create + variables({ + :namenode_fdqn => namenode_fdqn, + }) +end + template "#{theDomain}/bin/git-container-launch.sh" do source "git-container-launch.sh.erb" owner node['glassfish']['user'] diff --git a/templates/default/rstudio-kill.sh.erb b/templates/default/rstudio-kill.sh.erb new file mode 100644 index 000000000..b2e5747c1 --- /dev/null +++ b/templates/default/rstudio-kill.sh.erb @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +help() { + echo "" + echo "usage: $0 CONTAINER_ID PROJECT_USER_NAME" + echo "" + exit 1 +} + + +if [ $# -ne 2 ]; then + help +fi + +if [ "$2" != "" ]; then + PROJECT_USER_NAME=$2 + CONTAINER_NAME=${PROJECT_USER_NAME}__rstudio + + echo "Killing input container_name: $CONTAINER_NAME" + docker rm -f "$CONTAINER_NAME" > /dev/null 2>&1 + exit $? +fi + +echo "Killing input container_id: $1" +docker rm -f "$1" > /dev/null 2>&1 +exit $? 
\ No newline at end of file diff --git a/templates/default/rstudio-launch.sh.erb b/templates/default/rstudio-launch.sh.erb new file mode 100644 index 000000000..a7e1fde6d --- /dev/null +++ b/templates/default/rstudio-launch.sh.erb @@ -0,0 +1,130 @@ +#!/usr/bin/env bash + +WAIT_START=60 +RSTUDIO_HOME=$1 +HADOOP_HOME=$2 +HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop +HADOOP_USERNAME=$3 +PORT=$4 +SECRET_DIR=$5 +CERTS_DIR=$6 +IMAGE=$7 +LOGFILE=${RSTUDIO_HOME}/logs/$8 +PROJECT_NAME=$9 +LIVY_IP=${10} +LIVY_PORT=${11} +HADOOP_BASE_DIR=${12} +SERVER_PASSWORD=${13} +HADOOP_VERSION=${14} +SPARK_VERSION=${15} +HADOOP_CLIENT_ENV_OPTS='-D fs.permissions.umask-mode=0002' +CONTAINER_NAME=${HADOOP_USERNAME}__rstudio +PID_FILE=${RSTUDIO_HOME}/run/rstudio.pid +SPARK_CONF_DIR=/srv/hops/spark/conf +FLINK_CONF_DIR=/srv/hops/flink/conf +NOT_FOUND=127 +SPARKLYR_CONFIG_FILE=${RSTUDIO_HOME}/conf/config.yml +NAMENODE_IP=<%= @namenode_fdqn %> +NAMENODE_PORT=<%= node['hops']['nn']['port'] %> +CLIENT_CERTIFICATES_BUNDLE=$CERTS_DIR/certificate_bundle.pem +ROOT_CA_BUNDLE=$CERTS_DIR/root_ca.pem +CLIENT_KEY=$CERTS_DIR/private_key.pem + + +help() { + echo "" + echo "usage: $0 RSTUDIO_HOME HADOOP_HOME HADOOP_USERNAME PORT SECRET_DIR CERTS_DIR IMAGE LOGFILE PROJECT_NAME LIVY_IP LIVY_PORT HADOOP_BASE_DIR SERVER_PASSWORD HADOOP_VERSION SPARK_VERSION" + echo "" + exit 1 +} + +function kill_named { + CID=$(docker container list -a | grep $CONTAINER_NAME | grep -v grep | awk '{print $1}') + if [ "$CID" != "" ] ; then + docker rm -f "$CID" > /dev/null 2>&1 + res=$? + else + res=$NOT_FOUND + fi + return "$res" +} + +if [ $# -ne 15 ]; then + help +fi + +#check if the folders exist +cd "$RSTUDIO_HOME" || exit +cd "$SECRET_DIR" || exit + +kill_named + +if [ -f "$PID_FILE" ] ; then + rm $PID_FILE +fi + +touch $LOGFILE +if [ $? 
-ne 0 ] ; then + echo "Error: could not create the log file for rstudio server" + exit 1 +fi +chmod 766 $LOGFILE + +docker run --rm -d --cap-add SYS_ADMIN --device /dev/fuse --security-opt apparmor:unconfined --name $CONTAINER_NAME --cidfile=$PID_FILE\ + --network=host \ + --init \ + -e "RSTUDIO_PATH=$RSTUDIO_HOME" \ + -e "RSTUDIO_DATA_DIR=$RSTUDIO_HOME" \ + -e "PDIR=$SECRET_DIR" \ + -e "RSTUDIO_CONFIG_DIR=${RSTUDIO_HOME}/conf" \ + -e "RSTUDIO_RUNTIME_DIR=${RSTUDIO_HOME}/run" \ + -e "HADOOP_HDFS_HOME=${HADOOP_HOME}" \ + -e "HADOOP_CONF_DIR=${HADOOP_CONF_DIR}" \ + -e "HADOOP_CLIENT_OPTS='-Dfs.permissions.umask-mode=0002'" \ + -e "MATERIAL_DIRECTORY=$CERTS_DIR" \ + -e "HADOOP_USERNAME=$HADOOP_USERNAME" \ + -e "HADOOP_HOME=${HADOOP_HOME}" \ + -e "LOGFILE=${LOGFILE}" \ + -e "RSTUDIO_PORT=${PORT}" \ + -e "PROJECT_NAME=${PROJECT_NAME}" \ + -e "LIVY_IP=${LIVY_IP}" \ + -e "LIVY_PORT=${LIVY_PORT}" \ + -e "HADOOP_BASE_DIR=${HADOOP_BASE_DIR}" \ + -e "HADOOP_CLIENT_ENV_OPTS=${HADOOP_CLIENT_ENV_OPTS}" \ + -e "SPARKLYR_CONFIG_FILE=${SPARKLYR_CONFIG_FILE}" \ + -e "SERVER_PASSWORD=${SERVER_PASSWORD}" \ + -e "HADOOP_VERSION=${HADOOP_VERSION}" \ + -e "NAMENODE_IP=${NAMENODE_IP}" \ + -e "NAMENODE_PORT=${NAMENODE_PORT}" \ + -e "SPARK_VERSION=${SPARK_VERSION}" \ + -e "CLIENT_CERTIFICATES_BUNDLE=${CLIENT_CERTIFICATES_BUNDLE}" \ + -e "ROOT_CA_BUNDLE=${ROOT_CA_BUNDLE}" \ + -e "CLIENT_KEY=${CLIENT_KEY}" \ + -v $RSTUDIO_HOME:$RSTUDIO_HOME:rw\ + -v $SECRET_DIR:$SECRET_DIR:rw\ + -v ${HADOOP_CONF_DIR}:${HADOOP_CONF_DIR}:ro \ + -v ${SPARK_CONF_DIR}:${SPARK_CONF_DIR}:ro \ + -v $LOGFILE:"/var/log/rstudio-server/rserver.log":rw\ + -u="yarnapp" \ + -w="$SECRET_DIR" \ + $IMAGE & \ + +# Wait for rstudio to start +timeout=0 +while [ $timeout -lt $WAIT_START ] ; do + docker logs $(cat $PID_FILE) | grep "...done" + if [ $? -eq 0 ] ; then + break + fi + echo -n "." 
+ timeout=$((timeout + 1)); sleep 1 +done +echo "" + +# If the timeout was exceeded, kill rstudio +if [ "$timeout" -eq $WAIT_START ] ; then + kill_named; exit 1 +fi + + +exit $? \ No newline at end of file diff --git a/templates/default/rstudio-project-cleanup.sh.erb b/templates/default/rstudio-project-cleanup.sh.erb new file mode 100644 index 000000000..41bad3f01 --- /dev/null +++ b/templates/default/rstudio-project-cleanup.sh.erb @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +# This script allows Hopsworks to cleanup local directories for RStudio servers. +# There will be 1 RStudio server per ProjectUser. +# This script can be run by hopsworks (running as user 'glassfish') as a sudo command as 'root' +# The script should run as 'root' as the rstudio user does not have read privileges on the base directory for a user's +# project and cannot do a recursive delete on the directory. +# + +help() { + echo "" + echo "usage: $0 project" + echo "e.g., " + exit 1 +} + +if [ $# -ne 1 ]; then + help +fi + +base="<%= node['rstudio']['base_dir'] %>/Projects/$1" +# Sanity checks for injection attacks +if [ -z "$1" ] || [[ "$1" == *..* ]] || [ ! -d "$base" ] ; then + echo "Invalid RSTUDIO_HOME directory: $base" + exit 1 +fi + +rm -rf "$base" + +exit $? \ No newline at end of file diff --git a/templates/default/rstudio.sh.erb b/templates/default/rstudio.sh.erb new file mode 100644 index 000000000..0fd8beef1 --- /dev/null +++ b/templates/default/rstudio.sh.erb @@ -0,0 +1,164 @@ +#!/usr/bin/env bash + +# This script allows Hopsworks to start/kill RStudio . +# There will be 1 rstudio-server per ProjectUser. +# This script can be run by hopsworks (running as user 'glassfish') as a sudo command whereupon +# it changes user to 'rstudio' to run the command as user 'rstudio'. 
+# + +help() { + echo "" + echo "usage: $0 [start rstudio_home hadoop_home hadoop_username port secret_dir certs_dir image_name logfile project_name livy_address livy_port base_dir server_password hadoop_version spark_version] | [kill rstudio_home container_id project_user_name]" + echo "" + exit 1 +} + + +DOMAINS_DIR=/srv/hops/domains +RSTUDIO_USER=yarnapp +RSTUDIO_GROUP=hadoop +HOPSWORKS_USER=glassfish +VALID_IMAGE_NAME='^([a-z0-9]+(-[a-z0-9]+)*.)*[a-z0-9]+(:[0-9]*)?(/([a-zA-Z0-9-]*))?/([-:._a-zA-Z0-9]{0,62}[-:.a-zA-Z0-9]$)' + +if [ "$1" == "kill" ] ; then + + if [ $# -ne 4 ]; then + help + fi + + # Don't kill the pid from this script, as it is run with 'sudo' privileges. Only do it as user 'rstudio' + sudo ${DOMAINS_DIR}/domain1/bin/rstudio-kill.sh "$3" "$4" + + # Remove all the directories in the home rstudio folder for this project-user. + if [ "$2" != "" ] ; then + # Guard against deleting an unrelated directory: only act on paths under hops/rstudio/Projects + if [[ $2 = *"hops/rstudio/Projects"* ]]; then + rm -rf "${2}"/../* + fi + fi + +elif [ "$1" == "start" ] ; then + + if [ $# -ne 16 ]; then + help + fi + + # Sanity checks for injection attacks + if [ ! -d "$2" ] ; then + echo "Invalid RSTUDIO_HOME directory: $2" + exit 1 + fi + if [ ! -d "$3" ] ; then + echo "Invalid HADOOP_HOME directory: $3" + exit 2 + fi + + if [ ! -d "${7}" ] ; then + echo "Invalid certificates directory: ${7}" + exit 3 + fi + + re='^[0-9]+$' + if ! [[ $5 =~ $re ]] ; then + echo "error: Not a number" >&2 + help + fi + + if ! [[ ${8} =~ $VALID_IMAGE_NAME ]] ; then + echo "error: Not a valid image name ${8}" >&2 + help + fi + + pid=$(ps -ef | grep -E "rstudio-server" | grep "port=$5" | awk '{print $2}') + if [ "$pid" != "" ] ; then + echo "There is already an rstudio server using this port" + exit 1 + fi + + mkdir -p "$6" + if [ $? 
-ne 0 ] ; then + echo "Error: could not create private_dir: $6" + exit 1 + fi + chmod 770 "$6" + chown "${RSTUDIO_USER}":"${RSTUDIO_GROUP}" "$6" + + # Make the group of the files, the rstudio group so that they can write to the files + chown -R "${HOPSWORKS_USER}":"${RSTUDIO_GROUP}" "$2/".. + if [ $? -ne 0 ] ; then + echo "Error: could not change ownership of config_dir for RStudio: $2" + exit 1 + fi + chmod 0730 "$2/".. + chown -R "${RSTUDIO_USER}":"${RSTUDIO_GROUP}" "$2" + chmod -R 770 "$2" + + # Launch RStudio server + ${DOMAINS_DIR}/domain1/bin/rstudio-launch.sh "$2" "$3" "$4" "$5" "$6" "$7" "$8" "$9" "${10}" "${11}" "${12}" "${13}" "${14}" "${15}" "${16}" + +# list +elif [ "$1" == "list" ] ; then + # This command will output line-separated PIDs for all running rstudio server instances into the tmp file, from + # where it is read by Hopsworks + docker container list -a | grep "__rstudio" | grep -v 'grep' | awk '{print $1}' > /tmp/rstudioServerPids.pids +elif [ "$1" == "generate_pems" ]; then + if [ $# -ne 3 ]; then + help + fi + + CERTS_DIR=$2 + HADOOP_USERNAME=$3 + TSTORE_FILE=$CERTS_DIR/${HADOOP_USERNAME}__tstore.jks + KSTORE_FILE=$CERTS_DIR/${HADOOP_USERNAME}__kstore.jks + KEY_FILE=$CERTS_DIR/${HADOOP_USERNAME}__cert.key + + + PEM_CERTIFICATE_BUNDLE="certificate_bundle.pem" + PEM_ROOT_CA="root_ca.pem" + PEM_PRIVATE_KEY="private_key.pem" + + KEY=$( cat ${KEY_FILE} ) + + #1. generate pem certificates bundle from the keystore.jks file + printf '%s\n%s\n%s\n' "$KEY" "$KEY" "$KEY" | keytool -importkeystore -srckeystore $KSTORE_FILE -destkeystore $CERTS_DIR/${HADOOP_USERNAME}__keystore.p12 -deststoretype PKCS12 + echo $KEY | keytool -deststoretype PKCS12 -keystore $CERTS_DIR/${HADOOP_USERNAME}__keystore.p12 -list + echo $KEY | openssl pkcs12 -nokeys -in $CERTS_DIR/${HADOOP_USERNAME}__keystore.p12 -out $CERTS_DIR/$PEM_CERTIFICATE_BUNDLE + + #2. 
generate root ca pem from the tstore.jks file + printf '%s\n%s\n%s\n' "$KEY" "$KEY" "$KEY" | keytool -importkeystore -srckeystore $TSTORE_FILE -destkeystore $CERTS_DIR/${HADOOP_USERNAME}__tstore.p12 -deststoretype PKCS12 + echo $KEY | keytool -deststoretype PKCS12 -keystore $CERTS_DIR/${HADOOP_USERNAME}__tstore.p12 -list + echo $KEY | openssl pkcs12 -nokeys -in $CERTS_DIR/${HADOOP_USERNAME}__tstore.p12 -out $CERTS_DIR/$PEM_ROOT_CA + + #3 extract private key from the keystore + echo $KEY | openssl pkcs12 -info -in $CERTS_DIR/${HADOOP_USERNAME}__keystore.p12 -nodes -nocerts > $CERTS_DIR/$PEM_PRIVATE_KEY + + #4. verify that files have been generated + CERTIFICATES_BUNDLE=$CERTS_DIR/$PEM_CERTIFICATE_BUNDLE + if [ ! -s ${CERTIFICATES_BUNDLE} ]; then + echo "Failed to generate certificates bundle for project" + exit 4 + fi + ROOT_CA=$CERTS_DIR/$PEM_ROOT_CA + if [ ! -s ${ROOT_CA} ]; then + echo "Failed to generate root ca for project" + exit 5 + fi + PRIVATE_KEY=$CERTS_DIR/$PEM_PRIVATE_KEY + if [ ! -s ${PRIVATE_KEY} ]; then + echo "Failed to generate private key for project" + exit 6 + fi + + chmod 644 $ROOT_CA + chmod 644 $CERTIFICATES_BUNDLE + chmod 644 $PRIVATE_KEY + + chown glassfish:glassfish $ROOT_CA + chown glassfish:glassfish $CERTIFICATES_BUNDLE + chown glassfish:glassfish $PRIVATE_KEY + + rm $CERTS_DIR/${HADOOP_USERNAME}__keystore.p12 + rm $CERTS_DIR/${HADOOP_USERNAME}__tstore.p12 +else + help +fi diff --git a/templates/default/sql/dml/3.1.0.sql.erb b/templates/default/sql/dml/3.1.0.sql.erb index 14c7e90f0..20ad9aef3 100644 --- a/templates/default/sql/dml/3.1.0.sql.erb +++ b/templates/default/sql/dml/3.1.0.sql.erb @@ -8,3 +8,16 @@ DELETE FROM `hopsworks`.`variables` WHERE `id`='dela_hopsworks_public_port'; DELETE FROM `hopsworks`.`variables` WHERE `id`='hops_site_host'; DELETE FROM `hopsworks`.`variables` WHERE `id`='hopssite_heartbeat_interval'; DELETE FROM `hopsworks`.`variables` WHERE `id`='hops_site_endpoint'; + +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) 
VALUES ("rstudio_host", "<%= node['hopsworks']['rstudio_host'] %>"); +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_origin_scheme", "<%= node['hopsworks']['rstudio_origin_scheme'] %>"); +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_www_address", "<%= node['hopsworks']['rstudio_www_address'] %>"); +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_session_timeout_minutes", "<%= node['hopsworks']['rstudio_session_timeout_minutes'] %>"); +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_logging_level", "<%= node['hopsworks']['rstudio_logging_level'] %>"); +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_logger_type", "<%= node['hopsworks']['rstudio_logger_type'] %>"); +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_log_file_max_size", "<%= node['hopsworks']['rstudio_log_file_max_size'] %>"); +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_dir", "<%= node['hopsworks']['rstudio_dir'] %>"); +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_user", "<%= node['hops']['yarnapp']['user'] %>"); +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_group", "<%= node['hops']['group'] %>"); +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_cran_repo", "<%= node['hopsworks']['rstudio_default_cran_repo'] %>"); +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_shutdown_timer_interval", "<%= node['rstudio']['shutdown_timer_interval'] %>"); diff --git a/templates/default/sql/dml/undo/3.1.0__undo.sql.erb b/templates/default/sql/dml/undo/3.1.0__undo.sql.erb index 55caf2210..a098e2008 100644 --- a/templates/default/sql/dml/undo/3.1.0__undo.sql.erb +++ b/templates/default/sql/dml/undo/3.1.0__undo.sql.erb @@ -1 +1,14 @@ DELETE FROM `hopsworks`.`variables` WHERE `id`='pki_ca_configuration'; + +DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_host'; +DELETE FROM 
`hopsworks`.`variables` WHERE `id`='rstudio_origin_scheme'; +DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_www_address'; +DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_session_timeout_minutes'; +DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_logging_level'; +DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_logger_type'; +DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_log_file_max_size'; +DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_dir'; +DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_user'; +DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_group'; +DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_cran_repo'; +DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_shutdown_timer_interval';