diff --git a/attributes/default.rb b/attributes/default.rb index d208b7108..f2c322807 100644 --- a/attributes/default.rb +++ b/attributes/default.rb @@ -63,6 +63,8 @@ default['hopsworks']['jupyter_dir'] = node['hopsworks']['dir'] + "/jupyter" +default['hopsworks']['rstudio_dir'] = node['hopsworks']['dir'] + "/rstudio" + default['hopsworks']['max_mem'] = "3000" default['glassfish']['max_mem'] = node['hopsworks']['max_mem'].to_i default['hopsworks']['min_mem'] = "1024" @@ -371,6 +373,51 @@ default['rstudio']['rpm'] = "rstudio-server-rhel-1.1.447-x86_64.rpm" default['rstudio']['enabled'] = "false" + +#rstudio configuration variables +default["hopsworks"]['rstudio_host'] = "localhost" +default["hopsworks"]['rstudio_origin_scheme'] = "https" +default["hopsworks"]["rstudio_www_address"] = "127.0.0.1" +default["hopsworks"]["rstudio_session_timeout_minutes"] = 360 +default["hopsworks"]["rstudio_logging_level"] = "info" +default["hopsworks"]["rstudio_logger_type"] = "file" +default["hopsworks"]["rstudio_log_file_max_size"] = 512 +default["hopsworks"]["rstudio_default_cran_repo"] = "https://cloud.r-project.org/" + +default['rstudio']['base_dir'] = node['install']['dir'].empty? ? node['hopsworks']['dir'] + "/rstudio" : node['install']['dir'] + "/rstudio" +default['rstudio']['shutdown_timer_interval'] = "30m" + +# CRAN +default['rstudio']['cran']['mirror'] = 'http://cran.rstudio.com/' + +# APT configuration for Ubuntu or Debian installs. +case node["platform"].downcase +when "ubuntu" + default['rstudio']['apt']['key'] = 'E084DAB9' + default['rstudio']['apt']['keyserver'] = 'keyserver.ubuntu.com' + default['rstudio']['apt']['uri'] = 'http://cran.stat.ucla.edu/bin/linux/ubuntu' +when "debian" + default['rstudio']['apt']['key'] = '381BA480' + default['rstudio']['apt']['keyserver'] = 'subkeys.pgp.net' + default['rstudio']['apt']['uri'] = 'http://cran.stat.ucla.edu/bin/linux/debian' +end + +# You can define a simple array of packages in your role/environment/node and the +# CRAN recipe will install them. 
+default['rstudio']['cran']['packages'] = [] + +# RStudio Server +default['rstudio']['server']['www_port'] = '8787' +default['rstudio']['server']['www_address'] = '127.0.0.1' +default['rstudio']['server']['ld_library_path'] = '' +default['rstudio']['server']['r_binary_path'] = '' +default['rstudio']['server']['user_group'] = '' + +# RStudio Session +default['rstudio']['session']['timeout'] = '30' +default['rstudio']['session']['package_path'] = '' +default['rstudio']['session']['cran_repo'] = 'http://cran.case.edu/' + default['hopsworks']['kafka_max_num_topics'] = '100' default['hopsworks']['audit_log_dump_enabled'] = "false" diff --git a/files/default/hopsworks_templates/rstudio_logging_config_template b/files/default/hopsworks_templates/rstudio_logging_config_template new file mode 100644 index 000000000..6bd91c26c --- /dev/null +++ b/files/default/hopsworks_templates/rstudio_logging_config_template @@ -0,0 +1,10 @@ +[*] +log-level=warn +logger-type=syslog + +[@rserver] +log-level=${conf.logLevel} +logger-type=${conf.loggerType} +max-size-mb=${conf.maxSizeMb} +log-dir=${conf.logDir} +log-file-include-pid=${conf.includePid} \ No newline at end of file diff --git a/files/default/hopsworks_templates/rstudio_rserver_config_template b/files/default/hopsworks_templates/rstudio_rserver_config_template new file mode 100644 index 000000000..09adf0c9b --- /dev/null +++ b/files/default/hopsworks_templates/rstudio_rserver_config_template @@ -0,0 +1,5 @@ +rsession-which-r=${conf.versionPath} +www-address=${conf.ipAddress} +www-port=${conf.port} +www-root-path=${conf.rootPath} +server-user=${conf.serverUser} diff --git a/files/default/hopsworks_templates/rstudio_rsession_config_template b/files/default/hopsworks_templates/rstudio_rsession_config_template new file mode 100644 index 000000000..42cc2e1d4 --- /dev/null +++ b/files/default/hopsworks_templates/rstudio_rsession_config_template @@ -0,0 +1,2 @@ +session-timeout-minutes=${conf.sessionTimeoutMinutes} +r-cran-repos=${conf.cranRepo} \ No newline at end of file diff --git a/files/default/hopsworks_templates/sparklyr_config.yml b/files/default/hopsworks_templates/sparklyr_config.yml new file mode 100644 index 000000000..6d9dfea33 --- /dev/null +++ b/files/default/hopsworks_templates/sparklyr_config.yml @@ -0,0 +1,9 @@ +default: + livy.driverCores: ${conf.driverCores} + livy.driverMemory: "${conf.driverMemory}" + livy.numExecutors: ${conf.numExecutors} + livy.executorCores: ${conf.executorCores} + livy.executorMemory: "${conf.executorMemory}" + livy.proxyUser: "${conf.proxyUser}" + livy.queue: "${conf.yarnQueue}" +${conf.sparkConfiguration} \ No newline at end of file diff --git a/files/default/sql/ddl/3.0.0__initial_tables.sql b/files/default/sql/ddl/3.0.0__initial_tables.sql index 281a6d3ce..ebfba6f88 100755 --- a/files/default/sql/ddl/3.0.0__initial_tables.sql +++ b/files/default/sql/ddl/3.0.0__initial_tables.sql @@ -1056,8 +1056,9 @@ CREATE TABLE `rstudio_interpreter` ( `name` varchar(255) COLLATE latin1_general_cs NOT NULL, `created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, `last_accessed` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, - PRIMARY KEY (`port`,`name`), - CONSTRAINT `FK_575_582` FOREIGN KEY (`port`) REFERENCES `rstudio_project` (`port`) ON DELETE CASCADE ON UPDATE NO ACTION + PRIMARY KEY (`port`,`name`), + CONSTRAINT `FK_575_582` FOREIGN KEY (`port`) REFERENCES `rstudio_project` (`port`) ON DELETE CASCADE ON UPDATE NO + ACTION ) ENGINE=ndbcluster DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs; 
/*!40101 SET character_set_client = @saved_cs_client */; @@ -1068,20 +1069,21 @@ CREATE TABLE `rstudio_interpreter` ( /*!40101 SET @saved_cs_client = @@character_set_client */; /*!40101 SET character_set_client = utf8 */; CREATE TABLE `rstudio_project` ( - `port` int(11) NOT NULL, - `hdfs_user_id` int(11) NOT NULL, - `created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, - `last_accessed` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, - `host_ip` varchar(255) COLLATE latin1_general_cs NOT NULL, - `token` varchar(255) COLLATE latin1_general_cs NOT NULL, - `secret` varchar(64) COLLATE latin1_general_cs NOT NULL, - `pid` bigint(20) NOT NULL, - `project_id` int(11) NOT NULL, - PRIMARY KEY (`port`), - KEY `hdfs_user_idx` (`hdfs_user_id`), - KEY `project_id` (`project_id`), - CONSTRAINT `FK_103_577` FOREIGN KEY (`hdfs_user_id`) REFERENCES `hops`.`hdfs_users` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION, - CONSTRAINT `FK_284_578` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION + `port` int NOT NULL, + `hdfs_user_id` int NOT NULL, + `created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, + `expires` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, + `last_accessed` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `secret` varchar(64) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL, + `pid` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL, + `project_id` int NOT NULL, + `login_password` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs DEFAULT NULL, + `login_username` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs DEFAULT NULL, + PRIMARY KEY (`port`), + KEY `hdfs_user_idx` (`hdfs_user_id`), + KEY `project_id` (`project_id`), + CONSTRAINT `FK_103_577` FOREIGN KEY (`hdfs_user_id`) REFERENCES `hops`.`hdfs_users` (`id`) ON DELETE CASCADE, + CONSTRAINT `FK_284_578` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE ) ENGINE=ndbcluster DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs; /*!40101 SET character_set_client = @saved_cs_client */; @@ -1092,35 +1094,20 @@ CREATE TABLE `rstudio_project` ( /*!40101 SET @saved_cs_client = @@character_set_client */; /*!40101 SET character_set_client = utf8 */; CREATE TABLE `rstudio_settings` ( - `project_id` int(11) NOT NULL, - `team_member` varchar(150) COLLATE latin1_general_cs NOT NULL, - `num_tf_ps` int(11) DEFAULT '1', - `num_tf_gpus` int(11) DEFAULT '0', - `num_mpi_np` int(11) DEFAULT '1', - `appmaster_cores` int(11) DEFAULT '1', - `appmaster_memory` int(11) DEFAULT '1024', - `num_executors` int(11) DEFAULT '1', - `num_executor_cores` int(11) DEFAULT '1', - `executor_memory` int(11) DEFAULT '1024', - `dynamic_initial_executors` int(11) DEFAULT '1', - `dynamic_min_executors` int(11) DEFAULT '1', - `dynamic_max_executors` int(11) DEFAULT '1', - `secret` varchar(255) COLLATE latin1_general_cs NOT NULL, - `log_level` varchar(32) COLLATE latin1_general_cs DEFAULT 'INFO', - `mode` varchar(32) COLLATE latin1_general_cs NOT NULL, - `umask` varchar(32) COLLATE latin1_general_cs DEFAULT '022', - `advanced` tinyint(1) DEFAULT '0', - `archives` varchar(1500) COLLATE latin1_general_cs DEFAULT '', - `jars` varchar(1500) COLLATE latin1_general_cs DEFAULT '', - `files` varchar(1500) COLLATE latin1_general_cs DEFAULT '', - `py_files` varchar(1500) COLLATE latin1_general_cs DEFAULT '', - `spark_params` varchar(6500) COLLATE latin1_general_cs DEFAULT '', - `shutdown_level` int(11) NOT NULL DEFAULT '6', - PRIMARY 
KEY (`project_id`,`team_member`), - KEY `team_member` (`team_member`), - KEY `secret_idx` (`secret`), - CONSTRAINT `RS_FK_USERS` FOREIGN KEY (`team_member`) REFERENCES `users` (`email`) ON DELETE CASCADE ON UPDATE NO ACTION, - CONSTRAINT `RS_FK_PROJS` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION + `project_id` int(11) NOT NULL, + `team_member` varchar(150) COLLATE latin1_general_cs NOT NULL, + `secret` varchar(255) COLLATE latin1_general_cs NOT NULL, + `advanced` tinyint(1) DEFAULT '0', + `shutdown_level` int(11) NOT NULL DEFAULT '6', + `base_dir` varchar(255) COLLATE latin1_general_cs DEFAULT NULL, + `job_config` varchar(11000) COLLATE latin1_general_cs DEFAULT NULL, + `docker_config` varchar(1000) COLLATE latin1_general_cs DEFAULT NULL, + PRIMARY KEY (`project_id`,`team_member`), + KEY `team_member` (`team_member`), + KEY `secret_idx` (`secret`), + CONSTRAINT `RS_FK_USERS` FOREIGN KEY (`team_member`) REFERENCES `users` (`email`) ON DELETE CASCADE ON UPDATE NO + ACTION, + CONSTRAINT `RS_FK_PROJS` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION ) ENGINE=ndbcluster DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs; /*!40101 SET character_set_client = @saved_cs_client */; diff --git a/files/default/sql/ddl/updates/3.0.0.sql b/files/default/sql/ddl/updates/3.0.0.sql index 3b567cf82..681c37ad9 100644 --- a/files/default/sql/ddl/updates/3.0.0.sql +++ b/files/default/sql/ddl/updates/3.0.0.sql @@ -30,3 +30,26 @@ ALTER TABLE `hopsworks`.`dataset_shared_with` ADD COLUMN `accepted_by` INT(11) D ALTER TABLE `hopsworks`.`dataset_shared_with` ADD CONSTRAINT `fk_shared_by` FOREIGN KEY (`shared_by`) REFERENCES `users` (`uid`) ON DELETE NO ACTION ON UPDATE NO ACTION; ALTER TABLE `hopsworks`.`dataset_shared_with` ADD CONSTRAINT `fk_accepted_by` FOREIGN KEY (`accepted_by`) REFERENCES `users` (`uid`) ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE `hopsworks`.`rstudio_settings` DROP `num_tf_ps`, DROP `num_tf_gpus`, DROP `num_mpi_np`, +DROP `appmaster_cores`, DROP `appmaster_memory`, DROP `num_executors`, DROP `num_executor_cores`, + DROP `executor_memory`, DROP `dynamic_initial_executors`,DROP `dynamic_min_executors`, DROP `dynamic_max_executors`, + DROP `log_level`, DROP `mode`, DROP `umask`, DROP `archives`, DROP `jars`, DROP `files`,DROP `py_files`, DROP `spark_params`; + +ALTER TABLE `hopsworks`.`rstudio_project` DROP `host_ip`, DROP `token`; + +ALTER TABLE `hopsworks`.`rstudio_project` ADD COLUMN `expires` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP; + +ALTER TABLE `hopsworks`.`rstudio_project` ADD COLUMN `login_username` varchar(255) COLLATE latin1_general_cs DEFAULT + NULL; + +ALTER TABLE `hopsworks`.`rstudio_project` ADD COLUMN `login_password` varchar(255) COLLATE latin1_general_cs DEFAULT + NULL; + +ALTER TABLE `hopsworks`.`rstudio_project` MODIFY COLUMN `pid` varchar(255) COLLATE latin1_general_cs NOT NULL; + +ALTER TABLE `hopsworks`.`rstudio_settings` ADD COLUMN `job_config` varchar(11000) COLLATE latin1_general_cs DEFAULT + NULL; + +ALTER TABLE `hopsworks`.`rstudio_settings` ADD COLUMN `docker_config` varchar(11000) COLLATE latin1_general_cs DEFAULT + NULL; \ No newline at end of file diff --git a/files/default/sql/ddl/updates/undo/3.0.0__undo.sql b/files/default/sql/ddl/updates/undo/3.0.0__undo.sql index 9cc55cc0b..ede15446b 100644 --- a/files/default/sql/ddl/updates/undo/3.0.0__undo.sql +++ b/files/default/sql/ddl/updates/undo/3.0.0__undo.sql @@ -51,3 +51,39 @@ ALTER TABLE 
`hopsworks`.`dataset_shared_with` DROP COLUMN `shared_by`;
 ALTER TABLE `hopsworks`.`dataset_shared_with` DROP COLUMN `accepted_by`;
 DROP TABLE IF EXISTS `feature_store_code`;
+
+ALTER TABLE `hopsworks`.`rstudio_settings`
+  ADD COLUMN `num_tf_ps` int(11) DEFAULT '1',
+  ADD COLUMN `num_tf_gpus` int(11) DEFAULT '0',
+  ADD COLUMN `num_mpi_np` int(11) DEFAULT '1',
+  ADD COLUMN `appmaster_cores` int(11) DEFAULT '1',
+  ADD COLUMN `appmaster_memory` int(11) DEFAULT '1024',
+  ADD COLUMN `num_executors` int(11) DEFAULT '1',
+  ADD COLUMN `num_executor_cores` int(11) DEFAULT '1',
+  ADD COLUMN `executor_memory` int(11) DEFAULT '1024',
+  ADD COLUMN `dynamic_initial_executors` int(11) DEFAULT '1',
+  ADD COLUMN `dynamic_min_executors` int(11) DEFAULT '1',
+  ADD COLUMN `dynamic_max_executors` int(11) DEFAULT '1',
+  ADD COLUMN `log_level` varchar(32) COLLATE latin1_general_cs DEFAULT 'INFO',
+  ADD COLUMN `mode` varchar(32) COLLATE latin1_general_cs NOT NULL,
+  ADD COLUMN `umask` varchar(32) COLLATE latin1_general_cs DEFAULT '022',
+  ADD COLUMN `archives` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
+  ADD COLUMN `jars` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
+  ADD COLUMN `files` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
+  ADD COLUMN `py_files` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
+  ADD COLUMN `spark_params` varchar(6500) COLLATE latin1_general_cs DEFAULT '';
+
+ALTER TABLE `hopsworks`.`rstudio_project`
+  ADD COLUMN `host_ip` varchar(255) COLLATE latin1_general_cs NOT NULL,
+  ADD COLUMN `token` varchar(255) COLLATE latin1_general_cs NOT NULL;
+
+ALTER TABLE `hopsworks`.`rstudio_project`
+  DROP COLUMN `expires`,
+  DROP COLUMN `login_username`,
+  DROP COLUMN `login_password`;
+
+ALTER TABLE `hopsworks`.`rstudio_project` MODIFY COLUMN `pid` bigint(20) NOT NULL;
+
+ALTER TABLE `hopsworks`.`rstudio_settings` DROP COLUMN `job_config`;
+
+ALTER TABLE `hopsworks`.`rstudio_settings` DROP COLUMN `docker_config`;
diff --git a/metadata.rb b/metadata.rb
index 80baa0f09..0a0420cf7 100644
--- a/metadata.rb
+++ b/metadata.rb
@@ -830,6 +830,41 @@
   :description => "Set to 'true' to enable RStudio in Hopsworks. Default 'false'.",
   :type => 'string'
+attribute "hopsworks/rstudio_host",
+  :description => "Host for the RStudio server, e.g. localhost",
+  :type => "string"
+
+attribute "hopsworks/rstudio_origin_scheme",
+  :description => "The origin scheme for the RStudio server, e.g. https",
+  :type => "string"
+
+attribute "hopsworks/rstudio_www_address",
+  :description => "The network address that RStudio Server will listen on for incoming connections.",
+  :type => "string"
+
+attribute "hopsworks/rstudio_session_timeout_minutes",
+  :description => "The number of minutes before a session times out, at which point the session will either suspend or exit.",
+  :type => "string"
+
+attribute "hopsworks/rstudio_logging_level",
+  :description => "The minimum log level to capture. Can be one of debug, info, warn, or error.",
+  :type => "string"
+
+attribute "hopsworks/rstudio_logger_type",
+  :description => "The type of logger to use. Can be one of stderr, syslog, or file.",
+  :type => "string"
+
+attribute "hopsworks/rstudio_log_file_max_size",
+  :description => "Maximum allowable size of the file before it is rotated.
Only applicable if rotate is enabled.", + :type => "string" + +attribute "hopsworks/rstudio_dir", + :description => "Default installation directory for rstudio server", + :type => "string" + +attribute "rstudio/shutdown_timer_interval", + :description => "rstudio interval for shutting down expired rstudio servers", + :type => "string" ### PyPi attribute "hopsworks/pypi_rest_endpoint", diff --git a/recipes/install.rb b/recipes/install.rb index 78028c881..519a5bd0c 100644 --- a/recipes/install.rb +++ b/recipes/install.rb @@ -112,6 +112,14 @@ action :create end +#update permissions of base_dir for rstudio to 770 +directory node['rstudio']['base_dir'] do + owner node['hops']['yarnapp']['user'] + group node['hops']['group'] + mode "770" + action :create +end + directory node['hopsworks']['dir'] do owner node['hopsworks']['user'] group node['hopsworks']['group'] @@ -646,6 +654,14 @@ not_if { node['install']['kubernetes'].casecmp("true") == 0 } end +kagent_sudoers "rstudio" do + user node['glassfish']['user'] + group "root" + script_name "rstudio.sh" + template "rstudio.sh.erb" + run_as "ALL" # run this as root - inside we change to different users +end + kagent_sudoers "convert-ipython-notebook" do user node['glassfish']['user'] group "root" @@ -689,6 +705,15 @@ not_if { node['install']['kubernetes'].casecmp("true") == 0 } end +kagent_sudoers "rstudio-project-cleanup" do + user node['glassfish']['user'] + group "root" + script_name "rstudio-project-cleanup.sh" + template "rstudio-project-cleanup.sh.erb" + run_as "ALL" + not_if { node['install']['kubernetes'].casecmp("true") == 0 } +end + kagent_sudoers "global-ca-sign-csr" do user node['glassfish']['user'] group "root" @@ -740,7 +765,8 @@ ["zip-hdfs-files.sh", "zip-background.sh", "unzip-background.sh", "tensorboard-launch.sh", "tensorboard-cleanup.sh", "condasearch.sh", "list_environment.sh", "jupyter-kill.sh", - "jupyter-launch.sh", "tfserving-kill.sh", "sklearn_serving-launch.sh", "sklearn_serving-kill.sh"].each do |script| + "jupyter-launch.sh", "tfserving-kill.sh", "sklearn_serving-launch.sh", "sklearn_serving-kill.sh", "rstudio-kill.sh", + "rstudio-launch.sh"].each do |script| template "#{theDomain}/bin/#{script}" do source "#{script}.erb" owner node['glassfish']['user'] @@ -812,6 +838,14 @@ action :create end +#update permissions of base_dir to 770 +directory node["rstudio"]["base_dir"] do + owner node["rstudio"]["user"] + group node["rstudio"]["group"] + mode "770" + action :create +end + directory node["hopssite"]["certs_dir"] do owner node["glassfish"]["user"] group node['kagent']['certs_group'] diff --git a/templates/default/rstudio-kill.sh.erb b/templates/default/rstudio-kill.sh.erb new file mode 100644 index 000000000..4839e39d7 --- /dev/null +++ b/templates/default/rstudio-kill.sh.erb @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +help() { + echo "" + echo "usage: $0 CONTAINER_ID PROJECT_USER_NAME" + echo "" + exit 1 +} + + +if [ $# -ne 2 ]; then + help +fi + +if [ "$2" != "" ]; then + PROJECT_USER_NAME=$2 + CONTAINER_NAME=${PROJECT_USER_NAME}__rstudio + + echo "Killing input container_name: $CONTAINER_NAME" + docker rm -f "$CONTAINER_NAME" > /dev/null 2>&1 + exit $? +fi + +echo "Killing input container_id: $1" +docker rm -f "$1" > /dev/null 2>&1 +exit $? 
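For reference, a hand-run sketch of the kill script added above; Hopsworks itself only calls it through the rstudio.sh sudoers wrapper. The domains path, container id, and project user name below are placeholders (assumptions), not values taken from this change.

    #!/usr/bin/env bash
    # Illustrative only: DOMAINS_DIR should mirror node['glassfish']['domains_dir'] on your installation.
    DOMAINS_DIR=/srv/hops/glassfish/domains

    # Kill by project user name (the container is named <project_user_name>__rstudio):
    sudo "${DOMAINS_DIR}/domain1/bin/rstudio-kill.sh" "" "demo__meb10000"

    # Or kill by container id when only the id is known (second argument left empty):
    sudo "${DOMAINS_DIR}/domain1/bin/rstudio-kill.sh" "4f1c2d3e4a5b" ""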
+ diff --git a/templates/default/rstudio-launch.sh.erb b/templates/default/rstudio-launch.sh.erb new file mode 100644 index 000000000..83bde679d --- /dev/null +++ b/templates/default/rstudio-launch.sh.erb @@ -0,0 +1,127 @@ +#!/usr/bin/env bash + +WAIT_START=60 +RSTUDIO_HOME=$1 +HADOOP_HOME=$2 +HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop +PROJECT_USER_NAME=$3 +PORT=$4 +SECRET_DIR=$5 +CERTS_DIR=$6 +IMAGE=${7} +LOGFILE=${RSTUDIO_HOME}/logs/$8 +PROJECT_NAME=$9 +LIVY_IP=${10} +LIVY_PORT=${11} +HADOOP_BASE_DIR=${12} +SERVER_PASSWORD=${13} +HADOOP_VERSION=${14} +HADOOP_CLIENT_ENV_OPTS='-D fs.permissions.umask-mode=0002' +CONTAINER_NAME=${PROJECT_USER_NAME}__rstudio +PID_FILE=${RSTUDIO_HOME}/run/rstudio.pid +SPARK_CONF_DIR=<%= node['hadoop_spark']['conf_dir'] %> +FLINK_CONF_DIR=<%= node['flink']['conf_dir'] %> +NOT_FOUND=127 +SPARKLYR_CONFIG_FILE=${RSTUDIO_HOME}/conf/config.yml +NAMENODE_IP=${15} +NAMENODE_PORT=${16} +PROJECT_NAME=${17} +SPARK_VERSION=${18} +CLIENT_CERTIFICATES_BUNDLE=${CERTS_DIR}/${PROJECT_USER_NAME}_certificate_bundle.pem +ROOT_CA_BUNDLE=${CERTS_DIR}/${PROJECT_USER_NAME}_root_ca.pem +CLIENT_KEY=${CERTS_DIR}/${PROJECT_USER_NAME}_private_key.pem + +echo "Server password is ${SERVER_PASSWORD}" +help() { + echo "" + echo "usage: $0 RSTUDIO_HOME HADOOP_HOME PROJECT_USER_NAME PORT SECRET_DIR CERTS_DIR IMAGE LOGFILE PROJECT_NAME NAMENODE_HOST NAMENODE_PORT HADOOP_BASE_DIR HADOOP_CLIENT_ENV_OPTS SERVER_PASSWORD HADOOP_VERSION PROJECT_NAME SPARK_VERSION" + echo "" + exit 1 +} + +function kill_named { + CID=$(docker container list -a | grep $CONTAINER_NAME | grep -v grep | awk '{print $1}') + if [ "$CID" != "" ] ; then + docker rm -f "$CID" > /dev/null 2>&1 + res=$? + else + res=$NOT_FOUND + fi + return "$res" +} + +if [ $# -ne 18 ]; then + help +fi + +#check if the folders exist +cd "$RSTUDIO_HOME" || exit +cd "$SECRET_DIR" || exit + +kill_named + +if [ -f "$PID_FILE" ] ; then + rm $PID_FILE +fi + +echo "Secret dir is ${SECRET_DIR}" +docker run --rm -d --cap-add SYS_ADMIN --device /dev/fuse --security-opt apparmor:unconfined --name $CONTAINER_NAME --cidfile=$PID_FILE\ + --network=host \ + --init \ + -e "RSTUDIO_PATH=$RSTUDIO_HOME" \ + -e "RSTUDIO_DATA_DIR=$RSTUDIO_HOME" \ + -e "PDIR=$SECRET_DIR" \ + -e "RSTUDIO_CONFIG_DIR=${RSTUDIO_HOME}/conf" \ + -e "RSTUDIO_RUNTIME_DIR=${RSTUDIO_HOME}/run" \ + -e "HADOOP_HDFS_HOME=${HADOOP_HOME}" \ + -e "HADOOP_CONF_DIR=${HADOOP_CONF_DIR}" \ + -e "HADOOP_CLIENT_OPTS='-Dfs.permissions.umask-mode=0002'" \ + -e "MATERIAL_DIRECTORY=$CERTS_DIR" \ + -e "HADOOP_USER_NAME=$PROJECT_USER_NAME" \ + -e "HADOOP_HOME=${HADOOP_HOME}" \ + -e "LOGFILE=${LOGFILE}" \ + -e "RSTUDIO_PORT=${PORT}" \ + -e "PROJECT_NAME=${PROJECT_NAME}" \ + -e "LIVY_IP=${LIVY_IP}" \ + -e "LIVY_PORT=${LIVY_PORT}" \ + -e "HADOOP_BASE_DIR=${HADOOP_BASE_DIR}" \ + -e "HADOOP_CLIENT_ENV_OPTS=${HADOOP_CLIENT_ENV_OPTS}" \ + -e "SPARKLYR_CONFIG_FILE=${SPARKLYR_CONFIG_FILE}" \ + -e "SERVER_PASSWORD=${SERVER_PASSWORD}" \ + -e "HADOOP_VERSION=${HADOOP_VERSION}" \ + -e "NAMENODE_IP=${NAMENODE_IP}" \ + -e "NAMENODE_PORT=${NAMENODE_PORT}" \ + -e "PROJECT_NAME=${PROJECT_NAME}" \ + -e "SPARK_VERSION=${SPARK_VERSION}" \ + -e "CLIENT_CERTIFICATES_BUNDLE=${CLIENT_CERTIFICATES_BUNDLE}" \ + -e "ROOT_CA_BUNDLE=${ROOT_CA_BUNDLE}" \ + -e "CLIENT_KEY=${CLIENT_KEY}" \ + -v $RSTUDIO_HOME:$RSTUDIO_HOME:rw\ + -v $SECRET_DIR:$SECRET_DIR:rw\ + -v ${HADOOP_CONF_DIR}:${HADOOP_CONF_DIR}:ro \ + -v ${SPARK_CONF_DIR}:${SPARK_CONF_DIR}:ro \ + -u="yarnapp" \ + -w="$SECRET_DIR" \ + $IMAGE & \ + +# Wait for rstudio to start +timeout=0 
+while [ $timeout -lt $WAIT_START ] ; do
+  sleep 1
+  grep 'Connecting to sqlite3 database' "$LOGFILE"
+  if [ $? -eq 0 ] ; then
+    break
+  fi
+  echo -n "."
+  timeout=$((timeout + 1))
+done
+echo ""
+
+# If the timeout was exceeded, kill rstudio
+if [ "$timeout" -eq $WAIT_START ] ; then
+  kill_named
+fi
+
+
+exit $?
+
diff --git a/templates/default/rstudio-project-cleanup.sh.erb b/templates/default/rstudio-project-cleanup.sh.erb
new file mode 100644
index 000000000..4fd8cda25
--- /dev/null
+++ b/templates/default/rstudio-project-cleanup.sh.erb
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+# This script allows Hopsworks to clean up local directories for RStudio servers.
+# There will be 1 RStudio server per ProjectUser.
+# This script can be run by Hopsworks (running as user 'glassfish') as a sudo command, as 'root'.
+# The script should run as 'root' because the rstudio user does not have read privileges on the base directory for a
+# user's project and cannot do a recursive delete on the directory.
+#
+
+help() {
+  echo ""
+  echo "usage: $0 project"
+  echo "e.g., $0 <project_name>"
+  exit 1
+}
+
+if [ $# -ne 1 ]; then
+  help
+fi
+
+base="<%= node['rstudio']['base_dir'] %>/Projects/$1"
+# Sanity checks for injection attacks
+if [ ! -d "$base" ] ; then
+  echo "Invalid RSTUDIO_HOME directory: $base"
+  exit 1
+fi
+
+rm -rf "$base"
+
+exit $?
diff --git a/templates/default/rstudio.sh.erb b/templates/default/rstudio.sh.erb
new file mode 100644
index 000000000..e278e3fc6
--- /dev/null
+++ b/templates/default/rstudio.sh.erb
@@ -0,0 +1,216 @@
+#!/usr/bin/env bash
+
+# This script allows Hopsworks to start/kill RStudio.
+# There will be 1 rstudio-server per ProjectUser.
+# This script can be run by hopsworks (running as user 'glassfish') as a sudo command, whereupon
+# it switches to user 'rstudio' to run the command.
+#
+
+help() {
+  echo ""
+  echo "usage: $0 [start rstudio_home hadoop_home project_user_name port secret_dir certs_dir
+        image_name logfile project_name livy_ip livy_port hadoop_base_dir server_password hadoop_version
+        namenode_ip namenode_port project_name spark_version] | [kill rstudio_home container_id project_user_name] | [list] | [generate_pems certs_dir hadoop_username]"
+  echo ""
+  exit 1
+}
+
+
+DOMAINS_DIR=<%= node['glassfish']['domains_dir'] %>
+RSTUDIO_USER=<%= node['hops']['yarnapp']['user'] %>
+RSTUDIO_GROUP=<%= node['hops']['group'] %>
+HOPSWORKS_USER=<%= node['hopsworks']['user'] %>
+VALID_IMAGE_NAME='<%= node['conda']['docker']['image-validation-regex'] %>'
+
+if [ "$1" == "kill" ] ; then
+
+  if [ $# -ne 4 ]; then
+    help
+  fi
+
+  # Don't kill the pid from this script, as it is run with 'sudo' privileges. Only do it as user 'rstudio'
+  sudo ${DOMAINS_DIR}/domain1/bin/rstudio-kill.sh $3 $4
+
+  # Remove all the directories in the home rstudio folder for this project-user.
+  if [ "$2" != "" ] ; then
+    # Try to make sure an unrelated directory is not deleted by mistake
+    if [[ $2 = *"hops/rstudio/Projects"* ]]; then
+      rm -rf "${2}"/../*
+    fi
+  fi
+
+elif [ "$1" == "start" ] ; then
+
+  if [ $# -ne 19 ]; then
+    help
+  fi
+
+  # Sanity checks for injection attacks
+  if [ ! -d "$2" ] ; then
+    echo "Invalid RSTUDIO_HOME directory: $2"
+    exit 1
+  fi
+  if [ ! -d "$3" ] ; then
+    echo "Invalid HADOOP_HOME directory: $3"
+    exit 2
+  fi
+
+  if [ ! -d "${7}" ] ; then
+    echo "Invalid certificates directory: ${7}"
+    exit 3
+  fi
+
+  re='^[0-9]+$'
+  if ! [[ $5 =~ $re ]] ; then
+    echo "error: Not a number" >&2
+    help
+  fi
+
+  if ! [[ ${8} =~ $VALID_IMAGE_NAME ]] ; then
+    echo "error: Not a valid image name ${8}" >&2
+    help
+  fi
+
+  pid=$(ps -ef | grep -E "rstudio-server" | grep "port=$5" | awk '{print $2}')
+  if [ "$pid" != "" ] ; then
+    echo "There is already an rstudio server using this port"
+    exit 1
+  fi
+
+  if [[ $6 =~ ^\/$|(^(?=\/)|^\.|^\.\.)(\/(?=[^/\0])[^/\0]+)*\/?$ ]] ; then
+    echo "secret_dir looks like a path, ok"
+  else
+    echo "Invalid secret_dir parameter, doesn't look like a path: $6"
+    exit 4
+  fi
+
+  mkdir -p "$6"
+  if [ $? -ne 0 ] ; then
+    echo "Error: could not create private_dir: $6"
+    exit 1
+  fi
+  chmod 770 "$6"
+  chown "${RSTUDIO_USER}":"${RSTUDIO_GROUP}" "$6"
+
+  # Make the rstudio group the group owner of the files so that it can write to them
+  chown -R "${HOPSWORKS_USER}":"${RSTUDIO_GROUP}" "$2/"..
+  if [ $? -ne 0 ] ; then
+    echo "Error: could not change ownership of config_dir for RStudio: $2"
+    exit 1
+  fi
+  chmod 0730 "$2/"..
+  chown -R "${RSTUDIO_USER}":"${RSTUDIO_GROUP}" "$2"
+  chmod -R 770 "$2"
+
+  # Launch RStudio server
+  ${DOMAINS_DIR}/domain1/bin/rstudio-launch.sh $2 $3 $4 $5 $6 $7 $8 $9 ${10} ${11} ${12} ${13} ${14} ${15} ${16} ${17} ${18} ${19}
+
+# list
+elif [ "$1" == "list" ] ; then
+  # This command will output line-separated PIDs for all running rstudio server instances into the tmp file, from
+  # where it is read by Hopsworks
+  docker container list -a | grep "__rstudio" | grep -v 'grep' | awk '{print $1}' > /tmp/rstudioServerPids.pids
+elif [ "$1" == "generate_pems" ]; then
+  if [ $# -ne 3 ]; then
+    help
+  fi
+
+  CERTS_DIR=$2
+  HADOOP_USERNAME=$3
+  TSTORE_FILE=$CERTS_DIR/${HADOOP_USERNAME}__tstore.jks
+  KSTORE_FILE=$CERTS_DIR/${HADOOP_USERNAME}__kstore.jks
+  KEY_FILE=$CERTS_DIR/${HADOOP_USERNAME}__cert.key
+
+  KEY=$( cat ${KEY_FILE} )
+
+  #1. generate pem certificates bundle from the keystore.jks file
+  printf "$KEY\n$KEY\n$KEY\n" | keytool -importkeystore -srckeystore $KSTORE_FILE -destkeystore $CERTS_DIR/${HADOOP_USERNAME}__keystore.p12 -deststoretype PKCS12
+  echo $KEY | keytool -deststoretype PKCS12 -keystore $CERTS_DIR/${HADOOP_USERNAME}__keystore.p12 -list
+  echo $KEY | openssl pkcs12 -nokeys -in $CERTS_DIR/${HADOOP_USERNAME}__keystore.p12 -out $CERTS_DIR/${HADOOP_USERNAME}_certificate_bundle.pem
+
+  #2. generate root ca pem from the tstore.jks file
+  printf "$KEY\n$KEY\n$KEY\n" | keytool -importkeystore -srckeystore $TSTORE_FILE -destkeystore $CERTS_DIR/${HADOOP_USERNAME}__tstore.p12 -deststoretype PKCS12
+  echo $KEY | keytool -deststoretype PKCS12 -keystore $CERTS_DIR/${HADOOP_USERNAME}__tstore.p12 -list
+  echo $KEY | openssl pkcs12 -nokeys -in $CERTS_DIR/${HADOOP_USERNAME}__tstore.p12 -out $CERTS_DIR/${HADOOP_USERNAME}_root_ca.pem
+
+  #3. extract private key from the keystore
+  echo $KEY | openssl pkcs12 -info -in $CERTS_DIR/${HADOOP_USERNAME}__keystore.p12 -nodes -nocerts > $CERTS_DIR/${HADOOP_USERNAME}_private_key.pem
+
+  #4. verify that files have been generated
+  CERTIFICATES_BUNDLE=$CERTS_DIR/${HADOOP_USERNAME}_certificate_bundle.pem
+  if [ ! -f ${CERTIFICATES_BUNDLE} ]; then
+    echo "Failed to generate certificates bundle for project"
+    exit 4
+  fi
+  ROOT_CA=$CERTS_DIR/${HADOOP_USERNAME}_root_ca.pem
+  if [ ! -f ${ROOT_CA} ]; then
+    echo "Failed to generate root ca for project"
+    exit 5
+  fi
+  PRIVATE_KEY=$CERTS_DIR/${HADOOP_USERNAME}_private_key.pem
+  if [ !
-f ${PRIVATE_KEY} ]; then + echo "Failed to generate private key for project" + exit 6 + fi + + chmod 644 $ROOT_CA + chmod 644 $CERTIFICATES_BUNDLE + chmod 644 $PRIVATE_KEY + + chown glassfish:glassfish $ROOT_CA + chown glassfish:glassfish $CERTIFICATES_BUNDLE + chown glassfish:glassfish $PRIVATE_KEY + + rm $CERTS_DIR/${HADOOP_USERNAME}__keystore.p12 + rm $CERTS_DIR/${HADOOP_USERNAME}__tstore.p12 + + #put root and intermediate cas in one pem + #Define the string to split + CERT_BUNDLE_TEXT=$( cat $CERTIFICATES_BUNDLE ) + + #Define multi-character delimiter + DELIMETER="-----END CERTIFICATE-----" + #Concatenate the delimiter with the main string + STRDELIM=$CERT_BUNDLE_TEXT$DELIMETER + + #Split the text based on the delimiter + #put root and intermediate cas in one pem + #Define the string to split + CERT_BUNDLE_TEXT=$( cat $CERTIFICATES_BUNDLE) + + #Define multi-character delimiter + DELIMETER="-----END CERTIFICATE-----" + #Concatenate the delimiter with the main string + STRDELIM=$CERT_BUNDLE_TEXT$DELIMETER + + #Split the text based on the delimiter + STR_ARRAY=() + while [[ $STRDELIM ]]; do + STR_ARRAY+=( "${STRDELIM%%"$DELIMETER"*}" ) + STRDELIM=${STRDELIM#*"$DELIMETER"} + done + + INTERMEDIATE_CA=${STR_ARRAY[0]} + + DELIMETER="-----BEGIN CERTIFICATE-----" + + STRDELIM=$INTERMEDIATE_CA$DELIMETER + STR_ARRAY=() + while [[ $STRDELIM ]]; do + STR_ARRAY+=( "${STRDELIM%%"$DELIMETER"*}" ) + STRDELIM=${STRDELIM#*"$DELIMETER"} + done + + PROCESSED=${STR_ARRAY[1]} + + cp $CERTS_DIR/${HADOOP_USERNAME}_root_ca.pem $CERTS_DIR/${HADOOP_USERNAME}_ca_bundle.pem + + echo "-----BEGIN CERTIFICATE-----" >> $CERTS_DIR/${HADOOP_USERNAME}_ca_bundle.pem + echo $PROCESSED | tr " " "\n" >> $CERTS_DIR/${HADOOP_USERNAME}_ca_bundle.pem + echo "-----END CERTIFICATE-----" >> $CERTS_DIR/${HADOOP_USERNAME}_ca_bundle.pem + + chmod 644 $CERTS_DIR/${HADOOP_USERNAME}_ca_bundle.pem + chown glassfish:glassfish $CERTS_DIR/${HADOOP_USERNAME}_ca_bundle.pem + +else + help +fi diff --git a/templates/default/sql/dml/3.0.0.sql.erb b/templates/default/sql/dml/3.0.0.sql.erb index 1baac1efd..439e58a77 100644 --- a/templates/default/sql/dml/3.0.0.sql.erb +++ b/templates/default/sql/dml/3.0.0.sql.erb @@ -4,3 +4,16 @@ REPLACE INTO `hopsworks`.`variables` VALUES ("kube_serving_node_tolerations", "< DELETE FROM `hopsworks`.`variables` WHERE id="ndb_dir"; DELETE FROM `hopsworks`.`variables` WHERE id="mysql_dir"; DELETE FROM `hopsworks`.`variables` WHERE id="mysql_user"; + +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_host", "<%= node['hopsworks']['rstudio_host'] %>"); +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_origin_scheme", "<%= node['hopsworks']['rstudio_origin_scheme'] %>"); +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_www_address", "<%= node['hopsworks']['rstudio_www_address'] %>"); +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_session_timeout_minutes", "<%= node['hopsworks']['rstudio_session_timeout_minutes'] %>"); +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_logging_level", "<%= node['hopsworks']['rstudio_logging_level'] %>"); +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_logger_type", "<%= node['hopsworks']['rstudio_logger_type'] %>"); +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_log_file_max_size", "<%= node['hopsworks']['rstudio_log_file_max_size'] %>"); +REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_dir", "<%= 
node['hopsworks']['rstudio_dir'] %>");
+REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_user", "<%= node['hops']['yarnapp']['user'] %>");
+REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_group", "<%= node['hops']['group'] %>");
+REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_cran_repo", "<%= node['hopsworks']['rstudio_default_cran_repo'] %>");
+REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("rstudio_shutdown_timer_interval", "<%= node['rstudio']['shutdown_timer_interval'] %>");
diff --git a/templates/default/sql/dml/undo/3.0.0__undo.sql.erb b/templates/default/sql/dml/undo/3.0.0__undo.sql.erb
index 828b610c8..363307b95 100644
--- a/templates/default/sql/dml/undo/3.0.0__undo.sql.erb
+++ b/templates/default/sql/dml/undo/3.0.0__undo.sql.erb
@@ -4,3 +4,16 @@ DELETE FROM `hopsworks`.`variables` WHERE `id`='kube_serving_node_tolerations';
 REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("mysql_user", "<%= node['ndb']['user'] %>");
 REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("ndb_dir", "<%= node['ndb']['dir'] %>/mysql-cluster");
 REPLACE INTO `hopsworks`.`variables`(`id`, `value`) VALUES ("mysql_dir", "<%= node['mysql']['dir'] %>/mysql");
+
+DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_host';
+DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_origin_scheme';
+DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_www_address';
+DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_session_timeout_minutes';
+DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_logging_level';
+DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_logger_type';
+DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_log_file_max_size';
+DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_dir';
+DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_user';
+DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_group';
+DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_cran_repo';
+DELETE FROM `hopsworks`.`variables` WHERE `id`='rstudio_shutdown_timer_interval';
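To make the moving pieces above easier to follow, a hedged usage sketch of the new rstudio.sh sudoers wrapper: the list branch dumps the container ids of running RStudio servers, and generate_pems converts a project user's JKS stores into the PEM material that rstudio-launch.sh mounts into the container. The paths and the project user name below are assumptions, not values taken from this change.

    #!/usr/bin/env bash
    DOMAINS_DIR=/srv/hops/glassfish/domains          # assumption: node['glassfish']['domains_dir']
    CERTS_DIR=/path/to/user/certificate/material     # assumption: per project-user material directory
    HADOOP_USERNAME=demo__meb10000                   # hypothetical project__username

    # Write the container ids of all running RStudio servers to /tmp/rstudioServerPids.pids
    sudo "${DOMAINS_DIR}/domain1/bin/rstudio.sh" list

    # Generate <user>_certificate_bundle.pem, <user>_root_ca.pem and <user>_private_key.pem from the JKS stores
    sudo "${DOMAINS_DIR}/domain1/bin/rstudio.sh" generate_pems "$CERTS_DIR" "$HADOOP_USERNAME"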