Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Hopsworks-2702] Hopsworks RStudio Integration #652

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 22 additions & 23 deletions Berksfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,28 @@ source 'https://supermarket.chef.io'

metadata

cookbook 'conda', github: "logicalclocks/conda-chef", branch: "master"
cookbook 'kagent', github: "logicalclocks/kagent-chef", branch: "master"
cookbook 'hops', github: "logicalclocks/hops-hadoop-chef", branch: "master"
cookbook 'ndb', github: "logicalclocks/ndb-chef", branch: "master"
cookbook 'hadoop_spark', github: "logicalclocks/spark-chef", branch: "master"
cookbook 'flink', github: "logicalclocks/flink-chef", branch: "master"
cookbook 'livy', github: "logicalclocks/livy-chef", branch: "master"
cookbook 'epipe', github: "logicalclocks/epipe-chef", branch: "master"
cookbook 'tensorflow', github: "logicalclocks/tensorflow-chef", branch: "master"
cookbook 'kzookeeper', github: "logicalclocks/kzookeeper", branch: "master"
cookbook 'kkafka', github: "logicalclocks/kafka-cookbook", branch: "master"
cookbook 'elastic', github: "logicalclocks/elasticsearch-chef", branch: "master"
cookbook 'hopslog', github: "logicalclocks/hopslog-chef", branch: "master"
cookbook 'hopsmonitor', github: "logicalclocks/hopsmonitor-chef", branch: "master"
cookbook 'hops_airflow', github: "logicalclocks/airflow-chef", branch: "master"
cookbook 'hive2', github: "logicalclocks/hive-chef", branch: "master"
cookbook 'consul', github: "logicalclocks/consul-chef", branch: "master"
cookbook 'kube-hops', github: "logicalclocks/kube-hops-chef", branch: "master"
cookbook 'cloud', github: "logicalclocks/cloud-chef", branch: "master"
cookbook 'onlinefs', github: "logicalclocks/onlinefs-chef", branch: "master"
cookbook 'flyingduck', github: "logicalclocks/flyingduck-chef", branch: "master"
cookbook 'conda', github: "logicalclocks/conda-chef", branch: "3.4"
cookbook 'kagent', github: "logicalclocks/kagent-chef", branch: "3.4"
cookbook 'hops', github: "logicalclocks/hops-hadoop-chef", branch: "3.4"
cookbook 'ndb', github: "logicalclocks/ndb-chef", branch: "3.4"
cookbook 'hadoop_spark', github: "logicalclocks/spark-chef", branch: "3.4"
cookbook 'flink', github: "logicalclocks/flink-chef", branch: "3.4"
cookbook 'livy', github: "logicalclocks/livy-chef", branch: "3.4"
cookbook 'epipe', github: "logicalclocks/epipe-chef", branch: "3.4"
cookbook 'tensorflow', github: "logicalclocks/tensorflow-chef", branch: "3.4"
cookbook 'kzookeeper', github: "logicalclocks/kzookeeper", branch: "3.4"
cookbook 'kkafka', github: "logicalclocks/kafka-cookbook", branch: "3.4"
cookbook 'elastic', github: "logicalclocks/elasticsearch-chef", branch: "3.4"
cookbook 'hopslog', github: "logicalclocks/hopslog-chef", branch: "3.4"
cookbook 'hopsmonitor', github: "logicalclocks/hopsmonitor-chef", branch: "3.4"
cookbook 'hops_airflow', github: "logicalclocks/airflow-chef", branch: "3.4"
cookbook 'hive2', github: "logicalclocks/hive-chef", branch: "3.4"
cookbook 'consul', github: "logicalclocks/consul-chef", branch: "3.4"
cookbook 'kube-hops', github: "logicalclocks/kube-hops-chef", branch: "3.4"
cookbook 'cloud', github: "logicalclocks/cloud-chef", branch: "3.4"
cookbook 'onlinefs', github: "logicalclocks/onlinefs-chef", branch: "3.4"
cookbook 'flyingduck', github: "logicalclocks/flyingduck-chef", branch: "3.4"

cookbook 'ulimit', github: "logicalclocks/chef-ulimit", branch: "master"
cookbook 'glassfish', github: "logicalclocks/chef-glassfish", branch: "master"
cookbook 'glassfish', github: "logicalclocks/chef-glassfish", branch: "3.4"
cookbook 'java', github: "logicalclocks/java", branch: "v7.0.0-1"
cookbook 'packagecloud', '= 1.0.1'
5 changes: 4 additions & 1 deletion Karamelfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,7 @@ dependencies:
- recipe: hopsworks::loadbalancer
global:
- hopsworks::config_node

- recipe: hopsworks::reindex
global:
- hopsworks::default
- elastic::default
32 changes: 31 additions & 1 deletion attributes/default.rb
Original file line number Diff line number Diff line change
Expand Up @@ -521,4 +521,34 @@
default['judge']['port'] = "1111"
default['judge']['home'] = "#{node['install']['dir']}/judge"
default['judge']['etc'] = "#{node['judge']['home']}/etc"
default['judge']['logs'] = "#{node['judge']['home']}/logs"
default['judge']['logs'] = "#{node['judge']['home']}/logs"


# Directory for RStudio files inside the Hopsworks directory.
default['hopsworks']['rstudio_dir'] = node['hopsworks']['dir'] + '/rstudio'

# RStudio configuration defaults.
default['hopsworks']['rstudio_host'] = 'localhost'
default['hopsworks']['rstudio_origin_scheme'] = 'https'
default['hopsworks']['rstudio_www_address'] = '0.0.0.0'
default['hopsworks']['rstudio_session_timeout_minutes'] = 360
default['hopsworks']['rstudio_logging_level'] = 'info'
default['hopsworks']['rstudio_logger_type'] = 'file'
default['hopsworks']['rstudio_log_file_max_size'] = 512
default['hopsworks']['rstudio_default_cran_repo'] = 'https://cloud.r-project.org/'

# RStudio base directory: placed under the install dir when one is configured,
# otherwise under the Hopsworks directory.
rstudio_parent_dir =
  if node['install']['dir'].empty?
    node['hopsworks']['dir']
  else
    node['install']['dir']
  end
default['rstudio']['base_dir'] = rstudio_parent_dir + '/rstudio'
default['rstudio']['shutdown_timer_interval'] = '30m'

# CRAN
# NOTE(review): this mirror is plain http while rstudio_default_cran_repo above
# is https -- confirm whether both are needed and whether http is intended.
default['rstudio']['cran']['mirror'] = 'http://cran.rstudio.com/'

# APT configuration for Ubuntu or Debian installs. On any other platform no
# rstudio/apt attributes are set.
rstudio_apt_sources = {
  'ubuntu' => {
    'key' => 'E084DAB9',
    'keyserver' => 'keyserver.ubuntu.com',
    'uri' => 'http://cran.stat.ucla.edu/bin/linux/ubuntu'
  },
  'debian' => {
    'key' => '381BA480',
    'keyserver' => 'subkeys.pgp.net',
    'uri' => 'http://cran.stat.ucla.edu/bin/linux/debian'
  }
}
rstudio_apt_sources.fetch(node['platform'].downcase, {}).each do |attr_name, attr_value|
  default['rstudio']['apt'][attr_name] = attr_value
end
1 change: 1 addition & 0 deletions files/default/hopsworks_templates/config_template.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
"executorCores": ${conf.executorCores?c},
"executorMemory": "${conf.executorMemory}",
"proxyUser": "${conf.hdfsUser}",
"name": "${conf.livySessionName}",
"queue": "${conf.yarnQueue}",
"conf": {
${conf.sparkConfiguration}
Expand Down
6 changes: 6 additions & 0 deletions files/default/hopsworks_templates/rstudio_dockerfile_template
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Template for a custom RStudio image: starts from the configured base image,
# copies the configured build script into the working directory and runs it.
FROM ${conf.baseImage}

COPY ${conf.buildScript} ./
# chmod, execute and clean up in a single RUN so the build produces one layer
# instead of three and stops at the first failing step. The script still exists
# in the COPY layer above; the rm only keeps it out of the final filesystem.
RUN chmod +x ${conf.buildScript} && \
    ./${conf.buildScript} && \
    rm ${conf.buildScript}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[*]
log-level=warn
logger-type=syslog

[@rserver]
log-level=${conf.logLevel}
logger-type=${conf.loggerType}
max-size-mb=${conf.maxSizeMb}
log-file-include-pid=${conf.includePid}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
rsession-which-r=${conf.versionPath}
www-address=${conf.ipAddress}
www-port=${conf.port}
www-root-path=${conf.rootPath}
server-user=${conf.serverUser}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
session-timeout-minutes=${conf.sessionTimeoutMinutes}
r-cran-repos=${conf.cranRepo}
10 changes: 10 additions & 0 deletions files/default/hopsworks_templates/sparklyr_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
default:
livy.driverCores: ${conf.driverCores}
livy.driverMemory: "${conf.driverMemory}"
livy.numExecutors: ${conf.numExecutors}
livy.executorCores: ${conf.executorCores}
livy.executorMemory: "${conf.executorMemory}"
livy.proxyUser: "${conf.proxyUser}"
livy.queue: "${conf.yarnQueue}"
livy.name: "${conf.livyAppName}"
${conf.sparkConfiguration}
63 changes: 36 additions & 27 deletions files/default/sql/ddl/3.4.0__initial_tables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,7 @@ CREATE TABLE `project` (
`topic_name` VARCHAR(255) DEFAULT NULL,
`python_env_id` int(11) DEFAULT NULL,
`creation_status` tinyint(1) NOT NULL DEFAULT '0',
`rstudio_docker_image` varchar(255) COLLATE latin1_general_cs DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `projectname` (`projectname`),
KEY `user_idx` (`username`),
Expand Down Expand Up @@ -1084,17 +1085,21 @@ CREATE TABLE `rstudio_interpreter` (
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `rstudio_project` (
`port` int(11) NOT NULL,
`port` int NOT NULL,
`hdfs_user_id` int NOT NULL,
`created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`expires` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`last_accessed` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`host_ip` varchar(255) COLLATE latin1_general_cs NOT NULL,
`token` varchar(255) COLLATE latin1_general_cs NOT NULL,
`secret` varchar(64) COLLATE latin1_general_cs NOT NULL,
`pid` bigint(20) NOT NULL,
`project_id` int(11) NOT NULL,
`secret` varchar(64) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL,
`pid` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL,
`project_id` int NOT NULL,
`login_password` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs DEFAULT NULL,
`login_username` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs DEFAULT NULL,
PRIMARY KEY (`port`),
KEY `hdfs_user_idx` (`hdfs_user_id`),
KEY `project_id` (`project_id`),
CONSTRAINT `FK_284_578` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION
CONSTRAINT `FK_103_577` FOREIGN KEY (`hdfs_user_id`) REFERENCES `hops`.`hdfs_users` (`id`) ON DELETE CASCADE,
CONSTRAINT `FK_284_578` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE
) ENGINE=ndbcluster DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs;
/*!40101 SET character_set_client = @saved_cs_client */;

Expand All @@ -1107,39 +1112,43 @@ CREATE TABLE `rstudio_project` (
CREATE TABLE `rstudio_settings` (
`project_id` int(11) NOT NULL,
`team_member` varchar(150) COLLATE latin1_general_cs NOT NULL,
`num_tf_ps` int(11) DEFAULT '1',
`num_tf_gpus` int(11) DEFAULT '0',
`num_mpi_np` int(11) DEFAULT '1',
`appmaster_cores` int(11) DEFAULT '1',
`appmaster_memory` int(11) DEFAULT '1024',
`num_executors` int(11) DEFAULT '1',
`num_executor_cores` int(11) DEFAULT '1',
`executor_memory` int(11) DEFAULT '1024',
`dynamic_initial_executors` int(11) DEFAULT '1',
`dynamic_min_executors` int(11) DEFAULT '1',
`dynamic_max_executors` int(11) DEFAULT '1',
`secret` varchar(255) COLLATE latin1_general_cs NOT NULL,
`log_level` varchar(32) COLLATE latin1_general_cs DEFAULT 'INFO',
`mode` varchar(32) COLLATE latin1_general_cs NOT NULL,
`umask` varchar(32) COLLATE latin1_general_cs DEFAULT '022',
`advanced` tinyint(1) DEFAULT '0',
`archives` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
`jars` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
`files` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
`py_files` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
`spark_params` varchar(6500) COLLATE latin1_general_cs DEFAULT '',
`shutdown_level` int(11) NOT NULL DEFAULT '6',
`base_dir` varchar(255) COLLATE latin1_general_cs DEFAULT NULL,
`job_config` varchar(11000) COLLATE latin1_general_cs DEFAULT NULL,
`docker_config` varchar(1000) COLLATE latin1_general_cs DEFAULT NULL,
PRIMARY KEY (`project_id`,`team_member`),
KEY `team_member` (`team_member`),
KEY `secret_idx` (`secret`),
CONSTRAINT `RS_FK_USERS` FOREIGN KEY (`team_member`) REFERENCES `users` (`email`) ON DELETE CASCADE ON UPDATE NO ACTION,
CONSTRAINT `RS_FK_USERS` FOREIGN KEY (`team_member`) REFERENCES `users` (`email`) ON DELETE CASCADE ON UPDATE NO
ACTION,
CONSTRAINT `RS_FK_PROJS` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION
) ENGINE=ndbcluster DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs;
/*!40101 SET character_set_client = @saved_cs_client */;

/*!40101 SET character_set_client = utf8 */;
/*!40101 SET character_set_client = @saved_cs_client */;

--
-- Table structure for table `rstudio_environment_build`
--
-- Rows reference `project` (`id`) and `users` (`uid`); both foreign keys use
-- ON DELETE CASCADE, so a row is removed when its project or user is deleted.
-- No AUTO_INCREMENT start value is given: a fresh schema should start ids at
-- the default rather than at a counter copied from a dump.
CREATE TABLE `rstudio_environment_build` (
  `id` int NOT NULL AUTO_INCREMENT,
  `build_script` varchar(1000) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL,
  `user` int NOT NULL,
  `project` int NOT NULL,
  -- NOTE(review): build_start/build_finish are presumably epoch timestamps -- confirm unit.
  `build_start` bigint DEFAULT NULL,
  `build_finish` bigint DEFAULT NULL,
  `build_result` varchar(128) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL,
  `secret` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL,
  `logFile` varchar(1000) CHARACTER SET latin1 COLLATE latin1_general_cs DEFAULT NULL,
  `build_name` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs DEFAULT NULL,
  `description` varchar(1000) CHARACTER SET latin1 COLLATE latin1_general_cs DEFAULT NULL,
  PRIMARY KEY (`id`),
  KEY `user_fk` (`user`),
  KEY `rstudio_env_build_project_fk` (`project`),
  CONSTRAINT `rstudio_env_build_project_fk` FOREIGN KEY (`project`) REFERENCES `project` (`id`) ON DELETE CASCADE,
  CONSTRAINT `rstudio_env_build_usr_fkc` FOREIGN KEY (`user`) REFERENCES `users` (`uid`) ON DELETE CASCADE
) ENGINE=ndbcluster DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs;

--
-- Table structure for table `serving`
--
Expand Down
52 changes: 50 additions & 2 deletions files/default/sql/ddl/updates/3.4.0.sql
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,57 @@ WHERE REGEXP_SUBSTR(`subject`, "^([0-9]+)_([0-9]+)_(.+)_([0-9]+)(_onlinefs|$)");
UPDATE `hopsworks`.`feature_group` `fg`
JOIN `hopsworks`.`feature_store` `fs` ON `fg`.`feature_store_id` = `fs`.`id`
SET `fg`.`topic_name` = CONCAT(fs.project_id, "_", fg.id, "_", fg.name, "_", fg.version, IF(fg.online_enabled , "_onlinefs", ""));
SET SQL_SAFE_UPDATES = 1;

-- FSTORE-1010: Don't leave orphaned subjects when deleting online enabled fg
UPDATE `hopsworks`.`project_topics`
SET `subject_id` = NULL
WHERE REGEXP_SUBSTR(`topic_name`, "^([0-9]+)_([0-9]+)_(.+)_([0-9]+)(_onlinefs|$)");
WHERE REGEXP_SUBSTR(`topic_name`, "^([0-9]+)_([0-9]+)_(.+)_([0-9]+)(_onlinefs|$)");

SET SQL_SAFE_UPDATES = 1;


-- RStudio updates: schema changes to `rstudio_project`, `rstudio_settings`
-- and `project`.

-- rstudio_project: drop the `host_ip` and `token` columns.
-- NOTE(review): the 3.4.0 initial-tables DDL for `rstudio_project` also lists
-- `hdfs_user_id`, `expires`, KEY `hdfs_user_idx` and CONSTRAINT `FK_103_577`;
-- nothing below adds them -- confirm they already exist on upgraded installs.
ALTER TABLE `hopsworks`.`rstudio_project` DROP COLUMN `host_ip`,
DROP COLUMN `token`;

-- rstudio_project: add nullable `login_password` and `login_username` columns.
ALTER TABLE `hopsworks`.`rstudio_project` ADD COLUMN `login_password` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs DEFAULT NULL;
ALTER TABLE `hopsworks`.`rstudio_project` ADD COLUMN `login_username` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs DEFAULT NULL;

-- rstudio_project: redefine `pid` as varchar(255) DEFAULT NULL.
-- NOTE(review): the 3.4.0 initial-tables DDL declares `pid` varchar(255) NOT NULL;
-- confirm which nullability is intended so upgraded and fresh schemas match.
ALTER TABLE `hopsworks`.`rstudio_project` MODIFY COLUMN `pid` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs DEFAULT NULL;

-- rstudio_settings: drop the 19 columns listed below in a single ALTER.
ALTER TABLE `hopsworks`.`rstudio_settings`
DROP COLUMN num_tf_ps, DROP COLUMN num_tf_gpus,
DROP COLUMN num_mpi_np, DROP COLUMN appmaster_cores,
DROP COLUMN appmaster_memory, DROP COLUMN num_executors,
DROP COLUMN num_executor_cores, DROP COLUMN executor_memory,
DROP COLUMN dynamic_initial_executors, DROP COLUMN dynamic_min_executors,
DROP COLUMN dynamic_max_executors, DROP COLUMN log_level,
DROP COLUMN mode, DROP COLUMN umask,
DROP COLUMN archives, DROP COLUMN jars,
DROP COLUMN files, DROP COLUMN py_files,
DROP COLUMN spark_params;

-- rstudio_settings: add nullable `base_dir`, `job_config` and `docker_config` columns.
ALTER TABLE `hopsworks`.`rstudio_settings` ADD COLUMN `base_dir` varchar(255) COLLATE latin1_general_cs DEFAULT NULL;
ALTER TABLE `hopsworks`.`rstudio_settings` ADD COLUMN `job_config` varchar(11000) COLLATE latin1_general_cs DEFAULT NULL;
ALTER TABLE `hopsworks`.`rstudio_settings` ADD COLUMN `docker_config` varchar(1000) COLLATE latin1_general_cs DEFAULT NULL;

-- project: add nullable `rstudio_docker_image` column.
ALTER TABLE `hopsworks`.`project` ADD COLUMN `rstudio_docker_image` VARCHAR(255) COLLATE latin1_general_cs DEFAULT NULL;

-- Create the `rstudio_environment_build` table.
-- The table and its foreign-key targets are schema-qualified like the other
-- statements in this script, so the result does not depend on the default
-- database of the session. IF NOT EXISTS keeps the statement re-runnable.
-- Both foreign keys use ON DELETE CASCADE, so a row is removed when its
-- project or user is deleted. No AUTO_INCREMENT start value is given: ids
-- should start at the default rather than at a counter copied from a dump.
CREATE TABLE IF NOT EXISTS `hopsworks`.`rstudio_environment_build` (
  `id` int NOT NULL AUTO_INCREMENT,
  `build_script` varchar(1000) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL,
  `user` int NOT NULL,
  `project` int NOT NULL,
  -- NOTE(review): build_start/build_finish are presumably epoch timestamps -- confirm unit.
  `build_start` bigint DEFAULT NULL,
  `build_finish` bigint DEFAULT NULL,
  `build_result` varchar(128) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL,
  `secret` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL,
  `logFile` varchar(1000) CHARACTER SET latin1 COLLATE latin1_general_cs DEFAULT NULL,
  `build_name` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs DEFAULT NULL,
  `description` varchar(1000) CHARACTER SET latin1 COLLATE latin1_general_cs DEFAULT NULL,
  PRIMARY KEY (`id`),
  KEY `user_fk` (`user`),
  KEY `rstudio_env_build_project_fk` (`project`),
  CONSTRAINT `rstudio_env_build_project_fk` FOREIGN KEY (`project`) REFERENCES `hopsworks`.`project` (`id`) ON DELETE CASCADE,
  CONSTRAINT `rstudio_env_build_usr_fkc` FOREIGN KEY (`user`) REFERENCES `hopsworks`.`users` (`uid`) ON DELETE CASCADE
) ENGINE=ndbcluster DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs;
1 change: 1 addition & 0 deletions metadata.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
recipe "hopsworks::migrate", "Call expat to migrate between Hopsworks versions"

recipe "hopsworks::purge", "Deletes glassfish installation."
recipe "hopsworks::reindex", "Reindex the featurestore search index"
#######################################################################################
# Required Attributes
#######################################################################################
Expand Down
6 changes: 5 additions & 1 deletion providers/worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,12 @@
start_domain_timeout: systemd_start_timeout,
stop_domain_timeout: systemd_stop_timeout,
authbind: new_resource.requires_authbind)
notifies :start, "service[#{service_name}]", :delayed
end

kagent_config service_name do
action :systemd_reload
end

if node['services']['enabled'].casecmp?("true")
service service_name do
supports start: true, restart: true, stop: true, status: true
Expand Down
Loading