Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nixos/hadoop: package rewrite and module improvements #141143

Merged
merged 3 commits into from
Oct 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 23 additions & 11 deletions nixos/modules/services/cluster/hadoop/conf.nix
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{ hadoop, pkgs }:
{ cfg, pkgs, lib }:
let
propertyXml = name: value: ''
<property>
Expand All @@ -13,19 +13,31 @@ let
${builtins.concatStringsSep "\n" (pkgs.lib.mapAttrsToList propertyXml properties)}
</configuration>
'';
cfgLine = name: value: ''
${name}=${builtins.toString value}
'';
cfgFile = fileName: properties: pkgs.writeTextDir fileName ''
# generated by NixOS
${builtins.concatStringsSep "" (pkgs.lib.mapAttrsToList cfgLine properties)}
'';
userFunctions = ''
hadoop_verify_logdir() {
echo Skipping verification of log directory
}
'';
hadoopEnv = ''
export HADOOP_LOG_DIR=/tmp/hadoop/$USER
'';
in
pkgs.buildEnv {
name = "hadoop-conf";
paths = [
(siteXml "core-site.xml" hadoop.coreSite)
(siteXml "hdfs-site.xml" hadoop.hdfsSite)
(siteXml "mapred-site.xml" hadoop.mapredSite)
(siteXml "yarn-site.xml" hadoop.yarnSite)
(pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions)
];
}
pkgs.runCommand "hadoop-conf" {} ''
mkdir -p $out/
cp ${siteXml "core-site.xml" cfg.coreSite}/* $out/
cp ${siteXml "hdfs-site.xml" cfg.hdfsSite}/* $out/
cp ${siteXml "mapred-site.xml" cfg.mapredSite}/* $out/
cp ${siteXml "yarn-site.xml" cfg.yarnSite}/* $out/
cp ${cfgFile "container-executor.cfg" cfg.containerExecutorCfg}/* $out/
cp ${pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions}/* $out/
cp ${pkgs.writeTextDir "hadoop-env.sh" hadoopEnv}/* $out/
cp ${cfg.log4jProperties} $out/log4j.properties
${lib.concatMapStringsSep "\n" (dir: "cp -r ${dir}/* $out/") cfg.extraConfDirs}
''
80 changes: 72 additions & 8 deletions nixos/modules/services/cluster/hadoop/default.nix
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
{ config, lib, pkgs, ...}:

let
cfg = config.services.hadoop;
in
with lib;
{
imports = [ ./yarn.nix ./hdfs.nix ];
Expand All @@ -17,7 +19,9 @@ with lib;
};

hdfsSite = mkOption {
default = {};
default = {
"dfs.namenode.rpc-bind-host" = "0.0.0.0";
};
type = types.attrsOf types.anything;
example = literalExpression ''
{
Expand All @@ -28,27 +32,81 @@ with lib;
};

mapredSite = mkOption {
default = {};
default = {
"mapreduce.framework.name" = "yarn";
"yarn.app.mapreduce.am.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
"mapreduce.map.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
"mapreduce.reduce.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
};
type = types.attrsOf types.anything;
example = literalExpression ''
{
"mapreduce.map.cpu.vcores" = "1";
options.services.hadoop.mapredSite.default // {
"mapreduce.map.java.opts" = "-Xmx900m -XX:+UseParallelGC";
}
'';
description = "Hadoop mapred-site.xml definition";
};

yarnSite = mkOption {
default = {};
default = {
"yarn.nodemanager.admin-env" = "PATH=$PATH";
"yarn.nodemanager.aux-services" = "mapreduce_shuffle";
"yarn.nodemanager.aux-services.mapreduce_shuffle.class" = "org.apache.hadoop.mapred.ShuffleHandler";
"yarn.nodemanager.bind-host" = "0.0.0.0";
"yarn.nodemanager.container-executor.class" = "org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor";
"yarn.nodemanager.env-whitelist" = "JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,LANG,TZ";
"yarn.nodemanager.linux-container-executor.group" = "hadoop";
"yarn.nodemanager.linux-container-executor.path" = "/run/wrappers/yarn-nodemanager/bin/container-executor";
"yarn.nodemanager.log-dirs" = "/var/log/hadoop/yarn/nodemanager";
"yarn.resourcemanager.bind-host" = "0.0.0.0";
"yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler";
};
type = types.attrsOf types.anything;
example = literalExpression ''
{
"yarn.resourcemanager.ha.id" = "resourcemanager1";
options.services.hadoop.yarnSite.default // {
"yarn.resourcemanager.hostname" = "''${config.networking.hostName}";
}
'';
description = "Hadoop yarn-site.xml definition";
};

log4jProperties = mkOption {
default = "${cfg.package}/lib/${cfg.package.untarDir}/etc/hadoop/log4j.properties";
type = types.path;
example = literalExpression ''
"''${pkgs.hadoop}/lib/''${pkgs.hadoop.untarDir}/etc/hadoop/log4j.properties";
'';
description = "log4j.properties file added to HADOOP_CONF_DIR";
};

containerExecutorCfg = mkOption {
default = {
# must be the same as yarn.nodemanager.linux-container-executor.group in yarnSite
"yarn.nodemanager.linux-container-executor.group"="hadoop";
"min.user.id"=1000;
"feature.terminal.enabled"=1;
};
type = types.attrsOf types.anything;
example = literalExpression ''
options.services.hadoop.containerExecutorCfg.default // {
"feature.terminal.enabled" = 0;
}
'';
description = "Yarn container-executor.cfg definition";
};

extraConfDirs = mkOption {
default = [];
type = types.listOf types.path;
example = literalExpression ''
[
./extraHDFSConfs
./extraYARNConfs
]
'';
description = "Directories containing additional config files to be added to HADOOP_CONF_DIR";
};

package = mkOption {
type = types.package;
default = pkgs.hadoop;
Expand All @@ -64,6 +122,12 @@ with lib;
users.groups.hadoop = {
gid = config.ids.gids.hadoop;
};
environment = {
systemPackages = [ cfg.package ];
etc."hadoop-conf".source = let
hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
in "${hadoopConf}";
};
})

];
Expand Down
81 changes: 59 additions & 22 deletions nixos/modules/services/cluster/hadoop/hdfs.nix
Original file line number Diff line number Diff line change
@@ -1,24 +1,54 @@
{ config, lib, pkgs, ...}:
with lib;
let
cfg = config.services.hadoop;
hadoopConf = import ./conf.nix { hadoop = cfg; pkgs = pkgs; };
hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
restartIfChanged = mkOption {
type = types.bool;
description = ''
Automatically restart the service on config change.
This can be set to false to defer restarts on clusters running critical applications.
Please consider the security implications of inadvertently running an older version,
and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
'';
default = false;
};
in
with lib;
{
options.services.hadoop.hdfs = {
namenode.enabled = mkOption {
type = types.bool;
default = false;
description = ''
Whether to run the Hadoop YARN NameNode
'';
namenode = {
enabled = mkOption {
type = types.bool;
default = false;
description = ''
Whether to run the HDFS NameNode
'';
};
inherit restartIfChanged;
openFirewall = mkOption {
type = types.bool;
default = true;
description = ''
Open firewall ports for namenode
'';
};
};
datanode.enabled = mkOption {
type = types.bool;
default = false;
description = ''
Whether to run the Hadoop YARN DataNode
'';
datanode = {
enabled = mkOption {
type = types.bool;
default = false;
description = ''
Whether to run the HDFS DataNode
'';
};
inherit restartIfChanged;
openFirewall = mkOption {
type = types.bool;
default = true;
description = ''
Open firewall ports for datanode
'';
};
};
};

Expand All @@ -27,10 +57,7 @@ with lib;
systemd.services.hdfs-namenode = {
description = "Hadoop HDFS NameNode";
wantedBy = [ "multi-user.target" ];

environment = {
HADOOP_HOME = "${cfg.package}";
};
inherit (cfg.hdfs.namenode) restartIfChanged;

preStart = ''
${cfg.package}/bin/hdfs --config ${hadoopConf} namenode -format -nonInteractive || true
Expand All @@ -40,24 +67,34 @@ with lib;
User = "hdfs";
SyslogIdentifier = "hdfs-namenode";
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} namenode";
Restart = "always";
};
};

networking.firewall.allowedTCPPorts = (mkIf cfg.hdfs.namenode.openFirewall [
9870 # namenode.http-address
8020 # namenode.rpc-address
]);
})
(mkIf cfg.hdfs.datanode.enabled {
systemd.services.hdfs-datanode = {
description = "Hadoop HDFS DataNode";
wantedBy = [ "multi-user.target" ];

environment = {
HADOOP_HOME = "${cfg.package}";
};
inherit (cfg.hdfs.datanode) restartIfChanged;

serviceConfig = {
User = "hdfs";
SyslogIdentifier = "hdfs-datanode";
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} datanode";
Restart = "always";
};
};

networking.firewall.allowedTCPPorts = (mkIf cfg.hdfs.datanode.openFirewall [
9864 # datanode.http.address
9866 # datanode.address
9867 # datanode.ipc.address
]);
})
(mkIf (
cfg.hdfs.namenode.enabled || cfg.hdfs.datanode.enabled
Expand Down
Loading