Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added support for running MLCP using a dependency to MLCP jar instead… #61

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
.DS_Store
.gradle/
.settings/
bin/
data-hub/bin/
quick-start/bin/
build/
releases/
.classpath
.project
gradle-local.properties
/quick-start/environment.properties
/quick-start/assetInstallTime.json
/quick-start/input
/quick-start/plugins
5 changes: 5 additions & 0 deletions data-hub/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ dependencies {
compile 'com.marklogic:ml-app-deployer:2.0'
compile 'commons-io:commons-io:2.4'
compile 'com.google.code.gson:gson:2.6.1'
compile("com.marklogic:mlcp:8.0-4") {
exclude module : 'servlet-api'
exclude group: 'com.google.guava', module: 'guava'
}
compile 'com.google.guava:guava:11.0.2'
testCompile 'junit:junit:4.12'
testCompile 'xmlunit:xmlunit:1.3'
testCompile 'org.hamcrest:hamcrest-junit:2.0.0.0'
Expand Down
40 changes: 40 additions & 0 deletions data-hub/src/main/java/com/marklogic/hub/DataHubContentPump.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package com.marklogic.hub;

import java.io.File;
import java.util.List;

import com.marklogic.contentpump.ContentPump;
import com.marklogic.contentpump.utilities.OptionsFileUtil;

public class DataHubContentPump extends ContentPump {

private String[] arguments;

public DataHubContentPump(List<String> arguments) {
this(arguments.toArray(new String[0]));
}

public DataHubContentPump(String[] arguments) {
this.arguments = arguments;
}

/**
* Run the Content Pump.
*
* @return true if the content pump executed successfully, false otherwise.
*/
public boolean execute() {
String[] expandedArgs = null;
int rc = 1;
try {
expandedArgs = OptionsFileUtil.expandArguments(arguments);
rc = runCommand(expandedArgs);
} catch (Exception ex) {
LOG.error("Error while expanding arguments", ex);
System.err.println(ex.getMessage());
System.err.println("Try 'mlcp help' for usage.");
}

return rc == 0;
}
}
45 changes: 17 additions & 28 deletions data-hub/src/main/java/com/marklogic/hub/Mlcp.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,13 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.marklogic.hub.util.IOUtil;
import com.marklogic.hub.util.IOUtil.LogLevel;

public class Mlcp {
private static final Logger LOGGER = LoggerFactory.getLogger(Mlcp.class);

private final static String DEFAULT_HADOOP_HOME_DIR= "./hadoop/";

private List<MlcpSource> sources = new ArrayList<>();

private String mlcpPath;

private String host;

private String port;
Expand All @@ -41,21 +38,13 @@ public class Mlcp {

private String password;

public Mlcp(String mlcpHome, String host, String port, String user, String password) {
public Mlcp(String host, String port, String user, String password) throws IOException {
this.host = host;
this.port = port;
this.user = user;
this.password = password;

// set the mlcp executable path based on OS
this.mlcpPath = mlcpHome;
String osName = System.getProperty("os.name");
if (osName != null && osName.toLowerCase().startsWith("windows")) {
mlcpPath += "/bin/mlcp.bat";
}
else {
mlcpPath += "/bin/mlcp.sh";
}

setHadoopHomeDir();
}

public void addSourceDirectory(String directoryPath, SourceOptions options) {
Expand All @@ -70,7 +59,7 @@ public void loadContent() {
try {
List<String> arguments = new ArrayList<>();

arguments.add(mlcpPath);
// arguments.add(mlcpPath);
arguments.add("import");
arguments.add("-mode");
arguments.add("local");
Expand All @@ -86,17 +75,9 @@ public void loadContent() {
// add arguments related to the source
List<String> sourceArguments = source.getMlcpArguments();
arguments.addAll(sourceArguments);

ProcessBuilder pb = new ProcessBuilder(arguments.toArray(new String[0]));
Process process = pb.start();

inputThread = IOUtil.createInputStreamSink(process.getInputStream(), LOGGER, LogLevel.DEBUG);
errorThread = IOUtil.createInputStreamSink(process.getErrorStream(), LOGGER, LogLevel.ERROR);

inputThread.start();
errorThread.start();

process.waitFor();

DataHubContentPump contentPump = new DataHubContentPump(arguments);
contentPump.execute();
}
catch (Exception e) {
LOGGER.error("Failed to load {}", source.getSourcePath(), e);
Expand All @@ -111,6 +92,14 @@ public void loadContent() {
}
}
}

/**
 * Ensures the {@code hadoop.home.dir} system property is set to a canonical
 * path, defaulting to the bundled Hadoop directory when the property is unset.
 * mlcp's Hadoop machinery reads this property at startup.
 *
 * @throws IOException if the path cannot be canonicalized
 */
protected void setHadoopHomeDir() throws IOException {
    // Fall back to the bundled hadoop/ directory when no override is given.
    String home = System.getProperty("hadoop.home.dir", DEFAULT_HADOOP_HOME_DIR);
    // Always rewrite the property as an absolute, canonical path.
    System.setProperty("hadoop.home.dir", new File(home).getCanonicalPath());
}

private static class MlcpSource {
private String sourcePath;
Expand Down
147 changes: 147 additions & 0 deletions quick-start/hadoop/bin/hadoop
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script runs the hadoop core commands.

# NOTE(review): this appears to be a vendored copy of Apache Hadoop's
# bin/hadoop launcher, bundled under quick-start/hadoop/ so mlcp can run
# without a separately installed Hadoop distribution. Kept byte-identical
# to upstream to simplify future upgrades.

# Resolve the real directory containing this script (follows $PATH lookup).
bin=`which $0`
bin=`dirname ${bin}`
bin=`cd "$bin"; pwd`

# Source the shared Hadoop configuration; presumably this defines $JAVA,
# $JAVA_HEAP_MAX and builds up $CLASSPATH used below — see hadoop-config.sh.
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh

function print_usage(){
echo "Usage: hadoop [--config confdir] COMMAND"
echo " where COMMAND is one of:"
echo " fs run a generic filesystem user client"
echo " version print the version"
echo " jar <jar> run a jar file"
echo " checknative [-a|-h] check native hadoop and compression libraries availability"
echo " distcp <srcurl> <desturl> copy file or directories recursively"
echo " archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
echo " classpath prints the class path needed to get the"
echo " credential interact with credential providers"
# NOTE(review): the next line looks misplaced in upstream — it continues the
# "classpath" description two lines up. Preserved as-is to match upstream output.
echo " Hadoop jar and the required libraries"
echo " daemonlog get/set the log level for each daemon"
echo " trace view and modify Hadoop tracing settings"
echo " or"
echo " CLASSNAME run the class named CLASSNAME"
echo ""
echo "Most commands print help when invoked w/o parameters."
}

# No arguments at all: show usage and exit successfully.
if [ $# = 0 ]; then
print_usage
exit
fi

COMMAND=$1
case $COMMAND in
# usage flags
--help|-help|-h)
print_usage
exit
;;

#hdfs commands
namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer|fetchdt|oiv|dfsgroups|portmap|nfs3)
echo "DEPRECATED: Use of this script to execute hdfs command is deprecated." 1>&2
echo "Instead use the hdfs command for it." 1>&2
echo "" 1>&2
#try to locate hdfs and if present, delegate to it.
shift
if [ -f "${HADOOP_HDFS_HOME}"/bin/hdfs ]; then
exec "${HADOOP_HDFS_HOME}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@"
elif [ -f "${HADOOP_PREFIX}"/bin/hdfs ]; then
exec "${HADOOP_PREFIX}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@"
else
echo "HADOOP_HDFS_HOME not found!"
exit 1
fi
;;

#mapred commands for backwards compatibility
pipes|job|queue|mrgroups|mradmin|jobtracker|tasktracker)
echo "DEPRECATED: Use of this script to execute mapred command is deprecated." 1>&2
echo "Instead use the mapred command for it." 1>&2
echo "" 1>&2
#try to locate mapred and if present, delegate to it.
shift
if [ -f "${HADOOP_MAPRED_HOME}"/bin/mapred ]; then
exec "${HADOOP_MAPRED_HOME}"/bin/mapred ${COMMAND/mrgroups/groups} "$@"
elif [ -f "${HADOOP_PREFIX}"/bin/mapred ]; then
exec "${HADOOP_PREFIX}"/bin/mapred ${COMMAND/mrgroups/groups} "$@"
else
echo "HADOOP_MAPRED_HOME not found!"
exit 1
fi
;;

#core commands
*)
# the core commands
# Map each known COMMAND to its driver class; anything unrecognized is
# treated as a fully-qualified class name to run directly.
if [ "$COMMAND" = "fs" ] ; then
CLASS=org.apache.hadoop.fs.FsShell
elif [ "$COMMAND" = "version" ] ; then
CLASS=org.apache.hadoop.util.VersionInfo
elif [ "$COMMAND" = "jar" ] ; then
CLASS=org.apache.hadoop.util.RunJar
elif [ "$COMMAND" = "key" ] ; then
CLASS=org.apache.hadoop.crypto.key.KeyShell
elif [ "$COMMAND" = "checknative" ] ; then
CLASS=org.apache.hadoop.util.NativeLibraryChecker
elif [ "$COMMAND" = "distcp" ] ; then
CLASS=org.apache.hadoop.tools.DistCp
CLASSPATH=${CLASSPATH}:${TOOL_PATH}
elif [ "$COMMAND" = "daemonlog" ] ; then
CLASS=org.apache.hadoop.log.LogLevel
elif [ "$COMMAND" = "archive" ] ; then
CLASS=org.apache.hadoop.tools.HadoopArchives
CLASSPATH=${CLASSPATH}:${TOOL_PATH}
elif [ "$COMMAND" = "credential" ] ; then
CLASS=org.apache.hadoop.security.alias.CredentialShell
elif [ "$COMMAND" = "trace" ] ; then
CLASS=org.apache.hadoop.tracing.TraceAdmin
elif [ "$COMMAND" = "classpath" ] ; then
if [ "$#" -eq 1 ]; then
# No need to bother starting up a JVM for this simple case.
echo $CLASSPATH
exit
else
CLASS=org.apache.hadoop.util.Classpath
fi
elif [[ "$COMMAND" = -* ]] ; then
# class and package names cannot begin with a -
echo "Error: No command named \`$COMMAND' was found. Perhaps you meant \`hadoop ${COMMAND#-}'"
exit 1
else
CLASS=$COMMAND
fi
shift

# Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"

#make sure security appender is turned off
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"

# Replace this shell with the JVM running the selected class, passing the
# remaining command-line arguments through unchanged.
export CLASSPATH=$CLASSPATH
exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
;;

esac
Loading