# Dockerfile: Hadoop, Mahout, Pig and Hive on top of phusion/baseimage.
# Base image, pinned to a fixed release instead of the moving `latest` tag so
# that rebuilds are reproducible.
# NOTE(review): 0.9.22 is assumed to be an Ubuntu 16.04 (xenial) based release,
# matching the xenial PPA configured further down — confirm before bumping.
FROM phusion/baseimage:0.9.22

# Use baseimage's init process
CMD ["/sbin/my_init"]

# MAINTAINER is deprecated; record the maintainer as a label instead.
LABEL maintainer="Firas"

# Build-time only: keeps apt/debconf non-interactive during the RUN steps
# without leaving the setting in the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Location of the JDK; also written into hadoop-env.sh later in this file.
ENV JAVA_HOME=/usr/lib/jvm/java-8-oracle

## UTF-8
RUN locale-gen en_US.UTF-8
ENV LANG=en_US.UTF-8 \
    LC_ALL=en_US.UTF-8
## Remove any existing JDKs
# -y keeps the step non-interactive; quoting hands the pattern to apt-get
# instead of letting the shell try to glob-expand it first.
RUN apt-get --purge remove -y 'openjdk*'

## Install Oracle's JDK
# One layer does everything: pre-accept the Oracle licence, register the PPA
# and its signing key, install the packages, then drop the apt metadata so it
# never lands in the image.
# NOTE: the list file name says "trusty" although the repository line targets
# xenial; the name is kept unchanged.
# NOTE(review): /var/cache/oracle-jdk8-installer is presumably where the
# installer keeps the downloaded JDK archive — confirm; removing a missing
# path is harmless.
RUN echo "oracle-java8-installer shared/accepted-oracle-license-v1-1 select true" | debconf-set-selections && \
    echo "deb http://ppa.launchpad.net/webupd8team/java/ubuntu xenial main" > /etc/apt/sources.list.d/webupd8team-java-trusty.list && \
    apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys EEA14886 && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        bzip2 \
        oracle-java8-installer \
        p7zip-full \
        python2.7 \
        unzip && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /var/cache/oracle-jdk8-installer
# Component versions used by the download steps below.
ENV HADOOP_VERSION=2.6.4 \
    HIVE_VERSION=1.2.2 \
    MAHOUT_VERSION=0.12.2 \
    PIG_VERSION=0.17.0

# Install locations derived from the versions. They live in their own ENV
# instructions because a variable set inside an ENV instruction is only
# substituted in the instructions that follow it.
ENV HADOOP_HOME=/usr/local/hadoop-${HADOOP_VERSION} \
    MAHOUT_HOME=/usr/local/apache-mahout-distribution-${MAHOUT_VERSION}

ENV HADOOP_STREAMING_JAR=${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-streaming-${HADOOP_VERSION}.jar

# Scratch directory for the tarball downloads that follow.
WORKDIR /tmp
# Download and extract Mahout
# Fetched over HTTPS from archive.apache.org (the same host the Hadoop step
# uses). The tarball is unpacked straight into /usr/local and deleted in the
# same layer so it does not stay in the image.
RUN wget --quiet https://archive.apache.org/dist/mahout/${MAHOUT_VERSION}/apache-mahout-distribution-${MAHOUT_VERSION}.tar.gz && \
    tar -xzf apache-mahout-distribution-${MAHOUT_VERSION}.tar.gz -C /usr/local && \
    rm -f apache-mahout-distribution-${MAHOUT_VERSION}.tar.gz && \
    ln -sf /usr/local/apache-mahout-distribution-${MAHOUT_VERSION}/bin/mahout /usr/local/bin/mahout
# PATH entries must be directories; the previous entry pointed at the mahout
# launcher file itself and therefore had no effect.
ENV PATH=/usr/local/apache-mahout-distribution-${MAHOUT_VERSION}/bin:${PATH}
# Download and extract Hadoop
# The tarball is unpacked straight into /usr/local and removed in the same
# layer; deleting it in a later RUN would not shrink the image.
RUN wget --quiet https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz && \
    tar -xzf hadoop-${HADOOP_VERSION}.tar.gz -C /usr/local && \
    rm -f hadoop-${HADOOP_VERSION}.tar.gz
# sbin comes before bin, the same lookup order the two separate PATH
# instructions produced.
ENV PATH=/usr/local/hadoop-${HADOOP_VERSION}/sbin:/usr/local/hadoop-${HADOOP_VERSION}/bin:${PATH}
# Download and install PIG
# Fetched over HTTPS from archive.apache.org (the same host the Hadoop step
# uses). The tarball is unpacked into /usr/local and deleted in the same layer.
RUN wget --quiet https://archive.apache.org/dist/pig/pig-${PIG_VERSION}/pig-${PIG_VERSION}.tar.gz && \
    tar -zxf pig-${PIG_VERSION}.tar.gz -C /usr/local && \
    rm -f pig-${PIG_VERSION}.tar.gz
ENV PATH=/usr/local/pig-${PIG_VERSION}/bin:${PATH}
# Download and install Hive
# Fetched over HTTPS from archive.apache.org (the same host the Hadoop step
# uses). The tarball is unpacked into /usr/local and deleted in the same layer.
RUN wget --quiet https://archive.apache.org/dist/hive/hive-${HIVE_VERSION}/apache-hive-${HIVE_VERSION}-bin.tar.gz && \
    tar -zxf apache-hive-${HIVE_VERSION}-bin.tar.gz -C /usr/local && \
    rm -f apache-hive-${HIVE_VERSION}-bin.tar.gz
ENV PATH=/usr/local/apache-hive-${HIVE_VERSION}-bin/bin:${PATH}
# Install the Hadoop configuration from the build context's conf directory.
COPY conf ${HADOOP_HOME}/etc/hadoop/

# Append JAVA_HOME to hadoop-env.sh, create the /data/dfs directories and
# format the namenode, all in one step.
RUN echo "export JAVA_HOME=${JAVA_HOME}" >> ${HADOOP_HOME}/etc/hadoop/hadoop-env.sh && \
    mkdir -p /data/dfs/data /data/dfs/name /data/dfs/namesecondary && \
    hdfs namenode -format

# Declared only after /data has been populated: anything written to a volume
# path later in the build would be discarded.
VOLUME /data
# SSH setup, performed in the original order:
#   1. delete the sshd "down" marker (presumably this enables the sshd service
#      under the base image's supervisor — confirm),
#   2. append a directive allowing root login,
#   3. regenerate the SSH host keys.
# NOTE(review): sshd is documented to use the first value it finds for a
# keyword; if sshd_config already sets PermitRootLogin, the appended line may
# have no effect — confirm against the base image's config.
# NOTE(review): host keys generated at build time end up in the image and are
# shared by every container started from it — confirm this is intended.
RUN rm -f /etc/service/sshd/down && \
    echo "PermitRootLogin yes" >> /etc/ssh/sshd_config && \
    /etc/my_init.d/00_regen_ssh_host_keys.sh
# Ports documented as exposed by this image (Hadoop services).
# Reference: http://www.cloudera.com/content/cloudera/en/documentation/core/latest/topics/cdh_ig_ports_cdh5.html
# EXPOSE is documentation only; ports still have to be published at run time.
EXPOSE 9000 \
       9021 \
       50010 \
       50020 \
       50070 \
       50075 \
       50090
# Setup python 2.7 as default python
# -f replaces an existing /usr/bin/python instead of failing the build,
# matching the `ln -sf` already used for the mahout launcher.
RUN ln -sf /usr/bin/python2.7 /usr/bin/python
# Clean up apt metadata, temporary files and caches.
# NOTE(review): this runs in its own layer, so it hides these files from the
# final filesystem but cannot reduce the size of the earlier layers that
# created them. It also removes the /var/cache directory itself, not just its
# contents — confirm that is intended.
RUN apt-get clean && \
    rm -rf \
        /var/lib/apt/lists/* \
        /tmp/* \
        /var/tmp/* \
        /var/cache
# Call entrypoint.sh when starting the container
# COPY is used because this is a plain local file; ADD's archive-extraction
# and URL features are not needed here.
# NOTE(review): entrypoint.sh must already be executable in the build context,
# since file permissions are copied as-is.
COPY entrypoint.sh /opt/entrypoint.sh
# With an exec-form ENTRYPOINT, the image's CMD (/sbin/my_init) is passed to
# entrypoint.sh as its arguments.
ENTRYPOINT ["/opt/entrypoint.sh"]