docker搭建Hadoop集群
Docker搭建Hadoop集群
首先搭建Docker环境,Docker版本大于1.3.2
安装主机监控程序和加速器(curl -sSL https://get.daocloud.io/daomonit/install.sh | sh -s 7a029f60d36056fe1b85fabca6a133887245abe6)
docker pull daocloud.io/library/centos:centos6.7
docker run -it -h master --name master daocloud.io/library/centos:centos6.7 /bin/bash
yum install -y gcc vim openssh-server openssh-clients java-1.7.0-openjdk-devel.x86_64 tar wget
这里java使用了openjdk1.7.0的版本,因为安装方便
vim /etc/ssh/sshd_config
放开PermitEmptyPasswords no
更改UsePAM no
放开PermitRootLogin yes
/etc/rc.d/init.d/sshd start
ssh-keygen -t rsa -P ''
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
配置/etc/profile
export JAVA_HOME=/usr/lib/jvm/java-1.7.0-openjdk.x86_64/
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export HADOOP_HOME=/root/hadoop-2.7.1
export PATH=$JAVA_HOME/bin:$PATH:$HADOOP_HOME/bin
11. 配置core-site.xml
<configuration> <property> <name>fs.defaultFS</name> <value>hdfs://master:9000</value> </property> <property> <name>io.file.buffer.size</name> <value>131702</value> </property> <property> <name>hadoop.tmp.dir</name> <value>file:/root/hadoop-2.7.1/tmp</value> </property></configuration>
12. 配置hdfs-site.xml
<configuration> <property> <name>dfs.namenode.name.dir</name> <value>file:/root/hadoop-2.7.1/dfs/name</value> </property> <property> <name>dfs.datanode.data.dir</name> <value>file:/root/hadoop-2.7.1/dfs/data</value> </property> <property> <name>dfs.replication</name> <value>2</value> </property> <property> <name>dfs.namenode.secondary.http-address</name> <value>master:9001</value> </property> <property> <name>dfs.webhdfs.enabled</name> <value>true</value> </property></configuration>
13. 配置mapred-site.xml
<configuration> <property> <name>mapreduce.framework.name</name> <value>yarn</value> </property> <property> <name>mapreduce.jobhistory.address</name> <value>master:10020</value> </property> <property> <name>mapreduce.jobhistory.webapp.address</name> <value>master:19888</value> </property></configuration>
14. 配置yarn-site.xml
<configuration> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> <property> <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name> <value>org.apache.hadoop.mapred.ShuffleHandler</value> </property> <property> <name>yarn.resourcemanager.address</name> <value>master:8032</value> </property> <property> <name>yarn.resourcemanager.scheduler.address</name> <value>master:8030</value> </property> <property> <name>yarn.resourcemanager.resource-tracker.address</name> <value>master:8031</value> </property> <property> <name>yarn.resourcemanager.admin.address</name> <value>master:8033</value> </property> <property> <name>yarn.resourcemanager.webapp.address</name> <value>master:8088</value> </property> <property> <name>yarn.nodemanager.resource.memory-mb</name> <value>1024</value> </property></configuration>
15. 配置etc/hadoop/slaves文件
追加:
slave1
slave2
slave3
16. 配置hadoop-env.sh和yarn-env.sh
在文件第一行中添加:export JAVA_HOME=/usr/lib/jvm/java-1.7.0-openjdk.x86_64/
17. 测试链接文件
ldd /root/hadoop-2.7.1/lib/native/libhadoop.so.1.0.0
/root/hadoop-2.7.1/lib/native/libhadoop.so.1.0.0: /lib64/libc.so.6: version `GLIBC_2.14' not found (required by /root/hadoop-2.7.1/lib/native/libhadoop.so.1.0.0)
linux-vdso.so.1 => (0x00007fff24dbc000)
libdl.so.2 => /lib64/libdl.so.2 (0x00007ff8c6371000)
libc.so.6 => /lib64/libc.so.6 (0x00007ff8c5fdc000)
/lib64/ld-linux-x86-64.so.2 (0x00007ff8c679b000)
18. 安装glibc-2.14
tar zxvf glibc-2.14.tar.gz
cd glibc-2.14
mkdir build
cd build
../configure --prefix=/usr/local/glibc-2.14
make
make install
ln -sf /usr/local/glibc-2.14/lib/libc-2.14.so /lib64/libc.so.6
19. 测试链接文件
ldd /root/hadoop-2.7.1/lib/native/libhadoop.so.1.0.0
linux-vdso.so.1 => (0x00007fff72b7c000)
libdl.so.2 => /lib64/libdl.so.2 (0x00007fb996ce9000)
libc.so.6 => /lib64/libc.so.6 (0x00007fb99695c000)
/lib64/ld-linux-x86-64.so.2 (0x00007fb997113000)
20. 提交该镜像即可
docker commit master ice/hadoop
21. 查看镜像列表
docker images
REPOSITORY TAG IMAGE ID CREATED VIRTUAL SIZE
ice/hadoop latest 385a97765871 7 hours ago 1.731 GB
daocloud.io/library/centos centos6.7 3fba1048142f 11 days ago 190.6 MB
daocloud.io/daocloud/daocloud-toolset latest aa5dc2eecd4a 6 weeks ago 145.8 MB
daocloud.io/daocloud/daomonit latest ae375c157c27 7 weeks ago 149 MB
22. 干掉该配置容器
docker rm master
23. 启动集群脚本
docker run --rm -it -p 50070:50070 -p 19888:19888 -p 8088:8088 -p 50030:50030 -h master --name master ice/hadoop /bin/bash
docker run --rm -it -h slave1 --name slave1 ice/hadoop /bin/bash
docker run --rm -it -h slave2 --name slave2 ice/hadoop /bin/bash
docker run --rm -it -h slave3 --name slave3 ice/hadoop /bin/bash
24. 配置环境变量和sshd服务
source /etc/profile
/etc/rc.d/init.d/sshd start
25. 查看各个节点的IP
docker inspect --format='{{.NetworkSettings.IPAddress}}' master
docker inspect --format='{{.NetworkSettings.IPAddress}}' slave1
docker inspect --format='{{.NetworkSettings.IPAddress}}' slave2
docker inspect --format='{{.NetworkSettings.IPAddress}}' slave3
26. 启动Hadoop集群
hadoop namenode -format
/root/hadoop-2.7.1/sbin/start-dfs.sh
/root/hadoop-2.7.1/sbin/start-yarn.sh
注意:第一次执行脚本时,需要确认一次
27. 关闭脚本
/root/hadoop-2.7.1/sbin/stop-dfs.sh
/root/hadoop-2.7.1/sbin/stop-yarn.sh
28. 测试命令
hadoop fs -mkdir /input
hadoop fs -ls /
hadoop fs -put /root/hadoop-2.7.1/etc/hadoop/* /input/
hadoop jar /root/hadoop-2.7.1/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar wordcount /input/ /output/wordcount/