hadoop 2.5.2 最小配置

测试YARN-2578时,搭了一个简单的集群。记录下备用。

线上集群的配置太复杂,改起来也很麻烦。
既然是测试用,只要daemon能跑起来就可以了。

3个虚拟机:hadoop1/2/3。改hostname、改hosts的过程不说了。互相网络能通就行。

安装zookeeper的过程不说了。很简单。

直接去官网下载编译好的tar包:hadoop-2.5.2.tar.gz

解压后改一些配置文件:

core-site.xml
<!-- Default filesystem: points at the HA nameservice "fk" defined in hdfs-site.xml,
     not at a single NameNode host. Marked final so jobs cannot override it. -->
<property>
  <name>fs.defaultFS</name>
  <value>hdfs://fk</value>
  <final>true</final>
</property>
<!-- ZooKeeper quorum used by the ZKFailoverController for automatic NameNode failover. -->
<property>
  <name>ha.zookeeper.quorum</name>
  <value>hadoop1:2181,hadoop2:2181,hadoop3:2181</value>
</property>
hdfs-site.xml
<!-- Logical nameservice ID; must match the authority in fs.defaultFS (hdfs://fk). -->
<property>
  <name>dfs.nameservices</name>
  <value>fk</value>
</property>
<!-- The two NameNodes participating in HA for nameservice "fk". -->
<property>
  <name>dfs.ha.namenodes.fk</name>
  <value>nn1,nn2</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.fk.nn1</name>
  <value>hadoop1:8020</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.fk.nn2</name>
  <value>hadoop2:8020</value>
</property>
<property>
  <name>dfs.namenode.http-address.fk.nn1</name>
  <value>hadoop1:50070</value>
</property>
<property>
  <name>dfs.namenode.http-address.fk.nn2</name>
  <value>hadoop2:50070</value>
</property>
<!-- Client-side proxy that discovers which NameNode is currently active. -->
<property>
  <name>dfs.client.failover.proxy.provider.fk</name>
  <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- JournalNode Configuration -->
<!-- Quorum Journal Manager: edits are written to all three JournalNodes. -->
<property>
  <name>dfs.namenode.shared.edits.dir</name>
  <value>qjournal://hadoop1:8485;hadoop2:8485;hadoop3:8485/fk</value>
</property>
<property>
  <name>dfs.journalnode.edits.dir</name>
  <value>/home/foolbear/hadoop_deploy/jn</value>
</property>
<!-- NOTE: dfs.name.dir / dfs.data.dir are the deprecated Hadoop 1.x names.
     Use the Hadoop 2.x keys dfs.namenode.name.dir / dfs.datanode.data.dir
     (the old names still work via the deprecation map, but log warnings). -->
<property>
  <name>dfs.namenode.name.dir</name>
  <value>/home/foolbear/hadoop_deploy/name</value>
  <final>true</final>
</property>
<property>
  <name>dfs.datanode.data.dir</name>
  <value>/home/foolbear/hadoop_deploy/data</value>
  <final>true</final>
</property>
<!-- No real fencing for a test cluster: always report success.
     Do NOT do this in production — a split-brain would go undetected. -->
<property>
  <name>dfs.ha.fencing.methods</name>
  <value>shell(/bin/true)</value>
</property>
<property>
  <name>dfs.ha.automatic-failover.enabled</name>
  <value>true</value>
</property>
yarn-site.xml
<!-- ZooKeeper quorum backing ResourceManager state storage and HA election. -->
<property>
  <name>yarn.resourcemanager.zk-address</name>
  <value>hadoop1:2181,hadoop2:2181,hadoop3:2181</value>
</property>
<!-- Recover running applications after an RM restart/failover. -->
<property>
  <name>yarn.resourcemanager.recovery.enabled</name>
  <value>true</value>
</property>
<property>
  <name>yarn.resourcemanager.store.class</name>
  <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
  <name>yarn.resourcemanager.zk-timeout-ms</name>
  <value>90000</value>
</property>
<property>
  <name>yarn.resourcemanager.am.max-attempts</name>
  <value>3</value>
</property>
<!-- Keep the state store small on this test cluster. -->
<property>
  <name>yarn.resourcemanager.state-store.max-completed-applications</name>
  <value>20</value>
</property>
<!--ResourceManager HA-->
<property>
  <name>yarn.resourcemanager.ha.enabled</name>
  <value>true</value>
</property>
<property>
  <name>yarn.resourcemanager.cluster-id</name>
  <value>fkyarn</value>
</property>
<!-- Two RMs: rm1 on hadoop2, rm2 on hadoop3. -->
<property>
  <name>yarn.resourcemanager.ha.rm-ids</name>
  <value>rm1,rm2</value>
</property>
<property>
  <name>yarn.resourcemanager.hostname.rm1</name>
  <value>hadoop2</value>
</property>
<property>
  <name>yarn.resourcemanager.hostname.rm2</name>
  <value>hadoop3</value>
</property>
<!--aux services-->
<!-- Shuffle service required for MapReduce jobs on YARN. -->
<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
</property>
<property>
  <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
  <value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<!--local and log dir-->
<property>
  <name>yarn.nodemanager.local-dirs</name>
  <value>/home/foolbear/hadoop_deploy/yarn_local</value>
</property>
<property>
  <name>yarn.nodemanager.log-dirs</name>
  <value>/home/foolbear/hadoop_deploy/yarn_logs</value>
</property>
<!-- Small VMs: keep per-node resources low (1 GB RAM, 1 vcore). -->
<property>
  <name>yarn.nodemanager.resource.memory-mb</name>
  <value>1024</value>
</property>
<property>
  <name>yarn.nodemanager.resource.cpu-vcores</name>
  <value>1</value>
</property>
mapred-site.xml
<!-- Run MapReduce on YARN (not the classic JobTracker framework). -->
<property>
  <name>mapreduce.framework.name</name>
  <value>yarn</value>
</property>
<!-- The next three memory settings are lowered only because the test VMs are weak;
     they must each fit inside yarn.nodemanager.resource.memory-mb (1024). -->
<property>
  <name>yarn.app.mapreduce.am.resource.mb</name>
  <value>800</value>
</property>
<property>
  <name>mapreduce.map.memory.mb</name>
  <value>800</value>
</property>
<property>
  <name>mapreduce.reduce.memory.mb</name>
  <value>800</value>
</property>

理论上还要在hadoop-env.sh里设置下JAVA_HOME。但我有全局的JAVA_HOME了,就没单独设置。

启动过程也不说了,跟正常的一样。记着在首次启动前先执行 `hdfs zkfc -formatZK` 初始化ZooKeeper中的HA状态,再执行 `hdfs namenode -format` 格式化namenode(另一台namenode用 `hdfs namenode -bootstrapStandby` 同步)。

跑个wordcount试试:

./hadoop jar ../share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.2.jar wordcount /user/foolbear/hadoop/hdfs-site.xml /user/foolbear/output4