1. 先配置 Windows 的 hosts 文件,加入 hadoop 集群的 ip 和名字
192.168.7.11 intel-hadoop-11
192.168.7.12 intel-hadoop-12
192.168.7.13 intel-hadoop-13
2. 需要把生成的 jar 包放在 Eclipse 的项目根目录
3. 代码示例,三个class文件,
注意点,因为是远程调用hadoop,所以需要Configuration _conf = new Configuration();
在_conf配置hadoop的连接信息。信息在mapred-site.xml、core-site.xml文件中查看。
package com.test;
// 注意事项
/*
 * 设置系统 hosts 文件,包含 hadoop 主机 ip
 */
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import com.test.WordCountMap;
import com.test.WordCountReduce;
/**
 * Driver that submits the WordCount job to a remote Hadoop cluster.
 *
 * <p>Because the job is submitted remotely (not via {@code hadoop jar}), the
 * cluster connection settings must be placed in a local {@link Configuration};
 * the values come from the cluster's mapred-site.xml and core-site.xml.
 * The local hosts file must also map the cluster hostnames (see notes above).
 */
public class WordCountDriver {

    /**
     * Configures and submits the job, then exits with 0 on success, 1 on failure.
     *
     * @param args unused; input/output paths are hard-coded below
     * @throws Exception if job submission or execution fails
     */
    public static void main(String[] args) throws Exception {
        // These settings affect only this program's submission, not the cluster.
        Configuration conf = new Configuration();
        conf.set("mapred.job.tracker", "192.168.7.11:54311");     // JobTracker address (mapred-site.xml)
        conf.set("fs.default.name", "hdfs://192.168.7.11:8020");  // HDFS NameNode URI (core-site.xml)
        conf.set("hadoop.job.ugi", "hadoop");                     // remote user identity
        // BUG FIX: Configuration keys are case-sensitive. The original used
        // "Hadoop.tmp.dir", which Hadoop silently ignores.
        conf.set("hadoop.tmp.dir", "/user/");
        conf.set("dfs.permissions", "false"); // optional: skip HDFS permission checks
        // Jar containing the mapper/reducer classes; for remote submission the
        // generated jar must sit in the project root directory.
        conf.set("mapred.jar", "com.test.jar");

        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCountDriver.class);
        FileInputFormat.addInputPath(job, new Path("/user/input"));    // HDFS input directory
        FileOutputFormat.setOutputPath(job, new Path("/user/output")); // HDFS output directory (must not exist)
        job.setMapperClass(WordCountMap.class);
        job.setReducerClass(WordCountReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Block until completion; propagate success/failure via the exit code.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
package com.test;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
public class WordCountMap extends Mapper<Object, Text, Text, IntWritable>{
private static IntWritable value_=new IntWritable(1);
private static Text _key=new Text();
public void map(Object key, Text value, Context context) throws IOException, InterruptedException
{
StringTokenizer _value=new StringTokenizer(value.toString().trim());
while(_value.hasMoreTokens())
{
_key.set(_value.nextToken().trim());
context.write(_key, value_);
}
}
}
package com.test;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
public class WordCountReduce extends Reducer<Text,IntWritable,Text,IntWritable> {
private static IntWritable _sum=new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException
{
int sum=0;
for(IntWritable value : values)
{
sum+=value.get();
}
_sum.set(sum);
context.write(key, _sum);
}
}
运行结果
14/06/26 10:46:00 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
14/06/26 10:46:01 INFO input.FileInputFormat: Total input paths to process : 1
14/06/26 10:46:01 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
14/06/26 10:46:01 WARN snappy.LoadSnappy: Snappy native library not loaded
14/06/26 10:46:02 INFO mapred.JobClient: Running job: job_201406251739_0002
14/06/26 10:46:03 INFO mapred.JobClient: map 0% reduce 0%
14/06/26 10:46:16 INFO mapred.JobClient: map 100% reduce 0%
14/06/26 10:46:26 INFO mapred.JobClient: map 100% reduce 33%
14/06/26 10:46:29 INFO mapred.JobClient: map 100% reduce 100%
14/06/26 10:46:33 INFO mapred.JobClient: Job complete: job_201406251739_0002
14/06/26 10:46:33 INFO mapred.JobClient: Counters: 29
14/06/26 10:46:33 INFO mapred.JobClient: Job Counters
14/06/26 10:46:33 INFO mapred.JobClient: Launched reduce tasks=1
14/06/26 10:46:33 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=15193
14/06/26 10:46:33 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
14/06/26 10:46:33 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
14/06/26 10:46:33 INFO mapred.JobClient: Launched map tasks=1
14/06/26 10:46:33 INFO mapred.JobClient: Data-local map tasks=1
14/06/26 10:46:33 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=10845
14/06/26 10:46:33 INFO mapred.JobClient: File Output Format Counters
14/06/26 10:46:33 INFO mapred.JobClient: Bytes Written=626
14/06/26 10:46:33 INFO mapred.JobClient: FileSystemCounters
14/06/26 10:46:33 INFO mapred.JobClient: FILE_BYTES_READ=1272
14/06/26 10:46:33 INFO mapred.JobClient: HDFS_BYTES_READ=738
14/06/26 10:46:33 INFO mapred.JobClient: FILE_BYTES_WRITTEN=51545
14/06/26 10:46:33 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=626
14/06/26 10:46:33 INFO mapred.JobClient: File Input Format Counters
14/06/26 10:46:33 INFO mapred.JobClient: Bytes Read=630
14/06/26 10:46:33 INFO mapred.JobClient: Map-Reduce Framework
14/06/26 10:46:33 INFO mapred.JobClient: Map output materialized bytes=1272
14/06/26 10:46:33 INFO mapred.JobClient: Map input records=1
14/06/26 10:46:33 INFO mapred.JobClient: Reduce shuffle bytes=1272
14/06/26 10:46:33 INFO mapred.JobClient: Spilled Records=212
14/06/26 10:46:33 INFO mapred.JobClient: Map output bytes=1054
14/06/26 10:46:33 INFO mapred.JobClient: CPU time spent (ms)=2650
14/06/26 10:46:33 INFO mapred.JobClient: Total committed heap usage (bytes)=301006848
14/06/26 10:46:33 INFO mapred.JobClient: Combine input records=0
14/06/26 10:46:33 INFO mapred.JobClient: SPLIT_RAW_BYTES=108
14/06/26 10:46:33 INFO mapred.JobClient: Reduce input records=106
14/06/26 10:46:33 INFO mapred.JobClient: Reduce input groups=71
14/06/26 10:46:33 INFO mapred.JobClient: Combine output records=0
14/06/26 10:46:33 INFO mapred.JobClient: Physical memory (bytes) snapshot=289206272
14/06/26 10:46:33 INFO mapred.JobClient: Reduce output records=71
14/06/26 10:46:33 INFO mapred.JobClient: Virtual memory (bytes) snapshot=1508102144
14/06/26 10:46:33 INFO mapred.JobClient: Map output records=106