1. 先配置 Windows 的 hosts 文件,加入 hadoop 集群的 ip 和名字
192.168.7.11 intel-hadoop-11
192.168.7.12 intel-hadoop-12
192.168.7.13 intel-hadoop-13
2. 需要把生成的 jar 包放在 Eclipse 的项目根目录
3. 代码示例,三个class文件,
注意点,因为是远程调用hadoop,所以需要Configuration _conf = new Configuration();
在_conf配置hadoop的连接信息。信息在mapred-site.xml、core-site.xml文件中查看。
package com.test;
// 注意事项
/*
 * 设置系统 hosts 文件,包含 hadoop 主机 ip
 */
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import com.test.WordCountMap;
import com.test.WordCountReduce;
/**
 * Driver that submits the WordCount job to a remote Hadoop cluster.
 *
 * <p>Because the job is submitted remotely (not via {@code hadoop jar}), the
 * cluster connection settings must be placed in a local {@link Configuration};
 * the values come from the cluster's mapred-site.xml and core-site.xml.
 * The local hosts file must also map the cluster hostnames (see notes above).
 */
public class WordCountDriver {

    /**
     * Configures and submits the job, then exits with 0 on success, 1 on failure.
     *
     * @param args unused; input/output paths are hard-coded below
     * @throws Exception if job submission or execution fails
     */
    public static void main(String[] args) throws Exception {
        // These settings affect only this program's submission, not the cluster.
        Configuration conf = new Configuration();
        conf.set("mapred.job.tracker", "192.168.7.11:54311");     // JobTracker address (mapred-site.xml)
        conf.set("fs.default.name", "hdfs://192.168.7.11:8020");  // HDFS NameNode URI (core-site.xml)
        conf.set("hadoop.job.ugi", "hadoop");                     // remote user identity
        // BUG FIX: Configuration keys are case-sensitive. The original used
        // "Hadoop.tmp.dir", which Hadoop silently ignores.
        conf.set("hadoop.tmp.dir", "/user/");
        conf.set("dfs.permissions", "false"); // optional: skip HDFS permission checks
        // Jar containing the mapper/reducer classes; for remote submission the
        // generated jar must sit in the project root directory.
        conf.set("mapred.jar", "com.test.jar");

        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCountDriver.class);
        FileInputFormat.addInputPath(job, new Path("/user/input"));    // HDFS input directory
        FileOutputFormat.setOutputPath(job, new Path("/user/output")); // HDFS output directory (must not exist)
        job.setMapperClass(WordCountMap.class);
        job.setReducerClass(WordCountReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Block until completion; propagate success/failure via the exit code.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
package com.test;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
public class WordCountMap extends Mapper<Object, Text, Text, IntWritable>{
private static IntWritable value_=new IntWritable(1);
private static Text _key=new Text();
public void map(Object key, Text value, Context context) throws IOException, InterruptedException
{
StringTokenizer _value=new StringTokenizer(value.toString().trim());
while(_value.hasMoreTokens())
{
_key.set(_value.nextToken().trim());
context.write(_key, value_);
}
}
}
package com.test;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
public class WordCountReduce extends Reducer<Text,IntWritable,Text,IntWritable> {
private static IntWritable _sum=new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException
{
int sum=0;
for(IntWritable value : values)
{
sum+=value.get();
}
_sum.set(sum);
context.write(key, _sum);
}
}
运行结果
14/06/26 10:46:00 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
14/06/26 10:46:01 INFO input.FileInputFormat: Total input paths to process : 1
14/06/26 10:46:01 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
14/06/26 10:46:01 WARN snappy.LoadSnappy: Snappy native library not loaded
14/06/26 10:46:02 INFO mapred.JobClient: Running job: job_201406251739_0002
14/06/26 10:46:03 INFO mapred.JobClient: map 0% reduce 0%
14/06/26 10:46:16 INFO mapred.JobClient: map 100% reduce 0%
14/06/26 10:46:26 INFO mapred.JobClient: map 100% reduce 33%
14/06/26 10:46:29 INFO mapred.JobClient: map 100% reduce 100%
14/06/26 10:46:33 INFO mapred.JobClient: Job complete: job_201406251739_0002
14/06/26 10:46:33 INFO mapred.JobClient: Counters: 29
14/06/26 10:46:33 INFO mapred.JobClient: Job Counters
14/06/26 10:46:33 INFO mapred.JobClient: Launched reduce tasks=1
14/06/26 10:46:33 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=15193
14/06/26 10:46:33 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
14/06/26 10:46:33 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
14/06/26 10:46:33 INFO mapred.JobClient: Launched map tasks=1
14/06/26 10:46:33 INFO mapred.JobClient: Data-local map tasks=1
14/06/26 10:46:33 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=10845
14/06/26 10:46:33 INFO mapred.JobClient: File Output Format Counters
14/06/26 10:46:33 INFO mapred.JobClient: Bytes Written=626
14/06/26 10:46:33 INFO mapred.JobClient: FileSystemCounters
14/06/26 10:46:33 INFO mapred.JobClient: FILE_BYTES_READ=1272
14/06/26 10:46:33 INFO mapred.JobClient: HDFS_BYTES_READ=738
14/06/26 10:46:33 INFO mapred.JobClient: FILE_BYTES_WRITTEN=51545
14/06/26 10:46:33 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=626
14/06/26 10:46:33 INFO mapred.JobClient: File Input Format Counters
14/06/26 10:46:33 INFO mapred.JobClient: Bytes Read=630
14/06/26 10:46:33 INFO mapred.JobClient: Map-Reduce Framework
14/06/26 10:46:33 INFO mapred.JobClient: Map output materialized bytes=1272
14/06/26 10:46:33 INFO mapred.JobClient: Map input records=1
14/06/26 10:46:33 INFO mapred.JobClient: Reduce shuffle bytes=1272
14/06/26 10:46:33 INFO mapred.JobClient: Spilled Records=212
14/06/26 10:46:33 INFO mapred.JobClient: Map output bytes=1054
14/06/26 10:46:33 INFO mapred.JobClient: CPU time spent (ms)=2650
14/06/26 10:46:33 INFO mapred.JobClient: Total committed heap usage (bytes)=301006848
14/06/26 10:46:33 INFO mapred.JobClient: Combine input records=0
14/06/26 10:46:33 INFO mapred.JobClient: SPLIT_RAW_BYTES=108
14/06/26 10:46:33 INFO mapred.JobClient: Reduce input records=106
14/06/26 10:46:33 INFO mapred.JobClient: Reduce input groups=71
14/06/26 10:46:33 INFO mapred.JobClient: Combine output records=0
14/06/26 10:46:33 INFO mapred.JobClient: Physical memory (bytes) snapshot=289206272
14/06/26 10:46:33 INFO mapred.JobClient: Reduce output records=71
14/06/26 10:46:33 INFO mapred.JobClient: Virtual memory (bytes) snapshot=1508102144
14/06/26 10:46:33 INFO mapred.JobClient: Map output records=106