当前位置 > CPDA数据分析师 > “数”业专攻 > 如何用Hadoop定义一个公共类WordCount1

如何用Hadoop定义一个公共类WordCount1

来源:数据分析师 CPDA | 时间:2015-12-11 | 作者:admin

QQ图片20151211174101

public static class WordCountMapper //定义一个公共静态类WordCountMapper
extends Mapper<Object,Text,Text,IntWritable> //继承Mapper对象,泛型定义key、value值
{

private final static IntWritable one = new IntWritable(1); //实例化IntWritable,定义私有最终静态常量,并赋值为1
private Text word = new Text(); //实例化对象Text

public void map(Object key,Text value,Context context) //定义map方法,为(key,Text,context),并且无返回值
throws IOException, InterruptedException { //抛出异常
String[] words = value.toString().split(" "); //定义字符串数组words,调用value下的Text中的tostring方法,以" "作为分片标准

for (String str: words) //增强for循环,遍历words数组
{
word.set(str); //从text中找到由字符串组成单词
context.write(word,one); //将从text中截取的字符串以key/value值输出
}

}
}

public static class WordCountReducer //定义一个公共静态类WordCountReducer
extends Reducer<Text,IntWritable,Text,IntWritable> { //继承Reducer对象,泛型定义输入和输出的key、value值
public void reduce(Text key,Iterable<IntWritable> values,Context context) //定义reduce方法,为(key,迭代算法values,context),无返回值

throws IOException, InterruptedException { //抛出异常

int total=0; //输入整形变量total,初始赋值为0
for (IntWritable val : values){ //加强for循环,定义val类型,遍历values数组
total++; //遇到相同的字符,total值自加1
}
context.write(key, new IntWritable(total)); //将所得到的值以key/values值输出
}

}

/**
 * Configures the word-count job, runs it, and exits the JVM with the job's status.
 *
 * <p>Exit status: 0 when {@code waitForCompletion} returns {@code true}, 1 when it
 * returns {@code false}, 2 when the required arguments are missing.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path;
 *             any further arguments are ignored
 * @throws Exception any failure raised while configuring or running the job
 */
public static void main(String[] args) throws Exception {
  // Fail with a usage message instead of an ArrayIndexOutOfBoundsException
  // when the input/output paths are not supplied.
  if (args.length < 2) {
    System.err.println("Usage: WordCount1 <input path> <output path>");
    System.exit(2);
  }

  Configuration conf = new Configuration();

  // Sets the "mapred.jar" property to the hard-coded jar file name.
  conf.set("mapred.jar", "wc1.jar");

  // NOTE(review): newer Hadoop releases deprecate this constructor in favor of
  // Job.getInstance(conf, name); kept as-is because the targeted Hadoop version
  // is not visible from here — confirm before switching.
  Job job = new Job(conf, "word count");
  job.setJarByClass(WordCount1.class);
  job.setMapperClass(WordCountMapper.class);
  job.setReducerClass(WordCountReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}

}