鹅厂Tencent面试题
今天面试被虐。。。好久没写MapReduce，忘记了。额额。。。
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
*
*
* File Name: PageVisitAge_Avg.java
*
* General Description: MapReduce job that computes, for each visited page URL, the average age of the users who visited it.
*
* Revision History:
* Modification
* Author Date(MM/DD/YYYY) JiraID Description of Changes
* --------------------- ------------ ---------- -----------------------------
* @author Bill Zhang 2017年9月6日
*
*/
public class PageVisitAge_Avg extends Configured implements Tool{
/**
 * Mapper that groups page-visit records by URL.
 *
 * <p>Each input line is expected to look like {@code userId,url}. The URL becomes the
 * output key and the unmodified input line becomes the output value, so the reducer can
 * still read the user id from it.
 */
static class PageVisitMapper extends Mapper<LongWritable, Text, Text, Text> {
    /** Output key object, reused for every record instead of allocating a new Text each time. */
    private final Text urlKey = new Text();

    /**
     * Emits {@code (url, whole line)} for one visit record.
     *
     * @param key     input key supplied by the framework (not used)
     * @param value   one line of the visit log
     * @param context sink for the output pair and for the malformed-record counter
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if writing the output pair is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        // Limit 2: only the first comma separates the id from the URL, so commas that are
        // part of the URL itself stay in the key.
        String[] fields = line.split(",", 2);
        if (fields.length < 2 || fields[1].isEmpty()) {
            // Blank line or line without a URL: count it and skip it rather than
            // failing the whole task with an ArrayIndexOutOfBoundsException.
            context.getCounter("PageVisitAge_Avg", "MALFORMED_VISIT_RECORDS").increment(1);
            return;
        }
        urlKey.set(fields[1]);
        context.write(urlKey, value);
    }
}
/**
 * Reducer that emits, for every URL, the mean age of the users who visited it.
 *
 * <p>Ages are looked up in a user dictionary that is read from the job's first cache
 * file. Each dictionary line is expected to look like {@code userId,name,age,...}; each
 * incoming value is expected to start with {@code userId,}.
 */
static class PageVisitReducer extends Reducer<Text, Text, Text, Text> {
    /** Counter group used for every data-quality counter reported by this reducer. */
    private static final String COUNTER_GROUP = "PageVisitAge_Avg";

    /** User id -> age, filled once in {@link #setup}. Ages are parsed up front so reduce() does no re-splitting. */
    private final ConcurrentHashMap<String, Double> ageByUserId = new ConcurrentHashMap<String, Double>();

    /** Output value object, reused for every key. */
    private final Text meanText = new Text();

    /**
     * Loads the user dictionary from the first cache file into memory.
     *
     * @param context task context giving access to the cache files, configuration and counters
     * @throws FileNotFoundException if the job has no cache file registered
     * @throws IOException           if the dictionary cannot be opened or read
     */
    @Override
    protected void setup(Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
        URI[] cacheFiles = context.getCacheFiles();
        // The array can be null (not just empty) when no cache file was registered.
        if (cacheFiles == null || cacheFiles.length == 0) {
            throw new FileNotFoundException("Distributed cache file not found.");
        }
        Path dictPath = new Path(cacheFiles[0]);
        // Resolve the file system from the path so a dictionary whose URI points
        // outside the default file system can still be opened.
        FileSystem fs = dictPath.getFileSystem(context.getConfiguration());
        // try-with-resources closes the reader (and the underlying stream) even on failure.
        // UTF-8 is given explicitly so the result does not depend on each node's platform charset.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(fs.open(dictPath), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split(",");
                if (fields.length < 3) {
                    // Blank or truncated row: no age column to read.
                    context.getCounter(COUNTER_GROUP, "MALFORMED_DICTIONARY_RECORDS").increment(1);
                    continue;
                }
                try {
                    ageByUserId.put(fields[0].trim(), Double.valueOf(fields[2].trim()));
                } catch (NumberFormatException e) {
                    // Age column is not a number (e.g. a header row): skip the row, keep the job alive.
                    context.getCounter(COUNTER_GROUP, "MALFORMED_DICTIONARY_RECORDS").increment(1);
                }
            }
        }
    }

    /**
     * Writes {@code (url, mean age)} for one URL.
     *
     * <p>Visits by users missing from the dictionary are counted and left out of the mean.
     * If no visit of the URL can be resolved to an age, nothing is written for that URL.
     *
     * @param key     the page URL
     * @param values  the visit lines grouped under that URL
     * @param context sink for the output pair and for the counters
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if writing the output pair is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {
        SummaryStatistics ages = new SummaryStatistics();
        for (Text visit : values) {
            String line = visit.toString();
            // The user id is everything before the first comma (the whole line if there is none).
            int comma = line.indexOf(',');
            String userId = (comma < 0 ? line : line.substring(0, comma)).trim();
            Double age = ageByUserId.get(userId);
            if (age == null) {
                context.getCounter(COUNTER_GROUP, "VISITS_BY_UNKNOWN_USER").increment(1);
                continue;
            }
            ages.addValue(age);
        }
        if (ages.getN() == 0) {
            // No usable age for this URL; emitting the mean of nothing would only write NaN.
            context.getCounter(COUNTER_GROUP, "URLS_WITHOUT_KNOWN_USERS").increment(1);
            return;
        }
        meanText.set(Double.toString(ages.getMean()));
        context.write(key, meanText);
    }
}
/**
 * Configures the job from the command-line arguments and runs it to completion.
 *
 * @param args {@code args[0]} visit-log input path, {@code args[1]} URI of the user
 *             dictionary, {@code args[2]} output path; extra arguments are ignored
 * @return 0 if the job succeeded, 1 if it failed, 2 if too few arguments were given
 * @throws Exception if the job cannot be set up or submitted
 */
@Override
public int run(String[] args) throws Exception {
    if (args == null || args.length < 3) {
        // Report the problem instead of dying with an ArrayIndexOutOfBoundsException below.
        System.err.println("Usage: " + PageVisitAge_Avg.class.getSimpleName()
                + " <visit-log-path> <user-dictionary-uri> <output-path>");
        ToolRunner.printGenericCommandUsage(System.err);
        return 2;
    }

    Configuration conf = getConf();
    conf.set("mapreduce.input.fileinputformat.input.dir.recursive", "true");
    Job job = Job.getInstance(conf, PageVisitAge_Avg.class.getSimpleName());
    job.setJarByClass(getClass());

    /*
     * Page-visit history of the users, one "userId,url" record per line.
     * Example:
     * 1,http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html
     * 2,http://docs.oracle.com/javase/8/
     * 3,http://docs.oracle.com/javase/8/docs/api/index.html
     */
    FileInputFormat.addInputPath(job, new Path(args[0]));

    /*
     * User dictionary, one "userId,name,age,gender" record per line.
     * Example:
     * 1,zhangsan,20,male
     * 2,zhangsi,23,female
     * 3,susan,30,male
     */
    job.addCacheFile(new URI(args[1]));

    FileOutputFormat.setOutputPath(job, new Path(args[2]));

    job.setMapperClass(PageVisitMapper.class);
    job.setReducerClass(PageVisitReducer.class);

    // Declare every key/value type explicitly: both the mapper and the reducer emit
    // (Text, Text), so nothing is left to the framework defaults.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setNumReduceTasks(10);
    return job.waitForCompletion(true) ? 0 : 1;
}
/**
 * Command-line entry point: hands this tool to ToolRunner together with a fresh
 * Configuration and exits the JVM with the status code that comes back.
 *
 * @param args command-line arguments, forwarded unchanged to ToolRunner
 * @throws Exception anything propagated by ToolRunner.run
 */
public static void main(String[] args) throws Exception {
    final Configuration baseConf = new Configuration();
    final Tool tool = new PageVisitAge_Avg();
    System.exit(ToolRunner.run(baseConf, tool, args));
}
} 相关推荐
Kafka 2020-09-18
Wepe0 2020-10-30
windle 2020-10-29
mengzuchao 2020-10-22
Junzizhiai 2020-10-10
bxqybxqy 2020-09-30
风之沙城 2020-09-24
kingszelda 2020-09-22
大唐帝国前营 2020-08-18
yixu0 2020-08-17
TangCuYu 2020-08-15
xiaoboliu00 2020-08-15
songshijiazuaa 2020-08-15
xclxcl 2020-08-03
zmzmmf 2020-08-03
newfarhui 2020-08-03
likesyour 2020-08-01