Hadoop:倒排索引源代码

Mapper端实现:

/**
 * Map side of the inverted index.
 *
 * For every token found in an input line, emits the pair
 * (token, name of the file the line was read from).
 */
public static class Mapclass extends MapReduceBase implements
		Mapper<LongWritable,Text,Text,Text>{

	/** Output key: the current token. The same instance is overwritten for every token. */
	public Text name = new Text();

	/** Output value: the name of the input file. Overwritten on every call to map. */
	public Text location = new Text();

	/**
	 * Tokenizes one input line and emits (token, file name) for each token.
	 *
	 * @param key      input key supplied by the framework; not used here
	 * @param value    the line of text to tokenize
	 * @param output   collector receiving the (token, file name) pairs
	 * @param reporter used to obtain the input split currently being processed
	 * @throws IOException if the collector fails to accept a pair
	 */
	public void map(LongWritable key,Text value,
			OutputCollector<Text,Text> output,Reporter reporter)throws IOException{
		// The split is cast to FileSplit, so the input is expected to be file based.
		final FileSplit split = (FileSplit) reporter.getInputSplit();
		location.set(split.getPath().getName());

		// StringTokenizer with its default delimiters splits on whitespace.
		for (StringTokenizer tokens = new StringTokenizer(value.toString());
				tokens.hasMoreTokens();) {
			name.set(tokens.nextToken());
			output.collect(name, location);
		}
	}
}

Reducer端实现:

/**
 * Reduce side of the inverted index.
 *
 * For one word (the key), counts how often each file name occurs among the
 * values and emits a single record whose value is
 * "file1\tcount1\tfile2\tcount2\t..." (each pair is followed by a tab).
 */
public static class Reducerclass extends MapReduceBase implements
	Reducer<Text,Text,Text,Text>{

	/**
	 * Aggregates the file names received for one word into per-file counts.
	 *
	 * The counts are accumulated in a map rather than by comparing each value
	 * with its predecessor: the values of a key are not guaranteed to arrive
	 * grouped by file name, and adjacent-only comparison would then report the
	 * same file several times with partial counts.
	 *
	 * Files are emitted in the order in which they are first seen, so input
	 * that already is grouped by file name produces the same output as a
	 * simple run-length count.
	 *
	 * @param key      the word being indexed
	 * @param values   file names in which the word occurred, one per occurrence
	 * @param output   collector receiving the (word, posting list) record
	 * @param reporter progress reporter; not used here
	 * @throws IOException if the collector fails to accept the record
	 */
	public void reduce(Text key,Iterator<Text> values,
			OutputCollector<Text,Text> output,Reporter reporter) throws IOException{

		// Nothing to index for this key; avoid calling next() on an empty iterator.
		if (!values.hasNext()) {
			return;
		}

		// java.util types are fully qualified so this block needs no extra imports.
		// Memory use is one entry per distinct file name for this word.
		java.util.Map<String,Integer> counts =
				new java.util.LinkedHashMap<String,Integer>();
		while (values.hasNext()) {
			// toString() copies the content, so it is safe even if the
			// iterator hands back the same Text instance on every call.
			String file = values.next().toString();
			Integer seen = counts.get(file);
			counts.put(file, seen == null ? 1 : seen + 1);
		}

		StringBuilder sb = new StringBuilder();
		for (java.util.Map.Entry<String,Integer> entry : counts.entrySet()) {
			// The trailing tab after every pair is kept so the record format
			// stays unchanged for existing consumers.
			sb.append(entry.getKey()).append("\t").append(entry.getValue()).append("\t");
		}
		output.collect(key, new Text(sb.toString()));
	}
}

其余部分(如 Job 的输入、输出路径等配置)请读者自行补充。

相关推荐