Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

MapReduce Job not showing my print statements on the terminal

I am currently trying to figure out what happens when you run a MapReduce job, by adding some System.out.println() calls at certain places in the code, but none of those print statements appears on my terminal when the job runs. Can someone help me figure out what exactly I am doing wrong here?

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.StatusReporter;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountJob {

    /** Number of chained MapReduce passes, parsed from args[2]. */
    public static int iterations;

    /**
     * Mapper: tokenizes each input line and, for every token, appends a test
     * suffix five times and emits (modifiedToken, 1).
     */
    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private final Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // NOTE: stdout from map tasks goes to the task's log files
            // (viewable via the JobTracker web UI), not the client terminal —
            // this is why these prints never appear when submitting remotely.
            System.out.println("blalblbfbbfbbbgghghghghghgh");
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                // BUG FIX: the original called itr.nextToken() twice per
                // iteration (once into `word`, once into `myWord`), silently
                // dropping every other token and throwing
                // NoSuchElementException on lines with an odd token count.
                String myWord = itr.nextToken();
                for (int n = 0; n < 5; n++) {
                    myWord = myWord + "Test my appending words";
                }
                System.out.println("Print my word:  " + myWord);
                word.set(myWord);
                context.write(word, one);
            }
        }
    }

    /** Reducer (also used as combiner): sums the counts for each key. */
    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        private final IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    /**
     * Runs the word-count job {@code iterations} times, feeding each pass's
     * output directory in as the next pass's input.
     *
     * <p>Usage: {@code WordCountJob <in> <out> <iterations>}
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 3) {
            System.err.println("Usage: WordCountJob <in> <out> <iterations>");
            System.exit(2);
        }
        // parseInt avoids the deprecated boxing constructor new Integer(...).
        iterations = Integer.parseInt(args[2]);

        // Removed unused locals: a TaskAttemptID and a hand-constructed
        // TokenizerMapper — Hadoop instantiates mapper objects itself.
        Configuration conf = new Configuration();
        Path inPath = new Path(args[0]);
        Path outPath = null;
        for (int i = 0; i < iterations; ++i) {
            System.out.println("Iteration number: " + i);
            // Each pass writes to a distinct directory: <out>0, <out>1, ...
            outPath = new Path(args[1] + i);
            Job job = new Job(conf, "WordCountJob");
            job.setJarByClass(WordCountJob.class);
            job.setMapperClass(TokenizerMapper.class);
            job.setCombinerClass(IntSumReducer.class);
            job.setReducerClass(IntSumReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.addInputPath(job, inPath);
            FileOutputFormat.setOutputPath(job, outPath);
            // BUG FIX: the original ignored waitForCompletion's result; a
            // failed pass would feed a missing directory to the next one.
            if (!job.waitForCompletion(true)) {
                System.exit(1);
            }
            // Chain: this pass's output becomes the next pass's input.
            inPath = outPath;
        }
    }
}
like image 460
asembereng Avatar asked Jul 11 '11 03:07

asembereng


2 Answers

This depends on how you are submitting your job, I think you're submitting it using bin/hadoop jar yourJar.jar right?

Your System.out.println() output is only visible for your main method, because the mapper/reducer is executed inside Hadoop in a different JVM, and all of its output is redirected to special log files (out/log files). I would recommend using your own Apache Commons logging instead:

Log log = LogFactory.getLog(YOUR_MAPPER_CLASS.class)

And therefore do some info logging:

log.info("Your message");

If you're in "local"-mode then you can see this log in your shell, otherwise this log will be stored somewhere on the machine where the task gets executed. Please use the jobtracker's web UI to look at these log files, it is quite convenient. By default the job tracker runs on port 50030.

like image 188
Thomas Jungblut Avatar answered Nov 16 '22 01:11

Thomas Jungblut


Alternatively, you can make use of MultipleOutputs class and re-direct all your log data into one output file(log).

MultipleOutputs<Text, Text> mos = new MultipleOutputs<Text, Text>(context);
Text tKey = new Text("key");
Text tVal = new Text("log message");
mos.write(tKey, tVal, <LOG_FILE>);
like image 32
Satya Pavan Avatar answered Nov 15 '22 23:11

Satya Pavan