  /*
   * Copyright 2010-2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
   *
   * Licensed under the Apache License, Version 2.0 (the "License").
   * You may not use this file except in compliance with the License.
   * A copy of the License is located at
   *
   *  http://aws.amazon.com/apache2.0
   *
   * or in the "license" file accompanying this file. This file is distributed
   * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
   * express or implied. See the License for the specific language governing
   * permissions and limitations under the License.
   */
 package com.amazonaws.services.elasticmapreduce.util;

 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;

 import com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig;
 
Class that makes it easy to define Hadoop Streaming steps.

See also: Hadoop Streaming

 AWSCredentials credentials = new BasicAWSCredentials(accessKey, secretKey);
 AmazonElasticMapReduce emr = new AmazonElasticMapReduceClient(credentials);

 HadoopJarStepConfig config = new StreamingStep()
     .withInputs("s3://elasticmapreduce/samples/wordcount/input")
     .withOutput("s3://my-bucket/output/")
     .withMapper("s3://elasticmapreduce/samples/wordcount/wordSplitter.py")
     .withReducer("aggregate")
     .toHadoopJarStepConfig();

 StepConfig wordCount = new StepConfig()
     .withName("Word Count")
     .withActionOnFailure("TERMINATE_JOB_FLOW")
     .withHadoopJarStep(config);

 RunJobFlowRequest request = new RunJobFlowRequest()
     .withName("Word Count")
     .withSteps(wordCount)
     .withLogUri("s3://log-bucket/")
     .withInstances(new JobFlowInstancesConfig()
         .withEc2KeyName("keypair")
         .withHadoopVersion("0.20")
         .withInstanceCount(5)
         .withKeepJobFlowAliveWhenNoSteps(true)
         .withMasterInstanceType("m1.small")
         .withSlaveInstanceType("m1.small"));

 RunJobFlowResult result = emr.runJobFlow(request);
 
 
 public class StreamingStep {
   private List<String> inputs = new ArrayList<String>();
   private String output;
   private String mapper;
   private String reducer;
   private Map<String, String> hadoopConfig = new HashMap<String, String>();


  
Creates a new default StreamingStep.
 
   public StreamingStep() {
   }


  
Get list of step input paths.

Returns:
List of step inputs
 
   public List<String> getInputs() {
     return inputs;
   }

  
Set the list of step input paths.

Parameters:
inputs List of step inputs.
 
   public void setInputs(Collection<String> inputs) {
     List<String> newInputs = new ArrayList<String>();
     if (inputs != null) {
       newInputs.addAll(inputs);
     }
     this.inputs = newInputs;
   }

  
Add more input paths to this step.

Parameters:
inputs A list of inputs to this step.
Returns:
A reference to this updated object so that method calls can be chained together.
  public StreamingStep withInputs(String ... inputs) {
    for (String input : inputs) {
       this.inputs.add(input);
    }
    return this;
  }


  
Get output path.

Returns:
Output path.
  public String getOutput() {
     return output;
  }

  
Set the output path for this step.

Parameters:
output Output path.
  public void setOutput(String output) {
     this.output = output;
  }

  
Set the output path for this step.

Parameters:
output Output path
Returns:
A reference to this updated object so that method calls can be chained together.
  public StreamingStep withOutput(String output) {
     this.output = output;
    return this;
  }


  
Get the mapper.

Returns:
Mapper.
  public String getMapper() {
     return mapper;
  }

  
Set the mapper.

Parameters:
mapper Mapper
  public void setMapper(String mapper) {
     this.mapper = mapper;
  }

  
Set the mapper.

Parameters:
mapper Mapper
Returns:
A reference to this updated object so that method calls can be chained together.
  public StreamingStep withMapper(String mapper) {
     this.mapper = mapper;
    return this;
  }


  
Get the reducer.

Returns:
Reducer
  public String getReducer() {
     return reducer;
  }

  
Set the reducer.

Parameters:
reducer Reducer
  public void setReducer(String reducer) {
     this.reducer = reducer;
  }

  
Set the reducer.

Parameters:
reducer Reducer
Returns:
A reference to this updated object so that method calls can be chained together.
  public StreamingStep withReducer(String reducer) {
     this.reducer = reducer;
    return this;
  }


  
Get the Hadoop config overrides (-D values).

Returns:
Hadoop config.
   public Map<String, String> getHadoopConfig() {
     return hadoopConfig;
  }

  
Set the Hadoop config overrides (-D values).

Parameters:
hadoopConfig Hadoop config.
   public void setHadoopConfig(Map<String, String> hadoopConfig) {
     this.hadoopConfig = hadoopConfig;
  }

  
Add a Hadoop config override (-D value).

Parameters:
key Hadoop configuration key.
value Configuration value.
Returns:
A reference to this updated object so that method calls can be chained together.
   public StreamingStep withHadoopConfig(String key, String value) {
     hadoopConfig.put(key, value);
    return this;
  }
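
As a hypothetical illustration (the bucket and script names are made up), an override added through withHadoopConfig shows up as a -D key=value argument when toHadoopJarStepConfig() below assembles the streaming arguments:

 HadoopJarStepConfig tuned = new StreamingStep()
     .withInputs("s3://my-bucket/input/")           // assumed input location
     .withOutput("s3://my-bucket/output/")          // assumed output location
     .withMapper("s3://my-bucket/scripts/map.py")   // assumed mapper script
     .withReducer("aggregate")
     .withHadoopConfig("mapred.reduce.tasks", "2")  // becomes: -D mapred.reduce.tasks=2
     .toHadoopJarStepConfig();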


  
Creates the final HadoopJarStepConfig once you are done configuring the step. You can use this as you would any other HadoopJarStepConfig.

Returns:
HadoopJarStepConfig representing this streaming step.
   public HadoopJarStepConfig toHadoopJarStepConfig() {
     List<String> args = new ArrayList<String>();
     if (reducer == null) {
       hadoopConfig.put("mapred.reduce.tasks", "0");
     }
     for (Map.Entry<String, String> entry : hadoopConfig.entrySet()) {
       args.add("-D");
       args.add(entry.getKey() + "=" + entry.getValue());
     }
     for (String input : inputs) {
       args.add("-input");
       args.add(input);
     }
     if (output != null) {
       args.add("-output");
       args.add(output);
     }
     if (mapper != null) {
       args.add("-mapper");
       args.add(mapper);
     }
     if (reducer != null) {
       args.add("-reducer");
       args.add(reducer);
     }
     return new HadoopJarStepConfig()
       .withJar("/home/hadoop/contrib/streaming/hadoop-streaming.jar")
       .withArgs(args);
   }
 }
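
The HadoopJarStepConfig returned above can be used wherever the EMR API expects one. For example, a minimal sketch (the cluster id, step name, and S3 paths are assumptions) that adds a streaming step to an already running cluster, reusing the emr client from the earlier example:

 AddJobFlowStepsRequest stepRequest = new AddJobFlowStepsRequest()
     .withJobFlowId("j-XXXXXXXXXXXXX")               // assumed id of a running cluster
     .withSteps(new StepConfig()
         .withName("Streaming Word Count")           // assumed step name
         .withActionOnFailure("CONTINUE")
         .withHadoopJarStep(new StreamingStep()
             .withInputs("s3://my-bucket/input/")    // assumed S3 locations
             .withOutput("s3://my-bucket/output/")
             .withMapper("s3://my-bucket/scripts/wordSplitter.py")
             .withReducer("aggregate")
             .toHadoopJarStepConfig()));

 emr.addJobFlowSteps(stepRequest);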