Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
 
 
package org.apache.mahout.cf.taste.hadoop;

import java.io.IOException;

import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.log4j.Logger;
// Mahout utility classes used below; package locations assumed from the Mahout source tree.
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
 

This class configures and runs a RecommenderMapper using Hadoop.

Command line arguments are:

  1. Fully-qualified class name of the Recommender to use to make recommendations. Note that it must have a constructor which takes an org.apache.mahout.cf.taste.model.DataModel argument (see the sketch after this list).
  2. Number of recommendations to compute per user
  3. Location of a text file containing user IDs for which recommendations should be computed, one per line
  4. Location of a data model file containing preference data, suitable for use with org.apache.mahout.cf.taste.impl.model.file.FileDataModel
  5. Location of a JAR file containing the Recommender implementation and its dependencies, shipped to the workers via mapred.jar
  6. Output path where reducer output should go
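
For example, the constructor requirement in item 1 looks like this. This is a minimal sketch: the wrapper class and its package are hypothetical, and SlopeOneRecommender already has such a constructor, so the wrapper exists only to show the required shape.

package org.example; // hypothetical package

import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender;
import org.apache.mahout.cf.taste.model.DataModel;

// A Recommender usable with this job: its fully-qualified name is passed on
// the command line, and it must expose a public (DataModel) constructor so
// it can be instantiated against the data model loaded on each worker.
public final class MyRecommender extends SlopeOneRecommender {
  public MyRecommender(DataModel dataModel) throws TasteException {
    super(dataModel);
  }
}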

Example arguments:

org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender 10 path/to/users.txt path/to/data.csv path/to/recommender.jar path/to/reducerOutputDir

Set up Hadoop in pseudo-distributed mode following http://hadoop.apache.org/common/docs/current/quickstart.html. You can stop at the point where it instructs you to copy files into HDFS; instead, proceed as follows:

hadoop fs -mkdir input
hadoop fs -mkdir output

We need to massage the BX input a little bit and also create a file of user IDs:

tail +2 BX-Book-Ratings.csv | tr -cd '[:digit:];\n' | tr ';' ',' | grep -v ',,' > input.csv
cut -d, -f1 input.csv | uniq > users.txt

(Mac users: put "export LC_ALL=C;" at the front of the first command; you may want to "unset LC_ALL" afterwards.) The first command skips the header line, strips everything except digits, semicolons, and newlines, converts the separators to commas, and drops lines whose ISBN contained no digits; assuming the standard Book-Crossing format, a raw line like "276725";"034545104X";"0" becomes 276725,034545104,0. The second command extracts the user ID column, collapsing adjacent duplicates.

Now we put the file in input/ and prepare output/:

hadoop fs -put input.csv input/input.csv
hadoop fs -put users.txt input/users.txt
hadoop fs -mkdir output/

Now build the Mahout code using your IDE, or Maven, and note where the compiled classes go. If you built with Maven, it will be something like (Mahout directory)/core/target/classes/. Prepare a JAR file for Hadoop:

jar cvf recommender.jar -C (classes directory) .
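
For a Maven build, run from the Mahout checkout root, that would be, for example:

jar cvf recommender.jar -C core/target/classes .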

And launch:

hadoop jar recommender.jar org.apache.mahout.cf.taste.hadoop.RecommenderJob \
  org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender \
  10 input/users.txt input/input.csv recommender.jar output
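
Note that main() below parses named options rather than positional arguments, so if the positional form is rejected, the equivalent invocation uses the long option names defined in the code (the --output name is assumed to come from DefaultOptionCreator):

hadoop jar recommender.jar org.apache.mahout.cf.taste.hadoop.RecommenderJob \
  --recommenderClassName org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender \
  --userRec 10 --userIdFile input/users.txt --dataModelFile input/input.csv \
  --jarFile recommender.jar --output output
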
public final class RecommenderJob extends Job {
  private static final Logger log = Logger.getLogger(RecommenderJob.class);
  public RecommenderJob(Configuration jobConf) throws IOException {
    super(jobConf);
  }
  public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();
    Option recommendClassOpt = obuilder.withLongName("recommenderClassName").withRequired(true)
      .withShortName("r").withArgument(abuilder.withName("recommenderClassName").withMinimum(1)
      .withMaximum(1).create()).withDescription("Name of recommender class to use.").create();
    Option userRecommendOpt = obuilder.withLongName("userRec").withRequired(true)
      .withShortName("n").withArgument(abuilder.withName("userRec").withMinimum(1)
      .withMaximum(1).create()).withDescription("Desired number of recommendations per user.").create();
    Option userIDFileOpt = obuilder.withLongName("userIdFile").withRequired(true)
      .withShortName("f").withArgument(abuilder.withName("userIdFile").withMinimum(1)
      .withMaximum(1).create()).withDescription("File containing user ids.").create();
    Option dataModelFileOpt = obuilder.withLongName("dataModelFile").withRequired(true)
      .withShortName("m").withArgument(abuilder.withName("dataModelFile").withMinimum(1)
      .withMaximum(1).create()).withDescription("File containing data model.").create();
    // Short name "j" is an assumption; it must differ from dataModelFileOpt's "m".
    Option jarFileOpt = obuilder.withLongName("jarFile").withRequired(true)
      .withShortName("j").withArgument(abuilder.withName("jarFile").withMinimum(1)
      .withMaximum(1).create()).withDescription("Implementation jar.").create();
    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
    Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
    // Every option, including jarFileOpt, must be registered with the group for the parser to accept it.
    Group group = gbuilder.withName("Options").withOption(recommendClassOpt).withOption(userRecommendOpt)
      .withOption(userIDFileOpt).withOption(dataModelFileOpt).withOption(jarFileOpt)
      .withOption(outputOpt).withOption(helpOpt).create();
    try {
      Parser parser = new Parser();
      parser.setGroup(group);
      CommandLine cmdLine = parser.parse(args);
      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return;
      }
      String recommendClassName = cmdLine.getValue(recommendClassOpt).toString();
      int recommendationsPerUser = Integer.parseInt(cmdLine.getValue(userRecommendOpt).toString());
      String userIDFile = cmdLine.getValue(userIDFileOpt).toString();
      String dataModelFile = cmdLine.getValue(dataModelFileOpt).toString();
      String jarFile = cmdLine.getValue(jarFileOpt).toString();
      String outputPath = cmdLine.getValue(outputOpt).toString();
      Configuration jobConf =
          buildJobConf(recommendClassName, recommendationsPerUser, userIDFile, dataModelFile, jarFile, outputPath);
      Job job = new RecommenderJob(jobConf);
      job.waitForCompletion(true); 
    } catch (OptionException e) {
      log.error(e.getMessage());
      CommandLineUtil.printHelp(group);
    }
  }
  public static Configuration buildJobConf(String recommendClassName,
                                           int recommendationsPerUser,
                                           String userIDFile,
                                           String dataModelFile,
                                           String jarFile,
                                           String outputPath) throws IOException {
    Configuration jobConf = new Configuration();
    FileSystem fs = FileSystem.get(jobConf);
    Path userIDFilePath = new Path(userIDFile).makeQualified(fs);
    Path outputPathPath = new Path(outputPath).makeQualified(fs);
    if (fs.exists(outputPathPath)) {
      fs.delete(outputPathPath, true);
    }
    jobConf.set("mapred.jar"jarFile);
    jobConf.set(.recommendClassName);
    jobConf.set(., String.valueOf(recommendationsPerUser));
    jobConf.set(.dataModelFile);
    jobConf.setClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class);
    jobConf.set("mapred.input.dir", StringUtils.escapeString(userIDFilePath.toString()));
    jobConf.setClass("mapred.mapper.class"RecommenderMapper.class, Mapper.class);
    jobConf.setClass("mapred.mapoutput.key.class", LongWritable.classObject.class);
    jobConf.setClass("mapred.mapoutput.value.class"RecommendedItemsWritable.classObject.class);
    jobConf.setClass("mapred.reducer.class"IdentityReducer.class, Reducer.class);
    jobConf.setClass("mapred.output.key.class", LongWritable.classObject.class);
    jobConf.setClass("mapred.output.value.class"RecommendedItemsWritable.classObject.class);
    //jobConf.setClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class);
    jobConf.set("mapred.output.dir", StringUtils.escapeString(outputPathPath.toString()));
    return jobConf;
  }
}
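
For reference, the same job can be launched programmatically. A minimal sketch, reusing the HDFS paths from the walkthrough above and assuming the Hadoop and Mahout classes are on the classpath (to be placed inside a method declared to throw Exception):

Configuration conf = RecommenderJob.buildJobConf(
    "org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender",
    10,                 // recommendations per user
    "input/users.txt",  // user IDs, one per line
    "input/input.csv",  // FileDataModel-format preference data
    "recommender.jar",  // shipped to the workers via mapred.jar
    "output");          // output directory, deleted first if it already exists
new RecommenderJob(conf).waitForCompletion(true);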