Start line:  
End line:  

Snippet Preview

Snippet HTML Code

Stack Overflow Questions
Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
 package org.apache.mahout.classifier.bayes.mapreduce.common;
 import  org.apache.hadoop.hbase.HBaseConfiguration;
 import  org.apache.hadoop.hbase.client.HTable;
 import  org.apache.hadoop.hbase.client.Put;
 import  org.apache.hadoop.hbase.util.Bytes;
 import  org.apache.hadoop.mapred.JobConf;
 import  org.apache.hadoop.mapred.MapReduceBase;
 import  org.apache.hadoop.mapred.OutputCollector;
 import  org.apache.hadoop.mapred.Reducer;
 import  org.apache.hadoop.mapred.Reporter;
Can also be used as a local Combiner because only two values should be present in the values iterator
 /**
  * Reducer for the Bayes TfIdf step: takes {@code StringTuple} keys with
  * {@code DoubleWritable} values and emits pairs of the same types.
  *
  * <p>Depending on the first element of the key, {@code reduce} either keeps a
  * running sum ({@code vocabCount}) or a running product
  * ({@code idfTimes_D_ij}) and passes the result to the output collector; any
  * other key is rejected with an {@link IllegalArgumentException}.
  *
  * <p>NOTE(review): this listing appears to have lost tokens during extraction
  * (empty {@code if ()} conditions, operators with no right-hand side, fused
  * identifiers, missing closing braces). The comments below describe only what
  * is still visible; confirm every detail against the upstream source.
  */
 public class BayesTfIdfReducer extends MapReduceBase implements
     Reducer<StringTuple, DoubleWritable, StringTuple, DoubleWritable> {
   private static final Logger log = LoggerFactory.getLogger(BayesTfIdfReducer.class);
   // HBase table handle; presumably assigned in configure(), where an
   // assignment of `new HTable(...)` has lost its target -- TODO confirm.
   private HTable table;
   // Starts as false; presumably switched on in configure() when the
   // "dataSource" parameter equals "hbase" -- TODO confirm.
   private boolean useHbase = false;
   /**
    * Aggregates the values grouped under one key and hands the aggregate to
    * {@code output.collect}.
    *
    * NOTE(review): the parameter list is garbled here ("keyIterator"); the body
    * refers to a {@code key} and to {@code values}, so the signature is
    * presumably (StringTuple key, Iterator&lt;DoubleWritable&gt; values, ...).
    *
    * @param values iterator over the values for the key
    * @param output collector that receives the aggregated value
    * @param reporter receives progress status strings
    * @throws IOException declared by the signature
    * @throws IllegalArgumentException for a key handled by neither branch
    */
   public void reduce(StringTuple keyIterator<DoubleWritable> values,
       OutputCollector<StringTuple, DoubleWritable> output, Reporter reporter)
       throws IOException {
     // Key is label,word, value is the number of times we've seen this label
     // word per local node. Output is the same
       // First branch: running sum over the values.
       // NOTE(review): the condition that opens this branch is not visible.
       double vocabCount = 0.0;
       while (values.hasNext()) {
         // Status is published before the addition, so it shows the total so far.
         reporter.setStatus("Bayes TfIdf Reducer: vocabCount " + vocabCount);
         // NOTE(review): right-hand side of += is missing in this listing.
         vocabCount +=;
       // NOTE(review): condition missing; presumably guards the HBase write.
       if () {
         // Builds an HBase Put; the row, family and qualifier arguments are missing.
         Put bu = new Put(Bytes.toBytes(.));
         bu.add(Bytes.toBytes(.), Bytes
             .toBytes(.), Bytes
       // NOTE(review): "keynew" is presumably "key, new" -- TODO confirm.
       output.collect(keynew DoubleWritable(vocabCount));
     // Second branch: selected by the first element of the key; the constant it
     // is compared against is missing in this listing.
     } else if (key.stringAt(0).equals(.)) {
       // Running product over the values, starting from 1.0.
       double idfTimes_D_ij = 1.0;
       // NOTE(review): no increment of numberofValues is visible in this listing.
       int numberofValues = 0;
       while (values.hasNext()) {
         // NOTE(review): right-hand side of *= is missing in this listing.
         idfTimes_D_ij *=;
       if (numberofValues == 2) { // Found TFIdf
         // Elements 1 and 2 of the key are read as label and feature.
         String label = key.stringAt(1);
         String feature = key.stringAt(2);
         // NOTE(review): condition missing; presumably guards the HBase write.
         if () {
           // Put keyed by the feature bytes; the start of the call that takes
           // the arguments on the next line is missing.
           Put bu = new Put(Bytes.toBytes(feature));
                  Bytes.toBytes(label), Bytes.toBytes(idfTimes_D_ij));
       reporter.setStatus("Bayes TfIdf Reducer: " + key + " => " + idfTimes_D_ij);
       // NOTE(review): "keynew" is presumably "key, new" -- TODO confirm.
       output.collect(keynew DoubleWritable(idfTimes_D_ij));
     } else {
       // Any other key is treated as a caller error.
       throw new IllegalArgumentException("Unexpected StringTuple: " + key);
  /**
   * Reads job settings: compares the "dataSource" parameter with "hbase",
   * builds an {@code HBaseConfiguration} from the job and constructs an
   * {@code HTable} named by the job's "output.table" property.
   *
   * NOTE(review): the {@code Parameters} initialiser, both assignment targets
   * and the closing braces are missing in this listing, so it cannot be
   * determined here which statements the "hbase" check guards.
   *
   * @param job the job configuration
   */
  public void configure(JobConf job) {
    try {
      // NOTE(review): the expression that creates params is missing.
      Parameters params = Parameters
      if (params.get("dataSource").equals("hbase"))
         // NOTE(review): assignment target missing; presumably useHbase.
         = true;
      HBaseConfiguration HBconf = new HBaseConfiguration(job);
       // NOTE(review): assignment target missing (presumably table), and
       // "HBconfjob" is presumably "HBconf, job".
       = new HTable(HBconfjob.get("output.table"));
    } catch (IOException e) {
      // The visible handler logs the failure; the receiver of .error is missing
      // (presumably log).
      .error("Unexpected error during configuration"e);
  /**
   * Cleanup hook.
   *
   * NOTE(review): only an empty {@code if ()} is visible; the body is missing.
   *
   * @throws IOException declared by the signature
   */
  public void close() throws IOException {
    if () {
New to GrepCode? Check out our FAQ X