/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec;

import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * FetchTask implementation.
 */
 
 public class FetchOperator implements Serializable {
 
   static Log LOG = LogFactory.getLog(FetchOperator.class.getName());
  static LogHelper console = new LogHelper(LOG);
 
   private boolean isEmptyTable;
   private boolean isNativeTable;
   private FetchWork work;
   private int splitNum;
   private PartitionDesc currPart;
   private TableDesc currTbl;
   private boolean tblDataDone;
 
   private transient RecordReader<WritableComparable, Writable> currRecReader;
   private transient InputSplit[] inputSplits;
   private transient InputFormat inputFormat;
   private transient JobConf job;
   private transient WritableComparable key;
   private transient Writable value;
   private transient Deserializer serde;
   private transient Iterator<Path> iterPath;
  private transient Iterator<PartitionDesc> iterPartDesc;
   private transient Path currPath;
   private transient StructObjectInspector rowObjectInspector;
   private transient Object[] rowWithPart;
   public FetchOperator() {
   }
 
  public FetchOperator(FetchWork work, JobConf job) {
    this.work = work;
    initialize(job);
  }

  public void initialize(JobConf job) {
    this.job = job;
    tblDataDone = false;
    rowWithPart = new Object[2];
    if (work.getTblDesc() != null) {
      isNativeTable = !work.getTblDesc().isNonNative();
    } else {
      isNativeTable = true;
    }
  }
  public FetchWork getWork() {
    return work;
  }

  public void setWork(FetchWork work) {
    this.work = work;
  }

  public int getSplitNum() {
    return splitNum;
  }

  public void setSplitNum(int splitNum) {
    this.splitNum = splitNum;
  }

  public PartitionDesc getCurrPart() {
    return currPart;
  }

  public void setCurrPart(PartitionDesc currPart) {
    this.currPart = currPart;
  }

  public TableDesc getCurrTbl() {
    return currTbl;
  }

  public void setCurrTbl(TableDesc currTbl) {
    this.currTbl = currTbl;
  }

  public boolean isTblDataDone() {
    return tblDataDone;
  }

  public void setTblDataDone(boolean tblDataDone) {
    this.tblDataDone = tblDataDone;
  }

  public boolean isEmptyTable() {
    return isEmptyTable;
  }

  public void setEmptyTable(boolean isEmptyTable) {
    this.isEmptyTable = isEmptyTable;
  }

  
  /**
   * A cache of InputFormat instances.
   */
  private static Map<Class, InputFormat<WritableComparable, Writable>> inputFormats =
      new HashMap<Class, InputFormat<WritableComparable, Writable>>();
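
  /**
   * Returns the cached InputFormat instance for the given class, creating it through
   * ReflectionUtils on first use so repeated fetches over the same format do not pay the
   * reflection cost again.
   */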
  static InputFormat<WritableComparable, Writable> getInputFormatFromCache(Class inputFormatClass,
      Configuration conf) throws IOException {
    if (!inputFormats.containsKey(inputFormatClass)) {
      try {
        InputFormat<WritableComparable, Writable> newInstance =
            (InputFormat<WritableComparable, Writable>) ReflectionUtils.newInstance(
                inputFormatClass, conf);
        inputFormats.put(inputFormatClass, newInstance);
      } catch (Exception e) {
        throw new IOException("Cannot create an instance of InputFormat class "
            + inputFormatClass.getName() + " as specified in mapredWork!", e);
      }
    }
    return inputFormats.get(inputFormatClass);
  }
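
  /**
   * Sets up the object inspector for reads from the current partition: the partition key
   * columns (from META_TABLE_PARTITION_COLUMNS) are exposed as an extra struct of string
   * values stored in rowWithPart[1], and rowObjectInspector becomes the union of the
   * deserializer's row inspector and that partition struct.
   */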
  private void setPrtnDesc() throws Exception {
    List<String> partNames = new ArrayList<String>();
    List<String> partValues = new ArrayList<String>();

    String pcols = currPart.getTableDesc().getProperties().getProperty(
        org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);
    LinkedHashMap<String, String> partSpec = currPart.getPartSpec();

    List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>();
    String[] partKeys = pcols.trim().split("/");
    for (String key : partKeys) {
      partNames.add(key);
      partValues.add(partSpec.get(key));
      // partition key values are exposed as plain strings
      partObjectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    }
    StructObjectInspector partObjectInspector = ObjectInspectorFactory
        .getStandardStructObjectInspector(partNames, partObjectInspectors);

    rowObjectInspector = (StructObjectInspector) serde.getObjectInspector();

    rowWithPart[1] = partValues;
    rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays
        .asList(new StructObjectInspector[] {rowObjectInspector, partObjectInspector}));
  }
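
  /**
   * Advances currPath to the next input location that contains data: the table directory
   * on the first call for an unpartitioned table, otherwise the next partition directory
   * with at least one non-empty file. currPath is left as null when nothing is left to read.
   */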
  private void getNextPath() throws Exception {
    // first time
    if (iterPath == null) {
      if (work.getTblDir() != null) {
        if (!tblDataDone) {
          currPath = work.getTblDirPath();
          currTbl = work.getTblDesc();
          if (isNativeTable) {
            FileSystem fs = currPath.getFileSystem(job);
            if (fs.exists(currPath)) {
              FileStatus[] fStats = listStatusUnderPath(fs, currPath);
              for (FileStatus fStat : fStats) {
                if (fStat.getLen() > 0) {
                  tblDataDone = true;
                  break;
                }
              }
            }
          } else {
            tblDataDone = true;
          }

          if (!tblDataDone) {
            currPath = null;
          }
          return;
        } else {
          currTbl = null;
          currPath = null;
        }
        return;
      } else {
        iterPath = FetchWork.convertStringToPathArray(work.getPartDir()).iterator();
        iterPartDesc = work.getPartDesc().iterator();
      }
    }

    while (iterPath.hasNext()) {
      Path nxt = iterPath.next();
      PartitionDesc prt = null;
      if (iterPartDesc != null) {
        prt = iterPartDesc.next();
      }
      FileSystem fs = nxt.getFileSystem(job);
      if (fs.exists(nxt)) {
        FileStatus[] fStats = listStatusUnderPath(fs, nxt);
        for (FileStatus fStat : fStats) {
          if (fStat.getLen() > 0) {
            currPath = nxt;
            if (iterPartDesc != null) {
              currPart = prt;
            }
            return;
          }
        }
      }
    }
  }
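
  /**
   * Lazily opens a RecordReader: the first call for a path computes its splits and
   * initializes the input format and deserializer; subsequent calls hand out one reader per
   * split and move on to the next path (via getNextPath) once all splits are exhausted.
   */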
  private RecordReader<WritableComparable, Writable> getRecordReader() throws Exception {
    if (currPath == null) {
      getNextPath();
      if (currPath == null) {
        return null;
      }

      // not using FileInputFormat.setInputPaths() here because it forces a
      // connection to the default file system - which may or may not be online
      // during pure metadata operations
      job.set("mapred.input.dir", org.apache.hadoop.util.StringUtils.escapeString(currPath
          .toString()));

      PartitionDesc tmp;
      if (currTbl == null) {
        tmp = currPart;
      } else {
        tmp = new PartitionDesc(currTbl, null);
      }

      inputFormat = getInputFormatFromCache(tmp.getInputFileFormatClass(), job);
      Utilities.copyTableJobPropertiesToConf(tmp.getTableDesc(), job);
      inputSplits = inputFormat.getSplits(job, 1);
      splitNum = 0;
      serde = tmp.getDeserializerClass().newInstance();
      serde.initialize(job, tmp.getProperties());

      if (LOG.isDebugEnabled()) {
        LOG.debug("Creating fetchTask with deserializer typeinfo: "
            + serde.getObjectInspector().getTypeName());
        LOG.debug("deserializer properties: " + tmp.getProperties());
      }

      if (currPart != null) {
        setPrtnDesc();
      }
    }

    if (splitNum >= inputSplits.length) {
      if (currRecReader != null) {
        currRecReader.close();
        currRecReader = null;
      }
      currPath = null;
      return getRecordReader();
    }

    currRecReader = inputFormat.getRecordReader(inputSplits[splitNum++], job, Reporter.NULL);
    key = currRecReader.createKey();
    value = currRecReader.createValue();
    return currRecReader;
  }

  
  /**
   * Get the next row. The fetch context is modified appropriately.
   */
  public InspectableObject getNextRow() throws IOException {
    try {
      while (true) {
        if (currRecReader == null) {
          currRecReader = getRecordReader();
          if (currRecReader == null) {
            return null;
          }
        }
        boolean ret = currRecReader.next(key, value);
        if (ret) {
          if (this.currPart == null) {
            Object obj = serde.deserialize(value);
            return new InspectableObject(obj, serde.getObjectInspector());
          } else {
            rowWithPart[0] = serde.deserialize(value);
            return new InspectableObject(rowWithPart, rowObjectInspector);
          }
        } else {
          currRecReader.close();
          currRecReader = null;
        }
      }
    } catch (Exception e) {
      throw new IOException(e);
    }
  }

  
  /**
   * Clear the context, if anything needs to be done.
   */
  public void clearFetchContext() throws HiveException {
    try {
      if (currRecReader != null) {
        currRecReader.close();
        currRecReader = null;
      }
      this.currPath = null;
      this.iterPath = null;
      this.iterPartDesc = null;
    } catch (Exception e) {
      throw new HiveException("Failed with exception " + e.getMessage()
          + org.apache.hadoop.util.StringUtils.stringifyException(e));
    }
  }

  
  /**
   * Used for bucket map join. There is a hack for getting the partitionDesc: bucket map join
   * currently allows only one partition to be present in the join.
   */
  public void setupContext(Iterator<Path> iterPath, Iterator<PartitionDesc> iterPartDesc) {
    this.iterPath = iterPath;
    this.iterPartDesc = iterPartDesc;
    if (iterPartDesc == null) {
      if (work.getTblDir() != null) {
        this.currTbl = work.getTblDesc();
      } else {
        // hack, get the first.
        List<PartitionDesc> listParts = work.getPartDesc();
        currPart = listParts.get(0);
      }
    }
  }

  /**
   * Returns the ObjectInspector for the rows this operator will fetch, or null if there is
   * nothing to read.
   */
  public ObjectInspector getOutputObjectInspector() throws HiveException {
    try {
      if (work.getTblDir() != null) {
        TableDesc tbl = work.getTblDesc();
        Deserializer serde = tbl.getDeserializerClass().newInstance();
        serde.initialize(job, tbl.getProperties());
        return serde.getObjectInspector();
      } else if (work.getPartDesc() != null) {
        List<PartitionDesc> listParts = work.getPartDesc();
        if (listParts.size() == 0) {
          return null;
        }
        currPart = listParts.get(0);
        serde = currPart.getTableDesc().getDeserializerClass().newInstance();
        serde.initialize(job, currPart.getTableDesc().getProperties());
        setPrtnDesc();
        currPart = null;
        return rowObjectInspector;
      } else {
        return null;
      }
    } catch (Exception e) {
      throw new HiveException("Failed with exception " + e.getMessage()
          + org.apache.hadoop.util.StringUtils.stringifyException(e));
    }
  }

  
  /**
   * Lists status for all files under a given path. Whether or not this is recursive depends
   * on the setting of job configuration parameter mapred.input.dir.recursive.
   *
   * @param fs file system
   * @param p path in file system
   * @return list of file status entries
   */
  private FileStatus[] listStatusUnderPath(FileSystem fs, Path p) throws IOException {
    HiveConf hiveConf = new HiveConf(job, FetchOperator.class);
    boolean recursive = hiveConf.getBoolVar(HiveConf.ConfVars.HADOOPMAPREDINPUTDIRRECURSIVE);
    if (!recursive) {
      return fs.listStatus(p);
    }
    List<FileStatus> results = new ArrayList<FileStatus>();
    for (FileStatus stat : fs.listStatus(p)) {
      FileUtils.listStatusRecursively(fs, stat, results);
    }
    return results.toArray(new FileStatus[results.size()]);
  }
}
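
// Illustrative usage sketch (assumes an already populated FetchWork and JobConf; the
// process(...) call is a placeholder for whatever consumes the rows):
//
//   FetchOperator fetcher = new FetchOperator(work, job);
//   InspectableObject io;
//   while ((io = fetcher.getNextRow()) != null) {
//     process(io.o, io.oi);   // io.o is the deserialized row, io.oi its ObjectInspector
//   }
//   fetcher.clearFetchContext();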