/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 
 
package org.apache.hadoop.hive.ql.plan;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
// The snippet truncates the import list; the remaining Hive imports used below
// (HiveConf, Utilities, the serde/serde2 classes, the ql.io file formats, and
// the ql.metadata helpers) are elided here as in the source.

/**
 * PlanUtils.
 */
public final class PlanUtils {
 
   protected static final Log LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.plan.PlanUtils");

  
  /**
   * ExpressionTypes.
   */
  public static enum ExpressionTypes {
     FIELD, JEXL
   };
 
   @SuppressWarnings("nls")
   public static MapredWork getMapRedWork() {
     try {
      return new MapredWork("", new LinkedHashMap<String, ArrayList<String>>(),
          new LinkedHashMap<String, PartitionDesc>(),
          new LinkedHashMap<String, Operator<? extends Serializable>>(),
          new TableDesc(), new ArrayList<TableDesc>(), null, Integer.valueOf(1),
          null, Hive.get().getConf().getBoolVar(
              // the ConfVars argument is truncated in the snippet; this
              // constant matches contemporaneous Hive releases
              HiveConf.ConfVars.HIVE_COMBINE_INPUT_FORMAT_SUPPORTS_SPLITTABLE));
     } catch (HiveException ex) {
       throw new RuntimeException(ex);
     }
   }
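
  // Illustrative usage (a sketch, not part of the original source): callers
  // such as the semantic analyzer start from this empty skeleton plan and then
  // fill in path-to-alias mappings, partition info, and the operator tree.
  //
  //   MapredWork work = PlanUtils.getMapRedWork();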

  
  /**
   * Generate the table descriptor of MetadataTypedColumnsetSerDe with the
   * separatorCode and column names (comma separated string).
   */
  public static TableDesc getDefaultTableDesc(String separatorCode,
      String columns) {
    return getDefaultTableDesc(separatorCode, columns, false);
  }
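
  // Illustrative usage (assumed call, not in the original source): a
  // descriptor for a ctrl-A separated text table with two columns, where
  // Utilities.ctrlaCode is Hive's default field-separator code (\001).
  //
  //   TableDesc td = PlanUtils.getDefaultTableDesc(
  //       "" + Utilities.ctrlaCode, "key,value");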

  
  /**
   * Generate the table descriptor of the given serde with the separatorCode
   * and column names (comma separated string).
   */
  public static TableDesc getTableDesc(
      Class<? extends Deserializer> serdeClass, String separatorCode,
      String columns) {
    return getTableDesc(serdeClass, separatorCode, columns, false);
  }

  
  /**
   * Generate the table descriptor of MetadataTypedColumnsetSerDe with the
   * separatorCode and column names (comma separated string), and whether the
   * last column should take the rest of the line.
   */
  public static TableDesc getDefaultTableDesc(String separatorCode,
      String columns, boolean lastColumnTakesRestOfTheLine) {
    return getDefaultTableDesc(separatorCode, columns, null,
        lastColumnTakesRestOfTheLine);
  }

  
  /**
   * Generate the table descriptor of the serde specified with the
   * separatorCode and column names (comma separated string), and whether the
   * last column should take the rest of the line.
   */
  public static TableDesc getTableDesc(
      Class<? extends Deserializer> serdeClass, String separatorCode,
      String columns, boolean lastColumnTakesRestOfTheLine) {
    return getTableDesc(serdeClass, separatorCode, columns, null,
        lastColumnTakesRestOfTheLine);
  }

  
  /**
   * Generate the table descriptor of MetadataTypedColumnsetSerDe with the
   * separatorCode and column names (comma separated string), and whether the
   * last column should take the rest of the line.
   */
  public static TableDesc getDefaultTableDesc(String separatorCode,
      String columns, String columnTypes, boolean lastColumnTakesRestOfTheLine) {
    return getTableDesc(LazySimpleSerDe.class, separatorCode, columns,
        columnTypes, lastColumnTakesRestOfTheLine);
  }

  public static TableDesc getTableDesc(
      Class<? extends Deserializer> serdeClass, String separatorCode,
      String columns, String columnTypes, boolean lastColumnTakesRestOfTheLine) {
    return getTableDesc(serdeClass, separatorCode, columns, columnTypes,
        lastColumnTakesRestOfTheLine, false);
  }

  public static TableDesc getTableDesc(
      Class<? extends Deserializer> serdeClass, String separatorCode,
      String columns, String columnTypes, boolean lastColumnTakesRestOfTheLine,
      boolean useDelimitedJSON) {
    return getTableDesc(serdeClass, separatorCode, columns, columnTypes,
        lastColumnTakesRestOfTheLine, useDelimitedJSON, "TextFile");
  }
  public static TableDesc getTableDesc(
      Class<? extends Deserializer> serdeClass, String separatorCode,
      String columns, String columnTypes, boolean lastColumnTakesRestOfTheLine,
      boolean useDelimitedJSON, String fileFormat) {

    Properties properties = Utilities.makeProperties(
        Constants.SERIALIZATION_FORMAT, separatorCode, Constants.LIST_COLUMNS,
        columns);

    if (!separatorCode.equals(Integer.toString(Utilities.ctrlaCode))) {
      properties.setProperty(Constants.FIELD_DELIM, separatorCode);
    }

    if (columnTypes != null) {
      properties.setProperty(Constants.LIST_COLUMN_TYPES, columnTypes);
    }

    if (lastColumnTakesRestOfTheLine) {
      properties.setProperty(Constants.SERIALIZATION_LAST_COLUMN_TAKES_REST,
          "true");
    }

    // This is not a very clean approach and should be revisited later: for
    // compatibility reasons, the user sees the results of custom scripts as
    // JSON and has no way of specifying otherwise. Right now it is hard-coded.
    if (useDelimitedJSON) {
      serdeClass = DelimitedJSONSerDe.class;
    }

    Class inputFormat, outputFormat;
    // get the input & output file formats
    if ("SequenceFile".equalsIgnoreCase(fileFormat)) {
      inputFormat = SequenceFileInputFormat.class;
      outputFormat = SequenceFileOutputFormat.class;
    } else if ("RCFile".equalsIgnoreCase(fileFormat)) {
      inputFormat = RCFileInputFormat.class;
      outputFormat = RCFileOutputFormat.class;
      assert serdeClass == ColumnarSerDe.class;
    } else { // use TextFile by default
      inputFormat = TextInputFormat.class;
      outputFormat = IgnoreKeyTextOutputFormat.class;
    }
    return new TableDesc(serdeClass, inputFormat, outputFormat, properties);
  }
  public static TableDesc getDefaultQueryOutputTableDesc(String cols, String colTypes,
      String fileFormat) {
    return getTableDesc(LazySimpleSerDe.class, "" + Utilities.ctrlaCode, cols,
        colTypes, false, false, fileFormat);
  }
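
  // Illustrative usage (a sketch; the column names and types are
  // hypothetical): requesting a SequenceFile-backed descriptor instead of the
  // TextFile default by passing the fileFormat argument.
  //
  //   TableDesc seqDesc = PlanUtils.getTableDesc(LazySimpleSerDe.class,
  //       "" + Utilities.ctrlaCode, "key,value", "string,string",
  //       false, false, "SequenceFile");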

 
  /**
   * Generate a table descriptor from a createTableDesc.
   */
  public static TableDesc getTableDesc(CreateTableDesc crtTblDesc, String cols,
      String colTypes) {

    Class<? extends Deserializer> serdeClass = LazySimpleSerDe.class;
    String separatorCode = Integer.toString(Utilities.ctrlaCode);
    String columns = cols;
    String columnTypes = colTypes;
    boolean lastColumnTakesRestOfTheLine = false;
    TableDesc ret;

    try {
      if (crtTblDesc.getSerName() != null) {
        Class c = Class.forName(crtTblDesc.getSerName());
        serdeClass = c;
      }

      if (crtTblDesc.getFieldDelim() != null) {
        separatorCode = crtTblDesc.getFieldDelim();
      }

      ret = getTableDesc(serdeClass, separatorCode, columns, columnTypes,
          lastColumnTakesRestOfTheLine, false);

      // set other table properties
      Properties properties = ret.getProperties();

      if (crtTblDesc.getCollItemDelim() != null) {
        properties.setProperty(Constants.COLLECTION_DELIM, crtTblDesc
            .getCollItemDelim());
      }

      if (crtTblDesc.getMapKeyDelim() != null) {
        properties.setProperty(Constants.MAPKEY_DELIM, crtTblDesc
            .getMapKeyDelim());
      }

      if (crtTblDesc.getFieldEscape() != null) {
        properties.setProperty(Constants.ESCAPE_CHAR, crtTblDesc
            .getFieldEscape());
      }

      if (crtTblDesc.getLineDelim() != null) {
        properties.setProperty(Constants.LINE_DELIM, crtTblDesc.getLineDelim());
      }

      // replace the default input & output file format with those found in
      // crtTblDesc
      Class c1 = Class.forName(crtTblDesc.getInputFormat());
      Class c2 = Class.forName(crtTblDesc.getOutputFormat());
      Class<? extends InputFormat> in_class = c1;
      Class<? extends HiveOutputFormat> out_class = c2;

      ret.setInputFileFormatClass(in_class);
      ret.setOutputFileFormatClass(out_class);
    } catch (ClassNotFoundException e) {
      e.printStackTrace();
      return null;
    }
    return ret;
  }

  
  /**
   * Generate the table descriptor of MetadataTypedColumnsetSerDe with the
   * separatorCode. MetadataTypedColumnsetSerDe is used because LazySimpleSerDe
   * does not support a table with a single column "col" with type
   * "array&lt;string&gt;".
   */
  public static TableDesc getDefaultTableDesc(String separatorCode) {
    return new TableDesc(MetadataTypedColumnsetSerDe.class,
        TextInputFormat.class, IgnoreKeyTextOutputFormat.class, Utilities
        .makeProperties(
        org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT,
        separatorCode));
  }
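
  // Illustrative usage (not in the original source): script transforms
  // commonly use a tab-separated default descriptor, where Utilities.tabCode
  // is the ASCII code for '\t'.
  //
  //   TableDesc scriptDesc = PlanUtils.getDefaultTableDesc("" + Utilities.tabCode);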

  
  /**
   * Generate the table descriptor for reduce key.
   */
  public static TableDesc getReduceKeyTableDesc(List<FieldSchema> fieldSchemas,
      String order) {
    return new TableDesc(BinarySortableSerDe.class,
        SequenceFileInputFormat.class, SequenceFileOutputFormat.class,
        Utilities.makeProperties(Constants.LIST_COLUMNS, MetaStoreUtils
        .getColumnNamesFromFieldSchema(fieldSchemas),
        Constants.LIST_COLUMN_TYPES, MetaStoreUtils
        .getColumnTypesFromFieldSchema(fieldSchemas),
        Constants.SERIALIZATION_SORT_ORDER, order));
  }

  
  /**
   * Generate the table descriptor for Map-side join key.
   */
  public static TableDesc getMapJoinKeyTableDesc(List<FieldSchema> fieldSchemas) {
    return new TableDesc(LazyBinarySerDe.class, SequenceFileInputFormat.class,
        SequenceFileOutputFormat.class, Utilities.makeProperties("columns",
        MetaStoreUtils.getColumnNamesFromFieldSchema(fieldSchemas),
        "columns.types", MetaStoreUtils
        .getColumnTypesFromFieldSchema(fieldSchemas),
        Constants.ESCAPE_CHAR, "\\"));
  }

  
  /**
   * Generate the table descriptor for Map-side join value.
   */
  public static TableDesc getMapJoinValueTableDesc(
      List<FieldSchema> fieldSchemas) {
    return new TableDesc(LazyBinarySerDe.class, SequenceFileInputFormat.class,
        SequenceFileOutputFormat.class, Utilities.makeProperties("columns",
        MetaStoreUtils.getColumnNamesFromFieldSchema(fieldSchemas),
        "columns.types", MetaStoreUtils
        .getColumnTypesFromFieldSchema(fieldSchemas),
        Constants.ESCAPE_CHAR, "\\"));
  }

  
  /**
   * Generate the table descriptor for intermediate files.
   */
  public static TableDesc getIntermediateFileTableDesc(
      List<FieldSchema> fieldSchemas) {
    return new TableDesc(LazyBinarySerDe.class, SequenceFileInputFormat.class,
        SequenceFileOutputFormat.class, Utilities.makeProperties(
        Constants.LIST_COLUMNS, MetaStoreUtils
        .getColumnNamesFromFieldSchema(fieldSchemas),
        Constants.LIST_COLUMN_TYPES, MetaStoreUtils
        .getColumnTypesFromFieldSchema(fieldSchemas),
        Constants.ESCAPE_CHAR, "\\"));
  }

  
  /**
   * Generate the table descriptor for the reduce value.
   */
  public static TableDesc getReduceValueTableDesc(List<FieldSchema> fieldSchemas) {
    return new TableDesc(LazyBinarySerDe.class, SequenceFileInputFormat.class,
        SequenceFileOutputFormat.class, Utilities.makeProperties(
        Constants.LIST_COLUMNS, MetaStoreUtils
        .getColumnNamesFromFieldSchema(fieldSchemas),
        Constants.LIST_COLUMN_TYPES, MetaStoreUtils
        .getColumnTypesFromFieldSchema(fieldSchemas),
        Constants.ESCAPE_CHAR, "\\"));
  }

  
  /**
   * Convert the ColumnList to FieldSchema list. Adds uniontype for
   * distinctColIndices.
   */
  public static List<FieldSchema> getFieldSchemasFromColumnListWithLength(
      List<ExprNodeDesc> cols, List<List<Integer>> distinctColIndices,
      List<String> outputColumnNames, int length,
      String fieldPrefix) {
    // last one for union column.
    List<FieldSchema> schemas = new ArrayList<FieldSchema>(length + 1);
    for (int i = 0; i < length; i++) {
      schemas.add(MetaStoreUtils.getFieldSchemaFromTypeInfo(
          fieldPrefix + outputColumnNames.get(i), cols.get(i).getTypeInfo()));
    }

    List<TypeInfo> unionTypes = new ArrayList<TypeInfo>();
    for (List<Integer> distinctCols : distinctColIndices) {
      List<String> names = new ArrayList<String>();
      List<TypeInfo> types = new ArrayList<TypeInfo>();
      int numExprs = 0;
      for (int i : distinctCols) {
        names.add(HiveConf.getColumnInternalName(numExprs));
        types.add(cols.get(i).getTypeInfo());
        numExprs++;
      }
      unionTypes.add(TypeInfoFactory.getStructTypeInfo(names, types));
    }
    if (cols.size() - length > 0) {
      schemas.add(MetaStoreUtils.getFieldSchemaFromTypeInfo(
          fieldPrefix + outputColumnNames.get(length),
          TypeInfoFactory.getUnionTypeInfo(unionTypes)));
    }

    return schemas;
  }
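
  // Worked example (hypothetical values): for two distinct aggregates such as
  // count(DISTINCT a) and count(DISTINCT a, b) over key columns [a, b],
  // distinctColIndices could be [[0], [0, 1]]. Each inner list becomes one
  // struct (its fields named via HiveConf.getColumnInternalName), and the
  // structs are wrapped in a single trailing uniontype column appended after
  // the first `length` schemas.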

  
  /**
   * Convert the ColumnList to FieldSchema list.
   */
  public static List<FieldSchema> getFieldSchemasFromColumnList(
      List<ExprNodeDesc> cols, List<String> outputColumnNames, int start,
      String fieldPrefix) {
    List<FieldSchema> schemas = new ArrayList<FieldSchema>(cols.size());
    for (int i = 0; i < cols.size(); i++) {
      schemas.add(MetaStoreUtils.getFieldSchemaFromTypeInfo(fieldPrefix
          + outputColumnNames.get(i + start), cols.get(i).getTypeInfo()));
    }
    return schemas;
  }

  
  /**
   * Convert the ColumnList to FieldSchema list.
   */
  public static List<FieldSchema> getFieldSchemasFromColumnList(
      List<ExprNodeDesc> cols, String fieldPrefix) {
    List<FieldSchema> schemas = new ArrayList<FieldSchema>(cols.size());
    for (int i = 0; i < cols.size(); i++) {
      schemas.add(MetaStoreUtils.getFieldSchemaFromTypeInfo(fieldPrefix + i,
          cols.get(i).getTypeInfo()));
    }
    return schemas;
  }

  
  /**
   * Convert the RowSchema to FieldSchema list.
   */
  public static List<FieldSchema> getFieldSchemasFromRowSchema(RowSchema row,
      String fieldPrefix) {
    ArrayList<ColumnInfo> c = row.getSignature();
    return getFieldSchemasFromColumnInfo(c, fieldPrefix);
  }

  
  /**
   * Convert the ColumnInfo to FieldSchema.
   */
  public static List<FieldSchema> getFieldSchemasFromColumnInfo(
      ArrayList<ColumnInfo> cols, String fieldPrefix) {
    if ((cols == null) || (cols.size() == 0)) {
      return new ArrayList<FieldSchema>();
    }

    List<FieldSchema> schemas = new ArrayList<FieldSchema>(cols.size());
    for (int i = 0; i < cols.size(); i++) {
      String name = cols.get(i).getInternalName();
      if (name.equals(Integer.valueOf(i).toString())) {
        name = fieldPrefix + name;
      }
      schemas.add(MetaStoreUtils.getFieldSchemaFromTypeInfo(name, cols.get(i)
          .getType()));
    }
    return schemas;
  }

  public static List<FieldSchema> sortFieldSchemas(List<FieldSchema> schema) {
    Collections.sort(schema, new Comparator<FieldSchema>() {
      @Override
      public int compare(FieldSchema o1, FieldSchema o2) {
        return o1.getName().compareTo(o2.getName());
      }
    });
    return schema;
  }

  
  /**
   * Create the reduce sink descriptor.
   *
   * @param keyCols
   *          The columns to be stored in the key
   * @param valueCols
   *          The columns to be stored in the value
   * @param outputColumnNames
   *          The output columns names
   * @param tag
   *          The tag for this reducesink
   * @param partitionCols
   *          The columns for partitioning.
   * @param numReducers
   *          The number of reducers, set to -1 for automatic inference based
   *          on input data size.
   * @return The reduceSinkDesc object.
   */
  public static ReduceSinkDesc getReduceSinkDesc(
      ArrayList<ExprNodeDesc> keyCols, ArrayList<ExprNodeDesc> valueCols,
      List<String> outputColumnNames, boolean includeKeyCols, int tag,
      ArrayList<ExprNodeDesc> partitionCols, String order, int numReducers) {
    return getReduceSinkDesc(keyCols, keyCols.size(), valueCols,
        new ArrayList<List<Integer>>(),
        includeKeyCols ? outputColumnNames.subList(0, keyCols.size()) :
          new ArrayList<String>(),
        includeKeyCols ? outputColumnNames.subList(keyCols.size(),
            outputColumnNames.size()) : outputColumnNames,
        includeKeyCols, tag, partitionCols, order, numReducers);
  }
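
  // Illustrative usage (a sketch; the ExprNodeColumnDesc arguments are
  // hypothetical): a reduce sink that sorts ascending on, and distributes by,
  // its single key column.
  //
  //   ArrayList<ExprNodeDesc> keys = new ArrayList<ExprNodeDesc>();
  //   keys.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
  //       "key", "src", false));
  //   ReduceSinkDesc rs = PlanUtils.getReduceSinkDesc(keys, values,
  //       outputColumnNames, true, -1, keys, "+", -1);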

  
  /**
   * Create the reduce sink descriptor.
   *
   * @param keyCols
   *          The columns to be stored in the key
   * @param numKeys
   *          number of distribution key numbers. Equals to group-by-key
   *          numbers usually.
   * @param valueCols
   *          The columns to be stored in the value
   * @param distinctColIndices
   *          column indices for distinct aggregate parameters
   * @param outputKeyColumnNames
   *          The output key columns names
   * @param outputValueColumnNames
   *          The output value columns names
   * @param tag
   *          The tag for this reducesink
   * @param partitionCols
   *          The columns for partitioning.
   * @param numReducers
   *          The number of reducers, set to -1 for automatic inference based
   *          on input data size.
   * @return The reduceSinkDesc object.
   */
  public static ReduceSinkDesc getReduceSinkDesc(
      final ArrayList<ExprNodeDesc> keyCols, int numKeys,
      ArrayList<ExprNodeDesc> valueCols,
      List<List<Integer>> distinctColIndices,
      List<String> outputKeyColumnNames,
      List<String> outputValueColumnNames,
      boolean includeKeyCols, int tag,
      ArrayList<ExprNodeDesc> partitionCols, String order, int numReducers) {
    TableDesc keyTable = null;
    TableDesc valueTable = null;
    ArrayList<String> outputKeyCols = new ArrayList<String>();
    ArrayList<String> outputValCols = new ArrayList<String>();
    if (includeKeyCols) {
      keyTable = getReduceKeyTableDesc(getFieldSchemasFromColumnListWithLength(
          keyCols, distinctColIndices, outputKeyColumnNames, numKeys, ""),
          order);
      outputKeyCols.addAll(outputKeyColumnNames);
    } else {
      keyTable = getReduceKeyTableDesc(getFieldSchemasFromColumnList(
          keyCols, "reducesinkkey"), order);
      for (int i = 0; i < keyCols.size(); i++) {
        outputKeyCols.add("reducesinkkey" + i);
      }
    }
    valueTable = getReduceValueTableDesc(getFieldSchemasFromColumnList(
        valueCols, outputValueColumnNames, 0, ""));
    outputValCols.addAll(outputValueColumnNames);
    return new ReduceSinkDesc(keyCols, numKeys, valueCols, outputKeyCols,
        distinctColIndices, outputValCols,
        tag, partitionCols, numReducers, keyTable,
        valueTable);
  }

  
  /**
   * Create the reduce sink descriptor.
   *
   * @param keyCols
   *          The columns to be stored in the key
   * @param valueCols
   *          The columns to be stored in the value
   * @param outputColumnNames
   *          The output columns names
   * @param tag
   *          The tag for this reducesink
   * @param numPartitionFields
   *          The first numPartitionFields of keyCols will be partition
   *          columns. If numPartitionFields=-1, then partition randomly.
   * @param numReducers
   *          The number of reducers, set to -1 for automatic inference based
   *          on input data size.
   * @return The reduceSinkDesc object.
   */
  public static ReduceSinkDesc getReduceSinkDesc(
      ArrayList<ExprNodeDesc> keyCols, ArrayList<ExprNodeDesc> valueCols,
      List<String> outputColumnNames, boolean includeKey, int tag,
      int numPartitionFields, int numReducers) throws SemanticException {
    return getReduceSinkDesc(keyCols, keyCols.size(), valueCols,
        new ArrayList<List<Integer>>(),
        includeKey ? outputColumnNames.subList(0, keyCols.size()) :
          new ArrayList<String>(),
        includeKey ?
            outputColumnNames.subList(keyCols.size(), outputColumnNames.size())
            : outputColumnNames,
        includeKey, tag, numPartitionFields, numReducers);
  }

  
  /**
   * Create the reduce sink descriptor.
   *
   * @param keyCols
   *          The columns to be stored in the key
   * @param numKeys
   *          number of distribution keys. Equals to group-by-key numbers
   *          usually.
   * @param valueCols
   *          The columns to be stored in the value
   * @param distinctColIndices
   *          column indices for distinct aggregates
   * @param outputKeyColumnNames
   *          The output key columns names
   * @param outputValueColumnNames
   *          The output value columns names
   * @param tag
   *          The tag for this reducesink
   * @param numPartitionFields
   *          The first numPartitionFields of keyCols will be partition
   *          columns. If numPartitionFields=-1, then partition randomly.
   * @param numReducers
   *          The number of reducers, set to -1 for automatic inference based
   *          on input data size.
   * @return The reduceSinkDesc object.
   */
  public static ReduceSinkDesc getReduceSinkDesc(
      ArrayList<ExprNodeDesc> keyCols, int numKeys,
      ArrayList<ExprNodeDesc> valueCols,
      List<List<Integer>> distinctColIndices,
      List<String> outputKeyColumnNames, List<String> outputValueColumnNames,
      boolean includeKey, int tag,
      int numPartitionFields, int numReducers) throws SemanticException {
    ArrayList<ExprNodeDesc> partitionCols = null;

    if (numPartitionFields >= keyCols.size()) {
      partitionCols = keyCols;
    } else if (numPartitionFields >= 0) {
      partitionCols = new ArrayList<ExprNodeDesc>(numPartitionFields);
      for (int i = 0; i < numPartitionFields; i++) {
        partitionCols.add(keyCols.get(i));
      }
    } else {
      // numPartitionFields = -1 means random partitioning
      partitionCols = new ArrayList<ExprNodeDesc>(1);
      partitionCols.add(TypeCheckProcFactory.DefaultExprProcessor
          .getFuncExprNodeDesc("rand"));
    }

    StringBuilder order = new StringBuilder();
    for (int i = 0; i < keyCols.size(); i++) {
      order.append("+");
    }
    return getReduceSinkDesc(keyCols, numKeys, valueCols, distinctColIndices,
        outputKeyColumnNames, outputValueColumnNames, includeKey, tag,
        partitionCols, order.toString(), numReducers);
  }
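
  // Illustrative call (a sketch): passing numPartitionFields = -1 makes the
  // branch above emit a single rand() expression as the partition column, so
  // rows are spread across reducers randomly rather than by key.
  //
  //   ReduceSinkDesc rs = PlanUtils.getReduceSinkDesc(keys, keys.size(),
  //       values, new ArrayList<List<Integer>>(), outputKeyNames,
  //       outputValueNames, true, -1, /* numPartitionFields */ -1,
  //       /* numReducers */ -1);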

  
  /**
   * Loads the storage handler (if one exists) for the given table and invokes
   * {@link HiveStorageHandler#configureTableJobProperties}.
   *
   * @param tableDesc
   *          table descriptor
   */
  public static void configureTableJobPropertiesForStorageHandler(
      TableDesc tableDesc) {
    if (tableDesc == null) {
      return;
    }

    try {
      HiveStorageHandler storageHandler =
        HiveUtils.getStorageHandler(
          Hive.get().getConf(),
          tableDesc.getProperties().getProperty(
            org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_STORAGE));
      if (storageHandler != null) {
        Map<String, String> jobProperties = new LinkedHashMap<String, String>();
        storageHandler.configureTableJobProperties(
          tableDesc,
          jobProperties);
        // Job properties are only relevant for non-native tables, so
        // for native tables, leave it null to avoid cluttering up
        // plans.
        if (!jobProperties.isEmpty()) {
          tableDesc.setJobProperties(jobProperties);
        }
      }
    } catch (HiveException ex) {
      throw new RuntimeException(ex);
    }
  }
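
  // Illustrative usage (a sketch): for a non-native table (e.g. one backed by
  // the HBase storage handler) the descriptor's META_TABLE_STORAGE property
  // names the handler class, and this call copies handler-specific job
  // properties into the plan.
  //
  //   TableDesc td = ...; // descriptor of a storage-handler-backed table
  //   PlanUtils.configureTableJobPropertiesForStorageHandler(td);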
  private PlanUtils() {
    // prevent instantiation
  }
}