Start line:  
End line:  

Snippet Preview

Snippet HTML Code

Stack Overflow Questions
   /*
    * Licensed to the Apache Software Foundation (ASF) under one
    * or more contributor license agreements.  See the NOTICE file
    * distributed with this work for additional information
    * regarding copyright ownership.  The ASF licenses this file
    * to you under the Apache License, Version 2.0 (the
    * "License"); you may not use this file except in compliance
    * with the License.  You may obtain a copy of the License at
    *
   *     http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  package org.apache.pig;
  
  import java.io.File;
  import java.net.URL;
  import java.util.Deque;
  import java.util.HashMap;
  import java.util.HashSet;
  import java.util.List;
  import java.util.Map;
  import java.util.Queue;
  import java.util.Set;
  
A class for Java programs to connect to Pig. Typically a program will create a PigServer instance. The programmer then registers queries using registerQuery() and retrieves results using openIterator() or store(). After doing so, the shutdown() method should be called to free any resources used by the current PigServer instance. Not doing so could result in a memory leak.
 
 public class PigServer {
 
     protected final Log log = LogFactory.getLog(getClass());
 
     public static final String PRETTY_PRINT_SCHEMA_PROPERTY = "pig.pretty.print.schema";
     private static final String PIG_LOCATION_CHECK_STRICT = "pig.location.check.strict";
 
     /*
      * The data structure to support grunt shell operations.
      * The grunt shell can only work on one graph at a time.
      * If a script is contained inside another script, the grunt
      * shell first saves the current graph on the stack and works
      * on a new graph. After the nested script is done, the grunt
      * shell pops up the saved graph and continues working on it.
      */
     protected final Deque<Graphgraphs = new LinkedList<Graph>();
 
     /*
      * The current Graph the grunt shell is working on.
      */
     private Graph currDAG;
 
     protected final PigContext pigContext;
 
     private String jobName;
 
     private String jobPriority;
 
     private final static AtomicInteger scopeCounter = new AtomicInteger(0);
 
     protected final String scope = constructScope();
 
 
     private boolean isMultiQuery = true;
     private boolean aggregateWarning = true;
 
     private boolean validateEachStatement = false;
     private boolean skipParseInRegisterForBatch = false;
 
     private String constructScope() {
         // scope servers for now as a session id
 
         // String user = System.getProperty("user.name", "DEFAULT_USER_ID");
         // String date = (new Date()).toString();
 
         // scope is not really used in the system right now. It will
         // however make your explain statements look lengthy if set to
         // username-date. For now let's simplify the scope, if a real
         // scope is needed again, we might need to update all the
         // operators to not include scope in their name().
         return "" + .incrementAndGet();
     }

    

Parameters:
execTypeString can be 'mapreduce' or 'local'. Local mode will use Hadoop's local job runner to execute the job on the local machine. Mapreduce mode will connect to a cluster to execute the job.
Throws:
org.apache.pig.backend.executionengine.ExecException
java.io.IOException
 
     public PigServer(String execTypeStringthrows ExecExceptionIOException {
         this(ExecType.fromString(execTypeString));
     }

    

Parameters:
execType execution type to start the engine. Local mode will use Hadoop's local job runner to execute the job on the local machine. Mapreduce mode will connect to a cluster to execute the job.
Throws:
org.apache.pig.backend.executionengine.ExecException
 
     public PigServer(ExecType execTypethrows ExecException {
         this(execType, PropertiesUtil.loadDefaultProperties());
     }
 
     public PigServer(ExecType execTypeProperties propertiesthrows ExecException {
         this(new PigContext(execTypeproperties));
     }
 
     public PigServer(ExecType execTypeConfiguration confthrows ExecException {
         this(new PigContext(execTypeconf));
     }
 
     public PigServer(PigContext contextthrows ExecException {
         this(contexttrue);
     }
 
     public PigServer(PigContext contextboolean connectthrows ExecException {
         this. = context;
          = new Graph(false);
 
          = "true".equalsIgnoreCase(.getProperties().getProperty("aggregate.warning"));
          = "true".equalsIgnoreCase(.getProperties().getProperty("opt.multiquery","true"));
 
                 .,
                 . + ":DefaultJobName");
 
         if (connect) {
             .connect();
         }
 
         addJarsFromProperties();
     }
 
     private void addJarsFromProperties() throws ExecException {
         //add jars from properties to extraJars
         String jar_str = .getProperties().getProperty("pig.additional.jars");
 
         if(jar_str != null){
             // Use File.pathSeparator (":" on Linux, ";" on Windows)
             // to correctly handle path aggregates as they are represented
             // on the Operating System.
             for(String jar : jar_str.split(.)){
                 try {
                     registerJar(jar);
                 } catch (IOException e) {
                     int errCode = 4010;
                     String msg =
                         "Failed to register jar :" + jar + ". Caught exception.";
                     throw new ExecException(
                             msg,
                             errCode,
                             .,
                             e
                     );
                 }
             }
         }
     }
 
     public PigContext getPigContext(){
         return ;
     }

    
Current DAG

Returns:
 
     public Graph getCurrentDAG() {
         return this.;
     }
    
Set the logging level to DEBUG.
 
     public void debugOn() {
         Logger.getLogger("org.apache.pig").setLevel(.);
         .getLog4jProperties().setProperty("log4j.logger.org.apache.pig"..toString());
     }

    
Set the logging level to the default.
 
     public void debugOff() {
         Logger.getLogger("org.apache.pig").setLevel(.getDefaultLogLevel());
         .getLog4jProperties().setProperty("log4j.logger.org.apache.pig".getDefaultLogLevel().toString());
     }

    
Set the default parallelism for this job

Parameters:
p default number of reducers to use for this job.
 
     public void setDefaultParallel(int p) {
         . = p;
     }

    
Starts batch execution mode.
 
     public void setBatchOn() {
         .debug("Create a new graph.");
 
         if ( != null) {
             .push();
         }
          = new Graph();
     }

    
Retrieve the current execution mode.

Returns:
true if the execution mode is batch; false otherwise.
 
     public boolean isBatchOn() {
         // Batch is on when there are multiple graphs on the
         // stack. That gives the right response even if multiquery was
         // turned off.
         return .size() > 0;
     }

    
Returns whether there is anything to process in the current batch.

Returns:
true if there are no stores to process in the current batch, false otherwise.
Throws:
org.apache.pig.impl.logicalLayer.FrontendException
 
     public boolean isBatchEmpty() throws FrontendException {
         if ( == null) {
             int errCode = 1083;
             String msg = "setBatchOn() must be called first.";
             throw new FrontendException(msgerrCode.);
         }
 
         return .isBatchEmpty();
     }

    
This method parses the scripts and builds the LogicalPlan. This method should be followed by executeBatch(boolean) with argument as false. Do Not use executeBatch() after calling this method as that will re-parse and build the script.

 
     public void parseAndBuild() throws IOException {
         if ( == null || !isBatchOn()) {
             int errCode = 1083;
             String msg = "setBatchOn() must be called first.";
             throw new FrontendException(msgerrCode.);
         }
         .parseQuery();
         .buildPlannull );
     }

    
Submits a batch of Pig commands for execution.

Returns:
list of jobs being executed
Throws:
java.io.IOException
 
     public List<ExecJobexecuteBatch() throws IOException {
         return executeBatch(true);
     }

    
Submits a batch of Pig commands for execution. Parse and build of script should be skipped if user called parseAndBuild() before. Pass false as an argument in which case.

Parameters:
parseAndBuild
Returns:
Throws:
java.io.IOException
 
     public List<ExecJobexecuteBatch(boolean parseAndBuildthrows IOException {
         if (parseAndBuild) {
             parseAndBuild();
         }
 
         PigStats stats = null;
         if( ! ) {
             // ignore if multiquery is off
             stats = PigStats.get();
         } else {
             stats = execute();
         }
 
         return getJobs(stats);
     }

    
Retrieves a list of Job objects from the PigStats object

Parameters:
stats
Returns:
A list of ExecJob objects
 
     protected List<ExecJobgetJobs(PigStats stats) {
         LinkedList<ExecJobjobs = new LinkedList<ExecJob>();
         JobGraph jGraph = stats.getJobGraph();
         Iterator<JobStatsiter = jGraph.iterator();
         while (iter.hasNext()) {
             JobStats js = iter.next();
             for (OutputStats output : js.getOutputs()) {
                 if (js.isSuccessful()) {
                     jobs.add(new HJob(..output
                             .getPOStore(), output.getAlias(), stats));
                 } else {
                     HJob hjob = new HJob(..output
                             .getPOStore(), output.getAlias(), stats);
                     hjob.setException(js.getException());
                     jobs.add(hjob);
                 }
             }
         }
         return jobs;
     }

    
Discards a batch of Pig commands.

 
     public void discardBatch() throws FrontendException {
         if ( == null || !isBatchOn()) {
             int errCode = 1083;
             String msg = "setBatchOn() must be called first.";
             throw new FrontendException(msgerrCode.);
         }
 
          = .pop();
     }

    
Add a path to be skipped while automatically shipping binaries for streaming.

Parameters:
path path to be skipped
 
     public void addPathToSkip(String path) {
         .addPathToSkip(path);
     }

    
Defines an alias for the given function spec. This is useful for functions that require arguments to the constructor.

Parameters:
function - the new function alias to define.
funcSpec - the FuncSpec object representing the name of the function class and any arguments to constructor.
 
     public void registerFunction(String functionFuncSpec funcSpec) {
         .registerFunction(functionfuncSpec);
     }

    
Defines an alias for the given streaming command.

Parameters:
commandAlias - the new command alias to define
command - streaming command to be executed
 
     public void registerStreamingCommand(String commandAliasStreamingCommand command) {
         .registerStreamCmd(commandAliascommand);
     }
 
     private URL locateJarFromResources(String jarNamethrows IOException {
         Enumeration<URLurls = ClassLoader.getSystemResources(jarName);
         URL resourceLocation = null;
 
         if (urls.hasMoreElements()) {
             resourceLocation = urls.nextElement();
         }
 
         if (urls.hasMoreElements()) {
             StringBuffer sb = new StringBuffer("Found multiple resources that match ");
             sb.append(jarName);
             sb.append(": ");
             sb.append(resourceLocation);
 
             while (urls.hasMoreElements()) {
                 sb.append(urls.nextElement());
                 sb.append("; ");
             }
 
             .debug(sb.toString());
         }
 
         return resourceLocation;
     }

    
Registers a jar file. Name of the jar file can be an absolute or relative path. If multiple resources are found with the specified name, the first one is registered as returned by getSystemResources. A warning is issued to inform the user.

Parameters:
name of the jar file to register
Throws:
java.io.IOException
 
     public void registerJar(String namethrows IOException {
         if (.hasJar(name)) {
             .debug("Ignoring duplicate registration for jar " + name);
             return;
         }
 
         // first try to locate jar via system resources
         // if this fails, try by using "name" as File (this preserves
         // compatibility with case when user passes absolute path or path
         // relative to current working directory.)
         if (name != null) {
             if (name.isEmpty()) {
                 .warn("Empty string specified for jar path");
                 return;
             }
 
             URL resource = locateJarFromResources(name);
 
             if (resource == null) {
                 FetchFileRet[] files = FileLocalizer.fetchFiles(.getProperties(), name);
                 for (FetchFileRet file : files) {
                     File f = file.file;
                     if (!f.canRead()) {
                         int errCode = 4002;
                         String msg = "Can't read jar file: " + name;
                         throw new FrontendException(msgerrCode.);
                     }
 
                     .addJar(f.toURI().toURL(), name);
                 }
             } else {
                 .addJar(resourcename);
             }
         }
     }

    
Universal Scripting Language Support, see PIG-928

Parameters:
path path of the script file
scriptingLang language keyword or scriptingEngine used to interpret the script
namespace namespace defined for functions of this script
Throws:
java.io.IOException
 
     public void registerCode(String pathString scriptingLangString namespace)
                              throws IOException {
         if (..containsKey(path) &&
             ..get(path).equals(namespace)) {
             .debug("Ignoring duplicate registration for scripting udf file " + path + " in namespace " + namespace);
             return;
         } else {
             ..put(pathnamespace);
         }
 
         File f = FileLocalizer.fetchFile(.getProperties(), path).;
         if (!f.canRead()) {
             int errCode = 4002;
             String msg = "Can't read file: " + path;
             throw new FrontendException(msgerrCode,
                     .);
         }
         String cwd = new File(".").getCanonicalPath();
         String filePath = f.getCanonicalPath();
         //Use the relative path in the jar, if the path specified is relative
         String nameInJar = filePath.equals(cwd + . + path) ?
                 filePath.substring(cwd.length() + 1) : filePath;
         .addScriptFile(nameInJarfilePath);
         if(scriptingLang != null) {
             ScriptEngine se = ScriptEngine.getInstance(scriptingLang);
             se.registerFunctions(nameInJarnamespace);
         }
     }

    
Register a query with the Pig runtime. The query is parsed and registered, but it is not executed until it is needed.

Parameters:
query a Pig Latin expression to be evaluated.
startLine line number of the query within the whole script
Throws:
java.io.IOException
 
     public void registerQuery(String queryint startLinethrows IOException {
     }

    
Register a query with the Pig runtime. The query is parsed and registered, but it is not executed until it is needed. Equivalent to calling registerQuery(java.lang.String,int) with startLine set to 1.

Parameters:
query a Pig Latin expression to be evaluated.
Throws:
java.io.IOException
 
     public void registerQuery(String querythrows IOException {
         registerQuery(query, 1);
     }

    
Register a pig script from an InputStream source, which is more general and extensible: if the pig script is in a local file, you can use FileInputStream; if the pig script is built dynamically in memory, you can use ByteArrayInputStream; and if the pig script lives on a remote machine, you can wrap the connection as a SocketInputStream.

Parameters:
in
Throws:
java.io.IOException
 
     public void registerScript(InputStream inthrows IOException{
         registerScript(innullnull);
     }

    
Register a pig script from InputStream source which is more general and extensible the pig script can be from local file, then you can use FileInputStream. or pig script can be in memory which you build it dynamically, the you can use ByteArrayInputStream even pig script can be in remote machine, which you get wrap it as SocketInputStream. The parameters in the pig script will be substituted with the values in params

Parameters:
in
params the key is the parameter name, and the value is the parameter value
Throws:
java.io.IOException
 
     public void registerScript(InputStream inMap<String,Stringparamsthrows IOException{
         registerScript(inparamsnull);
     }

    
Register a pig script from InputStream source which is more general and extensible the pig script can be from local file, then you can use FileInputStream. or pig script can be in memory which you build it dynamically, the you can use ByteArrayInputStream even pig script can be in remote machine, which you get wrap it as SocketInputStream The parameters in the pig script will be substituted with the values in the parameter files

Parameters:
in
paramsFiles files which have the parameter setting
Throws:
java.io.IOException
 
     public void registerScript(InputStream inList<StringparamsFilesthrows IOException {
         registerScript(innullparamsFiles);
     }

    
Register a pig script from InputStream.
The pig script can come from a local file, in which case you can use FileInputStream. Or the pig script can be built dynamically in memory, in which case you can use ByteArrayInputStream. The pig script can even be on a remote machine, in which case you can wrap it as a SocketInputStream.
The parameters in the pig script will be substituted with the values in the map and the parameter files. The values in params Map will override the value in parameter file if they have the same parameter

Parameters:
in
params the key is the parameter name, and the value is the parameter value
paramsFiles files which have the parameter setting
Throws:
java.io.IOException
 
     public void registerScript(InputStream inMap<String,Stringparams,List<StringparamsFilesthrows IOException {
         try {
             String substituted = .doParamSubstitution(inparamMapToList(params), paramsFiles);
             GruntParser grunt = new GruntParser(new StringReader(substituted));
             grunt.setInteractive(false);
             grunt.setParams(this);
             grunt.parseStopOnError(true);
         } catch (org.apache.pig.tools.pigscript.parser.ParseException e) {
             .error(e.getLocalizedMessage());
             throw new IOException(e.getCause());
         }
     }
 
     protected List<StringparamMapToList(Map<StringStringparams) {
         List<StringparamList = new ArrayList<String>();
         if (params != null) {
             for (Map.Entry<StringStringentry : params.entrySet()) {
                 paramList.add(entry.getKey() + "=" + entry.getValue());
             }
         }
         return paramList;
     }

    
Creates a clone of the current DAG

Returns:
A Graph object which is a clone of the current DAG
Throws:
java.io.IOException
 
     protected Graph getClonedGraph() throws IOException {
         Graph graph = .duplicate();
 
         if (graph == null) {
             int errCode = 2127;
             String msg = "Cloning of plan failed.";
             throw new FrontendException(msgerrCode.);
         }
         return graph;
     }


    
Register a query with the Pig runtime. The query will be read from the indicated file.

Parameters:
fileName file to read query from.
Throws:
java.io.IOException
 
     public void registerScript(String fileNamethrows IOException {
         registerScript(fileNamenullnull);
     }

    
Register a pig script file. The parameters in the file will be substituted with the values in params

Parameters:
fileName pig script file
params the key is the parameter name, and the value is the parameter value
Throws:
java.io.IOException
 
     public void registerScript(String fileNameMap<String,Stringparamsthrows IOException {
         registerScript(fileNameparamsnull);
     }



    
Register a pig script file. The parameters in the file will be substituted with the values in the parameter files

Parameters:
fileName pig script file
paramsFiles files which have the parameter setting
Throws:
java.io.IOException
 
     public void registerScript(String fileNameList<StringparamsFilesthrows IOException {
         registerScript(fileNamenullparamsFiles);
     }

    
Register a pig script file. The parameters in the file will be substituted with the values in the map and the parameter files The values in params Map will override the value in parameter file if they have the same parameter

Parameters:
fileName pig script
params the key is the parameter name, and the value is the parameter value
paramsFiles files which have the parameter setting
Throws:
java.io.IOException
 
     public void registerScript(String fileNameMap<String,Stringparams,List<StringparamsFilesthrows IOException {
         FileInputStream fis = null;
         try{
             fis = new FileInputStream(fileName);
             registerScript(fisparamsparamsFiles);
         }catch (FileNotFoundException e){
             .error(e.getLocalizedMessage());
             throw new IOException(e.getCause());
         } finally {
             if (fis != null) {
                 fis.close();
             }
         }
     }

    
Intended to be used by unit tests only. Print a list of all aliases in in the current Pig Latin script. Output is written to System.out.

 
     public void printAliases () throws FrontendException {
         ..println("aliases: " + .getAliasOp().keySet());
     }

    
Write the schema for an alias to System.out.

Parameters:
alias Alias whose schema will be written out
Returns:
Schema of alias dumped
Throws:
java.io.IOException
 
     public Schema dumpSchema(String aliasthrows IOException {
         try {
             if ("@".equals(alias)) {
                 alias = getLastRel();
             }
             LogicalRelationalOperator op = getOperatorForAliasalias );
             LogicalSchema schema = op.getSchema();
 
             boolean pretty = "true".equals(.getProperties()
                                    .getProperty());
 
             if (schema != null) {
                 Schema s = org.apache.pig.newplan.logical.Util.translateSchema(schema);
                 ..println(alias + ": " + (pretty ? s.prettyPrint() : s.toString()));
                 return s;
             } else {
                 ..println("Schema for " + alias + " unknown.");
                 return null;
             }
         } catch (FrontendException fee) {
             int errCode = 1001;
             String msg = "Unable to describe schema for alias " + alias;
             throw new FrontendException (msgerrCode.falsenullfee);
         }
     }

    
Write the schema for a nestedAlias to System.out. Denoted by alias::nestedAlias.

Parameters:
alias Alias whose schema has nestedAlias
nestedAlias Alias whose schema will be written out
Returns:
Schema of alias dumped
Throws:
java.io.IOException
 
     public Schema dumpSchemaNested(String aliasString nestedAliasthrows IOException {
         if ("@".equals(alias)) {
             alias = getLastRel();
         }
         Operator op = getOperatorForAliasalias );
         ifop instanceof LOForEach ) {
             LogicalSchema nestedSc = ((LOForEach)op).dumpNestedSchema(aliasnestedAlias);
             if (nestedSc!=null) {
                 Schema s = org.apache.pig.newplan.logical.Util.translateSchema(nestedSc);
                 ..println(alias"::" + nestedAlias + ": " + s.toString());
                 return s;
             }
             else {
                 ..println("Schema for "alias"::" + nestedAlias + " unknown.");
                 return null;
             }
         }
         else {
             int errCode = 1001;
             String msg = "Unable to describe schema for " + alias + "::" + nestedAlias;
             throw new FrontendException (msgerrCode.falsenull);
         }
     }

    
Set the name of the job. This name will get translated to mapred.job.name.

Parameters:
name of job
 
     public void setJobName(String name) {
          = . + ":" + name;
     }

    
Set Hadoop job priority. This value will get translated to mapred.job.priority.

Parameters:
priority valid values are found in org.apache.hadoop.mapred.JobPriority
 
     public void setJobPriority(String priority) {
          = priority;
     }

    
Executes a Pig Latin script up to and including indicated alias. That is, if a user does:
 PigServer server = new PigServer();
 server.registerQuery("A = load 'foo';");
 server.registerQuery("B = filter A by $0 > 0;");
 server.registerQuery("C = order B by $1;");
 
Then
 server.openIterator("B");
 
filtered but unsorted data will be returned. If instead a user does
 server.openIterator("C");
 
filtered and sorted data will be returned.

Parameters:
id Alias to open iterator for
Returns:
iterator of tuples returned from the script
Throws:
java.io.IOException
 
     public Iterator<TupleopenIterator(String idthrows IOException {
         try {
             if != null ) {
                 .getProperties().setProperty. );
             }
             ExecJob job = store(id, FileLocalizer.getTemporaryPath()
                     .toString(), Utils.getTmpFileCompressorName()
                     + "()");
 
             // invocation of "execute" is synchronous!
 
             if (job.getStatus() == .) {
                 return job.getResults();
             } else if (job.getStatus() == .
                     && job.getException() != null) {
                 // throw the backend exception in the failed case
                 Exception e = job.getException();
                 int errCode = 1066;
                 String msg = "Unable to open iterator for alias " + id
                         + ". Backend error : " + e.getMessage();
                 throw new FrontendException(msgerrCode.e);
             } else {
                 throw new IOException("Job terminated with anomalous status "
                         + job.getStatus().toString());
             }
         } catch (FrontendException e) {
             throw e;
         } catch (Exception e) {
             int errCode = 1066;
             String msg = "Unable to open iterator for alias " + id;
             throw new FrontendException(msgerrCode.e);
         }
     }

    
Executes a Pig Latin script up to and including indicated alias and stores the resulting records into a file. That is, if a user does:
 PigServer server = new PigServer();
 server.registerQuery("A = load 'foo';");
 server.registerQuery("B = filter A by $0 > 0;");
 server.registerQuery("C = order B by $1;");
 
Then
 server.store("B", "bar");
 
filtered but unsorted data will be stored to the file bar. If instead a user does
 server.store("C", "bar");
 
filtered and sorted data will be stored to the file bar. Equivalent to calling store(java.lang.String,java.lang.String,java.lang.String) with org.apache.pig.PigStorage as the store function.

Parameters:
id The alias to store
filename The file to which to store to
Returns:
org.apache.pig.backend.executionengine.ExecJob containing information about this job
Throws:
java.io.IOException
 
     public ExecJob store(String idString filenamethrows IOException {
         return store(idfilenamePigStorage.class.getName() + "()");   // SFPig is the default store function
     }

    
Executes a Pig Latin script up to and including indicated alias and stores the resulting records into a file. That is, if a user does:
 PigServer server = new PigServer();
 server.registerQuery("A = load 'foo';");
 server.registerQuery("B = filter A by $0 > 0;");
 server.registerQuery("C = order B by $1;");
 
Then
 server.store("B", "bar", "mystorefunc");
 
filtered but unsorted data will be stored to the file bar using mystorefunc. If instead a user does
 server.store("C", "bar", "mystorefunc");
 
filtered and sorted data will be stored to the file bar using mystorefunc.

Parameters:
id The alias to store
filename The file to which to store to
func store function to use
Returns:
org.apache.pig.backend.executionengine.ExecJob containing information about this job
Throws:
java.io.IOException
 
     public ExecJob store(String idString filenameString func)
             throws IOException {
         PigStats stats = storeEx(idfilenamefunc);
         if (stats.getOutputStats().size() < 1) {
             throw new IOException("Couldn't retrieve job.");
         }
         OutputStats output = stats.getOutputStats().get(0);
 
         if(stats.isSuccessful()){
             return  new HJob(.output
                     .getPOStore(), output.getAlias(), stats);
         }else{
             HJob job = new HJob(.,
                     output.getPOStore(), output.getAlias(), stats);
 
             //check for exception
             Exception ex = null;
             for(JobStats js : stats.getJobGraph()){
                 if(js.getException() != null) {
                     ex = js.getException();
                 }
             }
             job.setException(ex);
             return job;
         }
     }
 
     private PigStats storeEx(String aliasString filenameString func)
     throws IOException {
         if ("@".equals(alias)) {
             alias = getLastRel();
         }
         .parseQuery();
         .buildPlanalias );
 
         try {
             QueryParserUtils.attachStorePlan(.filenamefunc.getOperatoralias ), alias);
             .compile();
             return executeCompiledLogicalPlan();
         } catch (PigException e) {
             int errCode = 1002;
             String msg = "Unable to store alias " + alias;
             throw new PigException(msgerrCode.e);
         }
     }

    
Provide information on how a pig query will be executed. For now this information is very developer focused, and probably not very useful to the average user.

Parameters:
alias Name of alias to explain.
stream PrintStream to write explanation to.
Throws:
java.io.IOException if the requested alias cannot be found.
 
     public void explain(String alias,
                         PrintStream streamthrows IOException {
         explain(alias"text"truefalsestreamstreamstream);
     }

    
Provide information on how a pig query will be executed.

Parameters:
alias Name of alias to explain.
format Format in which the explain should be printed. If text, then the plan will be printed in plain text. Otherwise, the execution plan will be printed in DOT format.
verbose Controls the amount of information printed
markAsExecute When set, will treat the explain like a call to execute in the respect that all the pending stores are marked as complete.
lps Stream to print the logical tree
pps Stream to print the physical tree
eps Stream to print the execution tree
Throws:
java.io.IOException if the requested alias cannot be found.
    @SuppressWarnings("unchecked")
    public void explain(String alias,
                        String format,
                        boolean verbose,
                        boolean markAsExecute,
                        PrintStream lps,
                        PrintStream pps,
                        PrintStream epsthrows IOException {
        try {
            . = true;
            buildStorePlanalias );
            
            //Only add root xml node if all plans are being written to same stream.
            if (format == "xml" && lps == eps) {
                lps.println("<plan>");
            }
            PhysicalPlan pp = compilePp();
            ..explain(lpsformatverbose);
            if..size() == 0 ) {
                if (format == "xml" && lps == eps) {
                    lps.println("</plan>");
                }
                return;
            }
            pp.explain(ppsformatverbose);
            MapRedUtil.checkLeafIsStore(pp);
            MapReduceLauncher launcher = new MapReduceLauncher();
            launcher.explain(ppepsformatverbose);
            if (format.equals("xml") && lps == eps) {
                lps.println("</plan>");
            }
            
            if (markAsExecute) {
                .markAsExecuted();
            }
        } catch (Exception e) {
            int errCode = 1067;
            String msg = "Unable to explain alias " + alias;
            throw new FrontendException(msgerrCode.e);
        } finally {
            . = false;
        }
    }

    
Returns the unused byte capacity of an HDFS filesystem. This value does not take into account a replication factor, as that can vary from file to file. Thus if you are using this to determine if you data set will fit in the HDFS, you need to divide the result of this call by your specific replication setting.

Returns:
unused byte capacity of the file system.
Throws:
java.io.IOException
    public long capacity() throws IOException {
        if (.getExecType() == .) {
            throw new IOException("capacity only supported for non-local execution");
        }
        else {
            DataStorage dds = .getDfs();
            Map<StringObjectstats = dds.getStatistics();
            String rawCapacityStr = (Stringstats.get(.);
            String rawUsedStr = (Stringstats.get(.);
            if ((rawCapacityStr == null) || (rawUsedStr == null)) {
                throw new IOException("Failed to retrieve capacity stats");
            }
            long rawCapacityBytes = new Long(rawCapacityStr).longValue();
            long rawUsedBytes = new Long(rawUsedStr).longValue();
            return rawCapacityBytes - rawUsedBytes;
        }
    }

    
Returns the length of a file in bytes which exists in the HDFS (accounts for replication).

Parameters:
filename
Returns:
length of the file in bytes
Throws:
java.io.IOException
    public long fileSize(String filenamethrows IOException {
        DataStorage dfs = .getDfs();
        ElementDescriptor elem = dfs.asElement(filename);
        Map<StringObjectstats = elem.getStatistics();
        long length = (Longstats.get(.);
        int replication = (Shortstats
                .get(.);
        return length * replication;
    }

    
Test whether a file exists.

Parameters:
filename to test
Returns:
true if file exists, false otherwise
Throws:
java.io.IOException
    public boolean existsFile(String filenamethrows IOException {
        ElementDescriptor elem = .getDfs().asElement(filename);
        return elem.exists();
    }

    
Delete a file.

Parameters:
filename to delete
Returns:
true
Throws:
java.io.IOException
    public boolean deleteFile(String filenamethrows IOException {
        ElementDescriptor elem = .getDfs().asElement(filename);
        elem.delete();
        return true;
    }

    
Rename a file.

Parameters:
source file to rename
target new file name
Returns:
true
Throws:
java.io.IOException
    public boolean renameFile(String sourceString targetthrows IOException {
        .rename(sourcetarget);
        return true;
    }

    
Make a directory.

Parameters:
dirs directory to make
Returns:
true
Throws:
java.io.IOException
    public boolean mkdirs(String dirsthrows IOException {
        ContainerDescriptor container = .getDfs().asContainer(dirs);
        container.create();
        return true;
    }

    
List the contents of a directory.

Parameters:
dir name of directory to list
Returns:
array of strings, one for each file name
Throws:
java.io.IOException
    public String[] listPaths(String dirthrows IOException {
        Collection<StringallPaths = new ArrayList<String>();
        ContainerDescriptor container = .getDfs().asContainer(dir);
        Iterator<ElementDescriptoriter = container.iterator();
        while (iter.hasNext()) {
            ElementDescriptor elem = iter.next();
            allPaths.add(elem.toString());
        }
        String[] type = new String[1];
        return allPaths.toArray(type);
    }

    
Return a map containing the logical plan associated with each alias.

Returns:
map
    public Map<StringLogicalPlangetAliases() {
        Map<StringLogicalPlanaliasPlans = new HashMap<StringLogicalPlan>();
        for (LogicalRelationalOperator op : .getAliases().keySet()) {
            String alias = op.getAlias();
            if(null != alias) {
                aliasPlans.put(alias.getAliases().get(op));
            }
        }
        return aliasPlans;
    }

    
Reclaims resources used by this instance of PigServer. This method deletes all temporary files generated by the current thread while executing Pig commands.
    public void shutdown() {
        // clean-up activities
        // TODO: reclaim scope to free up resources. Currently
        // this is not implemented and throws an exception
        // hence, for now, we won't call it.
        //
        // pigContext.getExecutionEngine().reclaimScope(this.scope);
        FileLocalizer.deleteTempFiles();
    }

    
Get the set of all current aliases.

Returns:
set
    public Set<StringgetAliasKeySet() {
        return .getAliasOp().keySet();
    }
    public Map<OperatorDataBaggetExamples(String aliasthrows IOException {
        try {
            if (.isBatchOn() && alias != null) {
                .parseQuery();
                .buildPlannull );
                execute();
            }
            .parseQuery();
            .buildPlanalias );
            .compile();
        } catch (IOException e) {
            //Since the original script is parsed anyway, there should not be an
            //error in this parsing. The only reason there can be an error is when
            //the files being loaded in load don't exist anymore.
            e.printStackTrace();
        }
        ExampleGenerator exgen = new ExampleGenerator. );
        try {
            return exgen.getExamples();
        } catch (ExecException e) {
            e.printStackTrace(.);
            throw new IOException("ExecException" , e);
        } catch (Exception e) {
            e.printStackTrace(.);
            throw new IOException("Exception "e);
        }
    }
    public void printHistory(boolean withNumbers) {
        List<Stringsc = .getScriptCache();
        if(!sc.isEmpty()) {
            for(int i = 0 ; i < sc.size(); i++) {
                if(withNumbers..print((i+1)+"   ");
                ..println(sc.get(i));
            }
        }
    }
    private void buildStorePlan(String aliasthrows IOException {
        .parseQuery();
        .buildPlanalias );
        if( !isBatchOn() || alias != null ) {
            // MRCompiler needs a store to be the leaf - hence
            // add a store to the plan to explain
            QueryParserUtils.attachStorePlan(."fakefile"null.getOperatoralias ),
                    "fake" );
        }
        .compile();
    }

    
Compile and execute the current plan.

Returns:
Throws:
java.io.IOException
    private PigStats execute() throws IOException {
        if != null ) {
        }
        // In this plan, all stores in the plan will be executed. They should be ignored if the plan is reused.
        .countExecutedStores();
        .compile();
        if..size() == 0 ) {
           return PigStats.get();
        }
        .getProperties().setProperty("pig.logical.plan.signature"..getSignature());
        PigStats stats = executeCompiledLogicalPlan();
        return stats;
    }
        // discover pig features used in this script
        ScriptState.get().setScriptFeatures. );
        PhysicalPlan pp = compilePp();
        return launchPlan(pp"job_pigexec_");
    }

    
A common method for launching the jobs according to the physical plan

Parameters:
pp The physical plan
jobName A String containing the job name to be used
Returns:
The PigStats object
Throws:
org.apache.pig.backend.executionengine.ExecException
org.apache.pig.impl.logicalLayer.FrontendException
    protected PigStats launchPlan(PhysicalPlan ppString jobNamethrows ExecExceptionFrontendException {
        MapReduceLauncher launcher = new MapReduceLauncher();
        PigStats stats = null;
        try {
            stats = launcher.launchPig(ppjobName);
        } catch (Exception e) {
            // There are a lot of exceptions thrown by the launcher.  If this
            // is an ExecException, just let it through.  Else wrap it.
            if (e instanceof ExecException){
                throw (ExecException)e;
            } else if (e instanceof FrontendException) {
                throw (FrontendException)e;
            } else {
                int errCode = 2043;
                String msg = "Unexpected error during execution.";
                throw new ExecException(msgerrCode.e);
            }
        } finally {
            launcher.reset();
        }
        for (OutputStats output : stats.getOutputStats()) {
            if (!output.isSuccessful()) {
                POStore store = output.getPOStore();
                try {
                    store.getStoreFunc().cleanupOnFailure(
                            store.getSFile().getFileName(),
                            new Job(output.getConf()));
                } catch (IOException e) {
                    throw new ExecException(e);
                }
            } else {
                POStore store = output.getPOStore();
                try {
                    store.getStoreFunc().cleanupOnSuccess(
                            store.getSFile().getFileName(),
                            new Job(output.getConf()));
                } catch (IOException e) {