Start line:  
End line:  

Snippet Preview

Snippet HTML Code

Stack Overflow Questions
  /* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
   *
   * This Source Code Form is subject to the terms of the Mozilla Public
   * License, v. 2.0. If a copy of the MPL was not distributed with this
   * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  
  package org.mozilla.javascript;
  
 /**
  * The following class saves decompilation information about the
  * source.  Source information is returned from the parser as a String
  * associated with function nodes and with the toplevel script.  When
  * saved in the constant pool of a class, this string will be UTF-8
  * encoded, and token values will occupy a single byte.
  *
  * Source is saved (mostly) as token numbers.  The tokens saved pretty
  * much correspond to the token stream of a 'canonical' representation
  * of the input program, as directed by the parser.  (There were a few
  * cases where tokens could have been left out where the decompiler
  * could easily reconstruct them, but I left them in for clarity.)  (I
  * also looked at adding source collection to TokenStream instead, where
  * I could have limited the changes to a few lines in getToken... but
  * this wouldn't have saved any space in the resulting source
  * representation, and would have meant that I'd have to duplicate
  * parser logic in the decompiler to disambiguate situations where
  * newlines are important.)  The function decompile expands the
  * tokens back into their string representations, using simple
  * lookahead to correct spacing and indentation.
  *
  * Assignments are saved as two-token pairs (Token.ASSIGN, op).  Number
  * tokens are stored inline, as a NUMBER token, a character
  * representing the type, and either 1 or 4 characters representing the
  * bit-encoding of the number.  String types NAME, STRING and OBJECT are
  * currently stored as a token type, followed by a character giving the
  * length of the string (assumed to be less than 2^16), followed by the
  * characters of the string inlined into the source string.  Changing
  * this to some reference to the string in the compiled class' constant
  * pool would probably save a lot of space... but would require some
  * method of deriving the final constant pool entry from information
  * available at parse time.
  */
 
 public class Decompiler
 {
    
     /**
      * Flag to indicate that the decompilation should omit the
      * function header and trailing brace.
      */
 
     public static final int ONLY_BODY_FLAG = 1 << 0;

    
     /**
      * Flag to indicate that the decompilation generates toSource result.
      */
 
     public static final int TO_SOURCE_FLAG = 1 << 1;

    
     /**
      * Decompilation property to specify initial indent value.
      */
 
     public static final int INITIAL_INDENT_PROP = 1;

    
     /**
      * Decompilation property to specify default indentation offset.
      */
 
     public static final int INDENT_GAP_PROP = 2;

    
     /**
      * Decompilation property to specify indentation offset for case labels.
      */
 
     public static final int CASE_GAP_PROP = 3;
 
     // Marker to denote the last RC of function so it can be distinguished from
     // the last RC of object literals in case of function expressions
     private static final int FUNCTION_END = . + 1;
 
     {
         return sourceToString(0);
     }
 
     int getCurrentOffset()
     {
         return ;
     }
 
     int markFunctionStart(int functionType)
     {
         int savedOffset = getCurrentOffset();
         addToken(.);
         append((char)functionType);
         return savedOffset;
     }
 
     int markFunctionEnd(int functionStart)
     {
         int offset = getCurrentOffset();
         append((char));
         return offset;
     }
    void addToken(int token)
    {
        if (!(0 <= token && token <= .))
            throw new IllegalArgumentException();
        append((char)token);
    }
    void addEOL(int token)
    {
        if (!(0 <= token && token <= .))
            throw new IllegalArgumentException();
        append((char)token);
        append((char).);
    }
    void addName(String str)
    {
        addToken(.);
        appendString(str);
    }
    void addString(String str)
    {
        addToken(.);
        appendString(str);
    }
    void addRegexp(String regexpString flags)
    {
        addToken(.);
        appendString('/' + regexp + '/' + flags);
    }
    void addNumber(double n)
    {
        addToken(.);
        /* encode the number in the source stream.
         * Save as NUMBER type (char | char char char char)
         * where type is
         * 'D' - double, 'S' - short, 'J' - long.
         * We need to retain float vs. integer type info to keep the
         * behavior of liveconnect type-guessing the same after
         * decompilation.  (Liveconnect tries to present 1.0 to Java
         * as a float/double)
         * OPT: This is no longer true. We could compress the format.
         * This may not be the most space-efficient encoding;
         * the chars created below may take up to 3 bytes in
         * constant pool UTF-8 encoding, so a Double could take
         * up to 12 bytes.
         */
        long lbits = (long)n;
        if (lbits != n) {
            // if it's floating point, save as a Double bit pattern.
            // (12/15/97 our scanner only returns Double for f.p.)
            lbits = Double.doubleToLongBits(n);
            append('D');
            append((char)(lbits >> 48));
            append((char)(lbits >> 32));
            append((char)(lbits >> 16));
            append((char)lbits);
        }
        else {
            // we can ignore negative values, bc they're already prefixed
            // by NEG
               if (lbits < 0) Kit.codeBug();
            // will it fit in a char?
            // this gives a short encoding for integer values up to 2^16.
            if (lbits <= .) {
                append('S');
                append((char)lbits);
            }
            else { // Integral, but won't fit in a char. Store as a long.
                append('J');
                append((char)(lbits >> 48));
                append((char)(lbits >> 32));
                append((char)(lbits >> 16));
                append((char)lbits);
            }
        }
    }
    private void appendString(String str)
    {
        int L = str.length();
        int lengthEncodingSize = 1;
        if (L >= 0x8000) {
            lengthEncodingSize = 2;
        }
        int nextTop =  + lengthEncodingSize + L;
        if (nextTop > .) {
            increaseSourceCapacity(nextTop);
        }
        if (L >= 0x8000) {
            // Use 2 chars to encode strings exceeding 32K, were the highest
            // bit in the first char indicates presence of the next byte
            [] = (char)(0x8000 | (L >>> 16));
            ++;
        }
        [] = (char)L;
        ++;
        str.getChars(0, L);
         = nextTop;
    }
    private void append(char c)
    {
        if ( == .) {
            increaseSourceCapacity( + 1);
        }
        [] = c;
        ++;
    }
    private void increaseSourceCapacity(int minimalCapacity)
    {
        // Call this only when capacity increase is must
        if (minimalCapacity <= .) Kit.codeBug();
        int newCapacity = . * 2;
        if (newCapacity < minimalCapacity) {
            newCapacity = minimalCapacity;
        }
        char[] tmp = new char[newCapacity];
        System.arraycopy(, 0, tmp, 0, );
         = tmp;
    }
    private String sourceToString(int offset)
    {
        if (offset < 0 ||  < offset) Kit.codeBug();
        return new String(offset - offset);
    }

    
    /**
     * Decompile the source information associated with this js
     * function/script back into a string.  For the most part, this
     * just means translating tokens back to their string
     * representations; there's a little bit of lookahead logic to
     * decide the proper spacing/indentation.  Most of the work in
     * mapping the original source to the prettyprinted decompiled
     * version is done by the parser.
     *
     * @param source encoded source tree representation
     *
     * @param flags flags to select output format
     *
     * @param properties indentation properties
     *
     */
    public static String decompile(String sourceint flags,
                                   UintMap properties)
    {
        int length = source.length();
        if (length == 0) { return ""; }
        int indent = properties.getInt(, 0);
        if (indent < 0) throw new IllegalArgumentException();
        int indentGap = properties.getInt(, 4);
        if (indentGap < 0) throw new IllegalArgumentException();
        int caseGap = properties.getInt(, 2);
        if (caseGap < 0) throw new IllegalArgumentException();
        StringBuffer result = new StringBuffer();
        boolean justFunctionBody = (0 != (flags & .));
        boolean toSource = (0 != (flags & .));
        // Spew tokens in source, for debugging.
        // as TYPE number char
        if () {
            ..println("length:" + length);
            for (int i = 0; i < length; ++i) {
                // Note that tokenToName will fail unless Context.printTrees
                // is true.
                String tokenname = null;
                if (.) {
                    tokenname = Token.name(source.charAt(i));
                }
                if (tokenname == null) {
                    tokenname = "---";
                }
                String pad = tokenname.length() > 7
                    ? "\t"
                    : "\t\t";
                ..println
                    (tokenname
                     + pad + (int)source.charAt(i)
                     + "\t'" + ScriptRuntime.escapeString
                     (source.substring(ii+1))
                     + "'");
            }
            ..println();
        }
        int braceNesting = 0;
        boolean afterFirstEOL = false;
        int i = 0;
        int topFunctionType;
        if (source.charAt(i) == .) {
            ++i;
            topFunctionType = -1;
        } else {
            topFunctionType = source.charAt(i + 1);
        }
        if (!toSource) {
            // add an initial newline to exactly match js.
            result.append('\n');
            for (int j = 0; j < indentj++)
                result.append(' ');
        } else {
            if (topFunctionType == .) {
                result.append('(');
            }
        }
        while (i < length) {
            switch(source.charAt(i)) {
            case .:
            case .:
                result.append(source.charAt(i) == . ? "get " : "set ");
                ++i;
                i = printSourceString(sourcei + 1, falseresult);
                // Now increment one more to get past the FUNCTION token
                ++i;
                break;
            case .:
            case .:  // re-wrapped in '/'s in parser...
                i = printSourceString(sourcei + 1, falseresult);
                continue;
            case .:
                i = printSourceString(sourcei + 1, trueresult);
                continue;
            case .:
                i = printSourceNumber(sourcei + 1, result);
                continue;
            case .:
                result.append("true");
                break;
            case .:
                result.append("false");
                break;
            case .:
                result.append("null");
                break;
            case .:
                result.append("this");
                break;
            case .:
                ++i// skip function type
                result.append("function ");
                break;
            case :
                // Do nothing
                break;
            case .:
                result.append(", ");
                break;
            case .:
                ++braceNesting;
                if (. == getNext(sourcelengthi))
                    indent += indentGap;
                result.append('{');
                break;
            case .: {
                --braceNesting;
                /* don't print the closing RC if it closes the
                 * toplevel function and we're called from
                 * decompileFunctionBody.
                 */
                if (justFunctionBody && braceNesting == 0)
                    break;
                result.append('}');
                switch (getNext(sourcelengthi)) {
                    case .:
                    case :
                        indent -= indentGap;
                        break;
                    case .:
                    case .:
                        indent -= indentGap;
                        result.append(' ');
                        break;
                }
                break;
            }
            case .:
                result.append('(');
                break;
            case .:
                result.append(')');
                if (. == getNext(sourcelengthi))
                    result.append(' ');
                break;
            case .:
                result.append('[');
                break;
            case .:
                result.append(']');
                break;
            case .: {
                if (toSourcebreak;
                boolean newLine = true;
                if (!afterFirstEOL) {
                    afterFirstEOL = true;
                    if (justFunctionBody) {
                        /* throw away just added 'function name(...) {'
                         * and restore the original indent
                         */
                        result.setLength(0);
                        indent -= indentGap;
                        newLine = false;
                    }
                }
                if (newLine) {
                    result.append('\n');
                }
                /* add indent if any tokens remain,
                 * less setback if next token is
                 * a label, case or default.
                 */
                if (i + 1 < length) {
                    int less = 0;
                    int nextToken = source.charAt(i + 1);
                    if (nextToken == .
                        || nextToken == .)
                    {
                        less = indentGap - caseGap;
                    } else if (nextToken == .) {
                        less = indentGap;
                    }
                    /* elaborate check against label... skip past a
                     * following inlined NAME and look for a COLON.
                     */
                    else if (nextToken == .) {
                        int afterName = getSourceStringEnd(sourcei + 2);
                        if (source.charAt(afterName) == .)
                            less = indentGap;
                    }
                    for (; less < indentless++)
                        result.append(' ');
                }
                break;
            }
            case .:
                result.append('.');
                break;
            case .:
                result.append("new ");
                break;
            case .:
                result.append("delete ");
                break;
            case .:
                result.append("if ");
                break;
            case .:
                result.append("else ");
                break;
            case .:
                result.append("for ");
                break;
            case .:
                result.append(" in ");
                break;
            case .:
                result.append("with ");
                break;
            case .:
                result.append("while ");
                break;
            case .:
                result.append("do ");
                break;
            case .:
                result.append("try ");
                break;
            case .:
                result.append("catch ");
                break;
            case .:
                result.append("finally ");
                break;
            case .:
                result.append("throw ");
                break;
            case .:
                result.append("switch ");
                break;
            case .:
                result.append("break");
                if (. == getNext(sourcelengthi))
                    result.append(' ');
                break;
            case .:
                result.append("continue");
                if (. == getNext(sourcelengthi))
                    result.append(' ');
                break;
            case .:
                result.append("case ");
                break;
            case .:
                result.append("default");
                break;
            case .:
                result.append("return");
                if (. != getNext(sourcelengthi))
                    result.append(' ');
                break;
            case .:
                result.append("var ");
                break;
            case .:
              result.append("let ");
              break;
            case .:
                result.append(';');
                if (. != getNext(sourcelengthi)) {
                    // separators in FOR
                    result.append(' ');
                }
                break;
            case .:
                result.append(" = ");
                break;
            case .:
                result.append(" += ");
                break;
            case .:
                result.append(" -= ");
                break;
            case .:
                result.append(" *= ");
                break;
            case .:
                result.append(" /= ");
                break;
            case .:
                result.append(" %= ");
                break;
            case .:
                result.append(" |= ");
                break;
            case .:
                result.append(" ^= ");
                break;
            case .:
                result.append(" &= ");
                break;
            case .:
                result.append(" <<= ");
                break;
            case .:
                result.append(" >>= ");
                break;
            case .:
                result.append(" >>>= ");
                break;
            case .:
                result.append(" ? ");
                break;
            case .:
                // pun OBJECTLIT to mean colon in objlit property
                // initialization.
                // This needs to be distinct from COLON in the general case
                // to distinguish from the colon in a ternary... which needs
                // different spacing.
                result.append(": ");
                break;
            case .:
                if (. == getNext(sourcelengthi))
                    // it's the end of a label
                    result.append(':');
                else
                    // it's the middle part of a ternary
                    result.append(" : ");
                break;
            case .:
                result.append(" || ");
                break;
            case .:
                result.append(" && ");
                break;
            case .:
                result.append(" | ");
                break;
            case .:
                result.append(" ^ ");
                break;
            case .:
                result.append(" & ");
                break;
            case .:
                result.append(" === ");
                break;
            case .:
                result.append(" !== ");
                break;
            case .:
                result.append(" == ");
                break;
            case .:
                result.append(" != ");
                break;
            case .:
                result.append(" <= ");
                break;
            case .:
                result.append(" < ");
                break;
            case .:
                result.append(" >= ");
                break;
            case .:
                result.append(" > ");
                break;
            case .:
                result.append(" instanceof ");
                break;
            case .:
                result.append(" << ");
                break;
            case .:
                result.append(" >> ");
                break;
            case .:
                result.append(" >>> ");
                break;
            case .:
                result.append("typeof ");
                break;
            case .:
                result.append("void ");
                break;
            case .:
                result.append("const ");
                break;
            case .:
                result.append("yield ");
                break;
            case .:
                result.append('!');
                break;
            case .:
                result.append('~');
                break;
            case .:
                result.append('+');
                break;
            case .:
                result.append('-');
                break;
            case .:
                result.append("++");
                break;
            case .:
                result.append("--");
                break;
            case .:
                result.append(" + ");
                break;
            case .:
                result.append(" - ");
                break;
            case .:
                result.append(" * ");
                break;
            case .:
                result.append(" / ");
                break;
            case .:
                result.append(" % ");
                break;
            case .:
                result.append("::");
                break;
            case .:
                result.append("..");
                break;
            case .:
                result.append(".(");
                break;
            case .:
                result.append('@');
                break;
            case .:
                result.append("debugger;\n");
                break;
            default:
                // If we don't know how to decompile it, raise an exception.
                throw new RuntimeException("Token: " +
                                               Token.name(source.charAt(i)));
            }
            ++i;
        }
        if (!toSource) {
            // add that trailing newline if it's an outermost function.
            if (!justFunctionBody)
                result.append('\n');
        } else {
            if (topFunctionType == .) {
                result.append(')');
            }
        }
        return result.toString();
    }
    private static int getNext(String sourceint lengthint i)
    {
        return (i + 1 < length) ? source.charAt(i + 1) : .;
    }
    private static int getSourceStringEnd(String sourceint offset)
    {
        return printSourceString(sourceoffsetfalsenull);
    }
    private static int printSourceString(String sourceint offset,
                                         boolean asQuotedString,
                                         StringBuffer sb)
    {
        int length = source.charAt(offset);
        ++offset;
        if ((0x8000 & length) != 0) {
            length = ((0x7FFF & length) << 16) | source.charAt(offset);
            ++offset;
        }
        if (sb != null) {
            String str = source.substring(offsetoffset + length);
            if (!asQuotedString) {
                sb.append(str);
            } else {
                sb.append('"');
                sb.append(ScriptRuntime.escapeString(str));
                sb.append('"');
            }
        }
        return offset + length;
    }
    private static int printSourceNumber(String sourceint offset,
                                         StringBuffer sb)
    {
        double number = 0.0;
        char type = source.charAt(offset);
        ++offset;
        if (type == 'S') {
            if (sb != null) {
                int ival = source.charAt(offset);
                number = ival;
            }
            ++offset;
        } else if (type == 'J' || type == 'D') {
            if (sb != null) {
                long lbits;
                lbits = (long)source.charAt(offset) << 48;
                lbits |= (long)source.charAt(offset + 1) << 32;
                lbits |= (long)source.charAt(offset + 2) << 16;
                lbits |= source.charAt(offset + 3);
                if (type == 'J') {
                    number = lbits;
                } else {
                    number = Double.longBitsToDouble(lbits);
                }
            }
            offset += 4;
        } else {
            // Bad source
            throw new RuntimeException();
        }
        if (sb != null) {
            sb.append(ScriptRuntime.numberToString(number, 10));
        }
        return offset;
    }
    private char[] sourceBuffer = new char[128];
// Per script/function source buffer top: parent source does not include a
// nested functions source and uses function index as a reference instead.
    private int sourceTop;
// whether to do a debug print of the source information, when decompiling.
    private static final boolean printSource = false;
New to GrepCode? Check out our FAQ X