Start line:  
End line:  

Snippet Preview

Snippet HTML Code

Stack Overflow Questions
  package com.fasterxml.jackson.core.io;
  
  
Helper class used for efficient encoding of JSON String values (including JSON field names) into Strings or UTF-8 byte arrays.

Note that methods in here are somewhat optimized, but not ridiculously so. Reason is that conversion method results are expected to be cached so that these methods will not be hot spots during normal operation.

 
 public final class JsonStringEncoder
 {
     // Hex digit lookup tables obtained from CharTypes: one as chars (for String
     // output) and one as bytes (for UTF-8 output). Presumably indexed by a 4-bit
     // nibble value to produce the hex digits of numeric escapes.
     private final static char[] HEX_CHARS = CharTypes.copyHexChars();
 
     private final static byte[] HEX_BYTES = CharTypes.copyHexBytes();
 
     // UTF-16 surrogate ranges: SURR1 is the first (high) half of a pair,
     // SURR2 the second (low) half.
     private final static int SURR1_FIRST = 0xD800;
     private final static int SURR1_LAST = 0xDBFF;
     private final static int SURR2_FIRST = 0xDC00;
     private final static int SURR2_LAST = 0xDFFF;
 
     // Character constants held as ints: backslash, lowercase 'u' and digit '0'.
     private final static int INT_BACKSLASH = '\\';
     private final static int INT_U = 'u';
     private final static int INT_0 = '0';
    
    
This ThreadLocal contains a java.lang.ref.SoftReference to a JsonStringEncoder, used to provide low-cost per-thread recycling of encoder instances.
 
     /**
      * Per-thread holder of a softly-referenced {@link JsonStringEncoder}.
      * Because the reference is soft, the garbage collector may clear it, so
      * readers must handle both a missing reference and a cleared one.
      */
     final protected static ThreadLocal<SoftReference<JsonStringEncoder>> _threadEncoder
         = new ThreadLocal<SoftReference<JsonStringEncoder>>();

    
Lazily constructed text buffer used to produce JSON encoded Strings as characters (without UTF-8 encoding)
 
     /**
      * Text buffer used to produce JSON-encoded Strings as characters (without
      * UTF-8 encoding). Not initialized at declaration; starts out null.
      */
     protected TextBuffer _textBuffer;

    
Lazily-constructed builder used for UTF-8 encoding of text values (quoted and unquoted)
 
     /**
      * Builder used for UTF-8 encoding of text values (quoted and unquoted).
      * Not initialized at declaration; starts out null.
      */
     protected ByteArrayBuilder _byteBuilder;
    
    
Temporary buffer used for composing quote/escape sequences
 
     /**
      * Temporary buffer used for composing quote/escape sequences.
      * Declared final, so it must be assigned exactly once by the constructor.
      */
     protected final char[] _quoteBuffer;
     
     /*
     /**********************************************************
     /* Construction, instance access
     /**********************************************************
      */
     
     public JsonStringEncoder()
     {
          = new char[6];
         [0] = '\\';
         [2] = '0';
         [3] = '0';
     }
    
    
Factory method for getting an instance; this is either recycled per-thread instance, or a newly constructed one.
 
     public static JsonStringEncoder getInstance()
     {
         SoftReference<JsonStringEncoderref = .get();
         JsonStringEncoder enc = (ref == null) ? null : ref.get();
 
         if (enc == null) {
             enc = new JsonStringEncoder();
             .set(new SoftReference<JsonStringEncoder>(enc));
         }
         return enc;
     }
 
     /*
     /**********************************************************
     /* Public API
     /**********************************************************
      */

    
Method that will quote text contents using JSON standard quoting, and return results as a character array
 
     public char[] quoteAsString(String input)
     {
         TextBuffer textBuffer = ;
        if (textBuffer == null) {
            // no allocator; can add if we must, shouldn't need to
             = textBuffer = new TextBuffer(null);
        }
        char[] outputBuffer = textBuffer.emptyAndGetCurrentSegment();
        final int[] escCodes = CharTypes.get7BitOutputEscapes();
        final int escCodeCount = escCodes.length;
        int inPtr = 0;
        final int inputLen = input.length();
        int outPtr = 0;
 
        outer_loop:
        while (inPtr < inputLen) {
            tight_loop:
            while (true) {
                char c = input.charAt(inPtr);
                if (c < escCodeCount && escCodes[c] != 0) {
                    break tight_loop;
                }
                if (outPtr >= outputBuffer.length) {
                    outputBuffer = textBuffer.finishCurrentSegment();
                    outPtr = 0;
                }
                outputBuffer[outPtr++] = c;
                if (++inPtr >= inputLen) {
                    break outer_loop;
                }
            }
            // something to escape; 2 or 6-char variant? 
            char d = input.charAt(inPtr++);
            int escCode = escCodes[d];
            int length = (escCode < 0)
                    ? _appendNumericEscape(d)
                    : _appendNamedEscape(escCode);
                    ;
            if ((outPtr + length) > outputBuffer.length) {
                int first = outputBuffer.length - outPtr;
                if (first > 0) {
                    System.arraycopy(, 0, outputBufferoutPtrfirst);
                }
                outputBuffer = textBuffer.finishCurrentSegment();
                int second = length - first;
                System.arraycopy(firstoutputBuffer, 0, second);
                outPtr = second;
            } else {
                System.arraycopy(, 0, outputBufferoutPtrlength);
                outPtr += length;
            }
        }
        textBuffer.setCurrentLength(outPtr);
        return textBuffer.contentsAsArray();
    }

    
Will quote given JSON String value using standard quoting, encode results as UTF-8, and return result as a byte array.
    public byte[] quoteAsUTF8(String text)
    {
        ByteArrayBuilder byteBuilder = ;
        if (byteBuilder == null) {
            // no allocator; can add if we must, shouldn't need to
             = byteBuilder = new ByteArrayBuilder(null);
        }
        int inputPtr = 0;
        int inputEnd = text.length();
        int outputPtr = 0;
        byte[] outputBuffer = byteBuilder.resetAndGetFirstSegment();
        
        main_loop:
        while (inputPtr < inputEnd) {
            final int[] escCodes = CharTypes.get7BitOutputEscapes();
            inner_loop: // ASCII and escapes
            while (true) {
                int ch = text.charAt(inputPtr);
                if (ch > 0x7F || escCodes[ch] != 0) {
                    break inner_loop;
                }
                if (outputPtr >= outputBuffer.length) {
                    outputBuffer = byteBuilder.finishCurrentSegment();
                    outputPtr = 0;
                }
                outputBuffer[outputPtr++] = (bytech;
                if (++inputPtr >= inputEnd) {
                    break main_loop;
                }
            }                
            if (outputPtr >= outputBuffer.length) {
                outputBuffer = byteBuilder.finishCurrentSegment();
                outputPtr = 0;
            }
            // Ok, so what did we hit?
            int ch = (inttext.charAt(inputPtr++);
            if (ch <= 0x7F) { // needs quoting
                int escape = escCodes[ch];
                // ctrl-char, 6-byte escape...
                outputPtr = _appendByteEscape(chescapebyteBuilderoutputPtr);
                outputBuffer = byteBuilder.getCurrentSegment();
                continue main_loop;
            } else if (ch <= 0x7FF) { // fine, just needs 2 byte output
                outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
                ch = (0x80 | (ch & 0x3f));
            } else { // 3 or 4 bytes
                // Surrogates?
                if (ch <  || ch > ) { // nope
                    outputBuffer[outputPtr++] = (byte) (0xe0 | (ch >> 12));
                    if (outputPtr >= outputBuffer.length) {
                        outputBuffer = byteBuilder.finishCurrentSegment();
                        outputPtr = 0;
                    }
                    outputBuffer[outputPtr++] = (byte) (0x80 | ((ch >> 6) & 0x3f));
                    ch = (0x80 | (ch & 0x3f));
                } else { // yes, surrogate pair
                    if (ch > ) { // must be from first range
                        _throwIllegalSurrogate(ch);
                    }
                    // and if so, followed by another from next range
                    if (inputPtr >= inputEnd) {
                        _throwIllegalSurrogate(ch);
                    }
                    ch = _convertSurrogate(chtext.charAt(inputPtr++));
                    if (ch > 0x10FFFF) { // illegal, as per RFC 4627
                        _throwIllegalSurrogate(ch);
                    }
                    outputBuffer[outputPtr++] = (byte) (0xf0 | (ch >> 18));
                    if (outputPtr >= outputBuffer.length) {
                        outputBuffer = byteBuilder.finishCurrentSegment();
                        outputPtr = 0;
                    }
                    outputBuffer[outputPtr++] = (byte) (0x80 | ((ch >> 12) & 0x3f));
                    if (outputPtr >= outputBuffer.length) {
                        outputBuffer = byteBuilder.finishCurrentSegment();
                        outputPtr = 0;
                    }
                    outputBuffer[outputPtr++] = (byte) (0x80 | ((ch >> 6) & 0x3f));
                    ch = (0x80 | (ch & 0x3f));
                }
            }
            if (outputPtr >= outputBuffer.length) {
                outputBuffer = byteBuilder.finishCurrentSegment();
                outputPtr = 0;
            }
            outputBuffer[outputPtr++] = (bytech;
        }
        return .completeAndCoalesce(outputPtr);
    }
    
    
Will encode given String as UTF-8 (without any quoting), return resulting byte array.
    public byte[] encodeAsUTF8(String text)
    {
        ByteArrayBuilder byteBuilder = ;
        if (byteBuilder == null) {
            // no allocator; can add if we must, shouldn't need to
             = byteBuilder = new ByteArrayBuilder(null);
        }
        int inputPtr = 0;
        int inputEnd = text.length();
        int outputPtr = 0;
        byte[] outputBuffer = byteBuilder.resetAndGetFirstSegment();
        int outputEnd = outputBuffer.length;
        
        main_loop:
        while (inputPtr < inputEnd) {
            int c = text.charAt(inputPtr++);
            // first tight loop for ascii
            while (c <= 0x7F) {
                if (outputPtr >= outputEnd) {
                    outputBuffer = byteBuilder.finishCurrentSegment();
                    outputEnd = outputBuffer.length;
                    outputPtr = 0;
                }
                outputBuffer[outputPtr++] = (bytec;
                if (inputPtr >= inputEnd) {
                    break main_loop;
                }
                c = text.charAt(inputPtr++);
            }
            // then multi-byte...
            if (outputPtr >= outputEnd) {
                outputBuffer = byteBuilder.finishCurrentSegment();
                outputEnd = outputBuffer.length;
                outputPtr = 0;
            }
            if (c < 0x800) { // 2-byte
                outputBuffer[outputPtr++] = (byte) (0xc0 | (c >> 6));
            } else { // 3 or 4 bytes
                // Surrogates?
                if (c <  || c > ) { // nope
                    outputBuffer[outputPtr++] = (byte) (0xe0 | (c >> 12));
                    if (outputPtr >= outputEnd) {
                        outputBuffer = byteBuilder.finishCurrentSegment();
                        outputEnd = outputBuffer.length;
                        outputPtr = 0;
                    }
                    outputBuffer[outputPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
                } else { // yes, surrogate pair
                    if (c > ) { // must be from first range
                        _throwIllegalSurrogate(c);
                    }
                    // and if so, followed by another from next range
                    if (inputPtr >= inputEnd) {
                        _throwIllegalSurrogate(c);
                    }
                    c = _convertSurrogate(ctext.charAt(inputPtr++));
                    if (c > 0x10FFFF) { // illegal, as per RFC 4627
                        _throwIllegalSurrogate(c);
                    }
                    outputBuffer[outputPtr++] = (byte) (0xf0 | (c >> 18));
                    if (outputPtr >= outputEnd) {
                        outputBuffer = byteBuilder.finishCurrentSegment();
                        outputEnd = outputBuffer.length;
                        outputPtr = 0;
                    }
                    outputBuffer[outputPtr++] = (byte) (0x80 | ((c >> 12) & 0x3f));
                    if (outputPtr >= outputEnd) {
                        outputBuffer = byteBuilder.finishCurrentSegment();
                        outputEnd = outputBuffer.length;
                        outputPtr = 0;
                    }
                    outputBuffer[outputPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
                }
            }
            if (outputPtr >= outputEnd) {
                outputBuffer = byteBuilder.finishCurrentSegment();
                outputEnd = outputBuffer.length;
                outputPtr = 0;
            }
            outputBuffer[outputPtr++] = (byte) (0x80 | (c & 0x3f));
        }
        return .completeAndCoalesce(outputPtr);
    }
    
    /*
    /**********************************************************
    /* Internal methods
    /**********************************************************
     */
    private int _appendNumericEscape(int valuechar[] quoteBuffer)
    {
        quoteBuffer[1] = 'u';
        // We know it's a control char, so only the last 2 chars are non-0
        quoteBuffer[4] = [value >> 4];
        quoteBuffer[5] = [value & 0xF];
        return 6;
    }
    private int _appendNamedEscape(int escCodechar[] quoteBuffer)
    {
        quoteBuffer[1] = (charescCode;
        return 2;
    }
    private int _appendByteEscape(int chint escCodeByteArrayBuilder byteBuilderint ptr)
    {
        byteBuilder.setCurrentSegmentLength(ptr);
        byteBuilder.append();
        if (escCode < 0) { // standard escape
            byteBuilder.append();
            if (ch > 0xFF) {
                int hi = (ch >> 8);
                byteBuilder.append([hi >> 4]);
                byteBuilder.append([hi & 0xF]);
                ch &= 0xFF;
            } else {
                byteBuilder.append();
                byteBuilder.append();
            }
            byteBuilder.append([ch >> 4]);
            byteBuilder.append([ch & 0xF]);
        } else { // 2-char simple escape
            byteBuilder.append((byteescCode);
        }
        return byteBuilder.getCurrentSegmentLength();
    }
    
    
Method called to calculate the Unicode code point from a surrogate pair.
    private int _convertSurrogate(int firstPartint secondPart)
    {
        // Ok, then, is the second part valid?
        if (secondPart <  || secondPart > ) {
            throw new IllegalArgumentException("Broken surrogate pair: first char 0x"+Integer.toHexString(firstPart)+", second 0x"+Integer.toHexString(secondPart)+"; illegal combination");
        }
        return 0x10000 + ((firstPart - ) << 10) + (secondPart - );
    }
    
    private void _throwIllegalSurrogate(int code)
    {
        if (code > 0x10FFFF) { // over max?
            throw new IllegalArgumentException("Illegal character point (0x"+Integer.toHexString(code)+") to output; max is 0x10FFFF as per RFC 4627");
        }
        if (code >= ) {
            if (code <= ) { // Unmatched first part (closing without second part?)
                throw new IllegalArgumentException("Unmatched first part of surrogate pair (0x"+Integer.toHexString(code)+")");
            }
            throw new IllegalArgumentException("Unmatched second part of surrogate pair (0x"+Integer.toHexString(code)+")");
        }
        // should we ever get this?
        throw new IllegalArgumentException("Illegal character point (0x"+Integer.toHexString(code)+") to output");
    }
New to GrepCode? Check out our FAQ X