Start line:  
End line:  

Snippet Preview

Snippet HTML Code

Stack Overflow Questions
/*
 ***** BEGIN LICENSE BLOCK *****
 * Version: CPL 1.0/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Common Public
 * License Version 1.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.eclipse.org/legal/cpl-v10.html
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * Copyright (C) 2004 Jan Arne Petersen <jpetersen@uni-bonn.de>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the CPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the CPL, the GPL or the LGPL.
 ***** END LICENSE BLOCK *****/
 
 package org.jruby.lexer.yacc;
 
 
 /**
  * Lexer state for one string-like literal: it keeps the literal's flag bits,
  * its opening and closing delimiter characters, and a nesting counter, and
  * offers methods that read the literal's content from a LexerSource on behalf
  * of a RubyYaccLexer.
  *
  * NOTE(review): this copy of the file has lost many tokens (field names,
  * qualified constants, commas, closing parentheses of casts), apparently
  * during text extraction. Comments in this class describe only what the
  * remaining text shows and mark guesses as such.
  */
 public class StringTerm extends StrTerm {
     // Bit flags for this literal. The original note lists two meanings:
     // expand variables, and indentation of the final marker.
     private int flags;
 
     // Opening delimiter of the literal. Listed examples: [, (, {, <, ', ", \n
     private final char begin;
 
     // Closing delimiter of the literal. Listed examples: ], ), }, >, ', ", \0
     private final char end;
 
     // How many strings are nested in the current string term.
     private int nest;
 
     /**
      * Creates a term from the flag bits and the two delimiter values.
      *
      * NOTE(review): the commas in the parameter list, the closing parentheses
      * of the char casts and the names of the assignment targets are missing in
      * this copy. Presumably the four targets are the flags, begin, end and nest
      * fields, in that order -- confirm against the upstream source.
      */
     public StringTerm(int flagsint beginint end) {
         // Flag bits are stored as passed in.
         this. = flags;
         // Both delimiter values are narrowed to char before being stored.
         this. = (charbegin;
         this.   = (charend;
         // A counter is reset to zero (presumably the nesting depth -- confirm).
         this.  = 0;
     }
 
     protected ByteList createByteList(RubyYaccLexer lexer) {
         if (lexer.isOneEight()) return new ByteList();
 
         return new ByteList(new byte[]{}, lexer.getEncoding());
     }
 
     /**
      * Produces the lexer result for the point where the closing delimiter has
      * been reached (parseString calls this when its end test succeeds).
      *
      * Three outcomes are visible, chosen by two bit tests whose operands are
      * missing in this copy:
      * 1. first test true: a value is set to -1 and a space character is returned;
      * 2. second test true: trailing regexp option letters are parsed and a
      *    RegexpNode is published through lexer.setValue;
      * 3. otherwise: a Token holding a double quote is published.
      *
      * NOTE(review): the flag constants, the returned token constants and the
      * target of the -1 assignment were lost in extraction. The -1 is presumably
      * the same marker that parseString tests at its top -- confirm upstream.
      *
      * @param lexer receives the resulting value through setValue
      * @param src   source to read regexp option letters from and to ask for positions
      * @return a space character in the first case, otherwise a token id
      * @throws IOException declared because the source is read in the regexp case
      */
     private int endFound(RubyYaccLexer lexerLexerSource srcthrows IOException {
             if (( & .) != 0) {
                 // Marker value; see the -1 comparison at the top of parseString.
                  = -1;
                 // Return value is discarded here.
                 lexer.getPosition();
                 return ' ';
             }
 
             if (( & .) != 0) {
                 // Option letters directly following the closing delimiter.
                 RegexpOptions options = parseRegexpFlags(src);
                 // The node is created with an empty byte list at this point.
                 ByteList regexpBytelist = ByteList.create("");
 
                 lexer.setValue(new RegexpNode(src.getPosition(), regexpBytelistoptions));
                 return .;
             }
 
             // Plain case: publish a Token carrying a double quote.
             lexer.setValue(new Token("\""lexer.getPosition()));
             return .;
     }
 
     /**
      * Reads the next piece of the current literal from src and publishes it
      * through lexer.setValue, returning an int that identifies what was found.
      *
      * Visible order of checks:
      * 1. if a value equals -1, publish a double-quote Token and return at once;
      * 2. read one character; when a flag test passes and it is whitespace, skip
      *    the whole whitespace run and remember that via spaceSeen;
      * 3. when the end test passes, delegate to endFound;
      * 4. when whitespace was skipped, push the character back and return a space;
      * 5. when a flag test passes and the character is a hash sign, look at the
      *    next character to detect the start of an interpolation;
      * 6. otherwise collect content with parseStringIntoBuffer and publish it as
      *    a string node.
      *
      * NOTE(review): flag constants, token constants, the operands of the end
      * test and the error id were lost in extraction; which flag guards which
      * step cannot be told from this copy.
      *
      * @param lexer lexer that receives values and supplies positions
      * @param src   character source being consumed
      * @return a space character after skipped whitespace, otherwise a token id
      * @throws IOException declared because the source is read
      */
     public int parseString(RubyYaccLexer lexerLexerSource srcthrows IOException {
         boolean spaceSeen = false;
         int c;
 
         // FIXME: How much more obtuse can this be?
         // Heredoc already parsed this and saved string...Do not parse..just return
         if ( == -1) {
             lexer.setValue(new Token("\""lexer.getPosition()));
             return .;
         }
 
         c = src.read();
         if (( & .) != 0 && Character.isWhitespace(c)) {
             // Swallow the whole whitespace run; c ends up as the first non-blank.
             do { c = src.read(); } while (Character.isWhitespace(c));
             spaceSeen = true;
         }
        // End test: both comparison operands are missing in this copy (presumably
        // the closing delimiter and a zero nesting count -- confirm).
        if (c ==  &&  == 0) return endFound(lexersrc);
        
        if (spaceSeen) {
            // Give back the non-blank character and report the separator as a space.
            src.unread(c);
            lexer.getPosition();
            return ' ';
        }
        
        ByteList buffer = createByteList(lexer);
        if (( & .) != 0 && c == '#') {
            c = src.read();
            switch (c) {
            case '$':
            case '@':
                // The sigil is pushed back so it can be read again later.
                src.unread(c);
                lexer.setValue(new Token("#" + clexer.getPosition()));
                return .;
            case '{':
                // The brace is consumed here; it is not pushed back.
                lexer.setValue(new Token("#" + clexer.getPosition())); 
                return .;
            }
            // Not an interpolation start: the hash sign is ordinary content.
            buffer.append((byte'#');
        }
        // The character in c becomes the first one parseStringIntoBuffer reads.
        src.unread(c);
        
        // The value compared against is missing; given the message it is
        // presumably the end-of-input sentinel -- confirm.
        if (parseStringIntoBuffer(lexersrcbuffer) == .) {
            throw new SyntaxException(.src.getPosition(),
                    src.getCurrentLine(), "unterminated string meets end of file");
        }
        lexer.setValue(lexer.createStrNode(lexer.getPosition(), buffer));
        return .;
    }
    /**
     * Reads the run of letters at the current position of src and turns each
     * recognised letter into a setting on a fresh RegexpOptions.
     *
     * Letters handled: i (ignore case), x (extended), m (multiline), o (once),
     * j (java), and n, e, s, u, which each set an explicit kcode. Any other
     * letter is collected and reported in a single SyntaxException after the
     * loop. The first character that ends the run is pushed back onto src.
     *
     * NOTE(review): the sentinel compared in the loop condition, the four kcode
     * constants and the error id were lost in extraction.
     *
     * @param src source positioned right after a regexp literal
     * @return the options described by the letters that were read
     * @throws IOException declared because the source is read
     */
    private RegexpOptions parseRegexpFlags(LexerSource srcthrows IOException {
        RegexpOptions options = new RegexpOptions();
        int c;
        // Collects unrecognised letters so they can be reported together.
        StringBuilder unknownFlags = new StringBuilder(10);
        for (c = src.read(); c != .
                && Character.isLetter(c); c = src.read()) {
            switch (c) {
            case 'i':
                options.setIgnorecase(true);
                break;
            case 'x':
                options.setExtended(true);
                break;
            case 'm':
                options.setMultiline(true);
                break;
            case 'o':
                options.setOnce(true);
                break;
            case 'n':
                options.setExplicitKCode(.);
                break;
            case 'e':
                options.setExplicitKCode(.);
                break;
            case 's':
                options.setExplicitKCode(.);
                break;
            case 'u':
                options.setExplicitKCode(.);
                break;
            case 'j':
                options.setJava(true);
                break;
            default:
                unknownFlags.append((charc);
                break;
            }
        }
        // The character that stopped the loop is not part of the options.
        src.unread(c);
        if (unknownFlags.length() != 0) {
            // The message uses the plural form when more than one letter is unknown.
            throw new SyntaxException(.src.getPosition(), "unknown regexp option"
                    + (unknownFlags.length() > 1 ? "s" : "") + " - "
                    + unknownFlags.toString(), unknownFlags.toString());
        }
        return options;
    }
    /**
     * Always throws a SyntaxException at the lexer's current position whose
     * message names the two encodings in the form: found, the text
     * " mixed within ", then parser. It never returns normally.
     *
     * NOTE(review): the commas of the parameter list and the error id passed as
     * first constructor argument were lost in extraction.
     *
     * @param lexer          supplies the position for the exception
     * @param foundEncoding  encoding reported first in the message
     * @param parserEncoding encoding reported second in the message
     */
    private void mixedEscape(RubyYaccLexer lexerEncoding foundEncodingEncoding parserEncoding) {
        throw new SyntaxException(.,lexer.getPosition(), "",
                foundEncoding + " mixed within " + parserEncoding);
    }
    // mri: parser_tokadd_string
    /**
     * Copies literal content from src into buffer, handling nesting of
     * delimiters, backslash escapes and non-ASCII input, and stops at the first
     * character that does not belong to the current run of content.
     *
     * The loop ends in one of these visible ways:
     * - src.read() yields the sentinel compared in the loop condition;
     * - the second delimiter test matches while a counter is zero (the
     *   character is pushed back);
     * - expand is set and a hash sign is followed by a dollar sign, an at sign
     *   or an opening brace (both characters are pushed back);
     * - qwords is set and a whitespace character is read (pushed back);
     * - lexer.tokenAddMBC returns the value it is compared against (early return).
     *
     * NOTE(review): the five flag constants, the delimiter and counter names and
     * the sentinel constants were lost in extraction. Presumably the first
     * delimiter test is against the opening delimiter and the second against the
     * closing one, with the nesting count incremented and decremented between
     * them -- confirm against the upstream source.
     *
     * @param lexer  lexer used for encoding queries, escape decoding and positions
     * @param src    character source being consumed
     * @param buffer destination for the decoded content
     * @return the last value stored in c when the loop ended, or the value
     *         returned early after tokenAddMBC
     * @throws IOException declared because the source is read
     */
    public int parseStringIntoBuffer(RubyYaccLexer lexerLexerSource srcByteList bufferthrows IOException {
        boolean qwords = ( & .) != 0;
        boolean expand = ( & .) != 0;
        boolean escape = ( & .) != 0;
        boolean regexp = ( & .) != 0;
        boolean symbol = ( & .) != 0;
        // Set once a character with bit 0x80 has been appended (see loop end).
        boolean hasNonAscii = false;
        int c;
        Encoding encoding = lexer.getEncoding();
        while ((c = src.read()) != .) {
            if ( != '\0' && c == ) {
                // Counter goes up; the character itself is appended at the loop end.
                ++;
            } else if (c == ) {
                if ( == 0) {
                    // Leave this character for the caller to read again.
                    src.unread(c);
                    break;
                }
                --;
            } else if (expand && c == '#' && !src.peek('\n')) {
                int c2 = src.read();
                if (c2 == '$' || c2 == '@' || c2 == '{') {
                    // Start of an interpolation: push back both characters and stop.
                    src.unread(c2);
                    src.unread(c);
                    break;
                }
                // Ordinary hash sign: only the lookahead character is pushed back.
                src.unread(c2);
            } else if (c == '\\') {
                c = src.read();
                // A break inside this switch leaves only the switch; control then
                // continues to the append at the end of the loop body.
                switch (c) {
                case '\n':
                    // qwords: only the newline is appended at the loop end.
                    if (qwordsbreak;
                    // expand: backslash and newline are both dropped.
                    if (expandcontinue;
                    buffer.append('\\');
                    break;
                case '\\':
                    // With escape set the backslash is appended here and again at
                    // the loop end; without it only once.
                    if (escapebuffer.append(c);
                    break;
                case 'u':
                    if (!lexer.isOneEight()) {
                        if (!expand) {
                            buffer.append('\\');
                            break;
                        }
                        if (regexp) {
                            lexer.readUTFEscapeRegexpLiteral(buffer);
                        } else {
                            lexer.readUTFEscape(buffertruesymbol);
                        }
                        if (hasNonAscii && buffer.getEncoding() != encoding) {
                            mixedEscape(lexerbuffer.getEncoding(), encoding);
                        }
                        continue;
                    }
                    // No break here: when lexer.isOneEight() is true, control falls
                    // through into the default case.
                default:
                    if (regexp) {
                        // Push the character back so parseEscapeIntoBuffer reads it.
                        src.unread(c);
                        parseEscapeIntoBuffer(srcbuffer);
                        if (hasNonAscii && buffer.getEncoding() != encoding) {
                            mixedEscape(lexerbuffer.getEncoding(), encoding);
                        }
                        
                        continue;
                    } else if (expand) {
                        src.unread(c);
                        if (escapebuffer.append('\\');
                        // The value from readEscape is appended at the loop end.
                        c = lexer.readEscape();
                    } else if (qwords && Character.isWhitespace(c)) {
                        /* ignore backslashed spaces in %w */
                    } else if (c !=  && !( != '\0' && c == )) {
                        // The backslash is kept unless the character matches one of
                        // the two delimiter tests (operands missing in this copy).
                        buffer.append('\\');
                    }
                }
            } else if (!lexer.isOneEight() && !Encoding.isAscii((bytec)) {
                if (buffer.getEncoding() != encoding) {
                    mixedEscape(lexerbuffer.getEncoding(), encoding);
                }
                c = src.readCodepoint(cencoding);
                if (c == -2) { // FIXME: Hack
                    throw new SyntaxException(.lexer.getPosition(),
                            null"invalid multibyte char (" + encoding + ")");
                }
                // FIXME: We basically go from bytes to codepoint back to bytes to append them...fix this
                if (lexer.tokenAddMBC(cbuffer) == .return .;
                continue;
            } else if (qwords && Character.isWhitespace(c)) {
                // Unescaped whitespace stops the current run when qwords is set.
                src.unread(c);
                break;
            }
            if (!lexer.isOneEight()) {
                // Hmm did they change this?
/*                if (c == '\0' && symbol) {
                    throw new SyntaxException(PID.NUL_IN_SYMBOL, lexer.getPosition(),
                            src.getCurrentLine(), "symbol cannot contain '\\0'");
                } else*/ if ((c & 0x80) != 0) {
                    hasNonAscii = true;
                    if (buffer.getEncoding() != encoding) {
                        mixedEscape(lexerbuffer.getEncoding(), encoding);
                    }
                }
            }
            // Common tail: whatever c holds now becomes part of the content.
            buffer.append(c);
        }
        
        return c;
    }
    // Was a goto in original ruby lexer
    /**
     * Reads the character that follows a control or meta prefix and copies it
     * into buffer. A backslash starts another escape and is handed to
     * parseEscapeIntoBuffer; any other ordinary character is appended as is.
     * The remaining case label throws a SyntaxException with the message
     * Invalid escape character syntax.
     *
     * NOTE(review): the constant of the throwing case label and the error id
     * were lost in extraction; the label is presumably the end-of-input
     * sentinel -- confirm.
     *
     * @param src    character source being consumed
     * @param buffer destination for the escape text
     * @throws java.io.IOException declared because the source is read
     */
    private void escaped(LexerSource srcByteList bufferthrows java.io.IOException {
        int c;
        switch (c = src.read()) {
        case '\\':
            parseEscapeIntoBuffer(srcbuffer);
            break;
        case .:
            throw new SyntaxException(.src.getPosition(),
                    src.getCurrentLine(), "Invalid escape character syntax");
        default:
            buffer.append(c);
        }
    }
    /**
     * Reads the character after a backslash and copies the escape sequence into
     * buffer in textual form: the backslash and the escape characters are
     * appended as read and are not turned into the character they denote.
     *
     * Visible cases:
     * - newline: nothing is appended;
     * - digits 0 to 7: backslash, the digit, and up to two further characters
     *   accepted by RubyYaccLexer.isOctChar;
     * - x: backslash, x, one required character accepted by
     *   RubyYaccLexer.isHexChar, and an optional second one;
     * - M followed by a minus sign, C followed by a minus sign, and c: the
     *   prefix is appended and the next character is handled by escaped;
     * - anything else: see the default branch.
     * Malformed input raises a SyntaxException with the message
     * Invalid escape character syntax.
     *
     * NOTE(review): the sentinel constants, the error ids and one operand of
     * the default-branch condition were lost in extraction.
     *
     * @param src    character source positioned just after a backslash
     * @param buffer destination for the escape text
     * @throws java.io.IOException declared because the source is read
     */
    private void parseEscapeIntoBuffer(LexerSource srcByteList bufferthrows java.io.IOException {
        int c;
        switch (c = src.read()) {
        case '\n':
            break/* just ignore */
        case '0':
        case '1':
        case '2':
        case '3'/* octal constant */
        case '4':
        case '5':
        case '6':
        case '7':
            buffer.append('\\');
            buffer.append(c);
            // At most two more digits follow the first one.
            for (int i = 0; i < 2; i++) {
                c = src.read();
                if (c == .) {
                    throw new SyntaxException(.src.getPosition(),
                            src.getCurrentLine(), "Invalid escape character syntax");
                }
                if (!RubyYaccLexer.isOctChar(c)) {
                    // Not part of the escape: give it back and stop.
                    src.unread(c);
                    break;
                }
                buffer.append(c);
            }
            break;
        case 'x'/* hex constant */
            buffer.append('\\');
            buffer.append(c);
            c = src.read();
            // The first hex digit is mandatory.
            if (!RubyYaccLexer.isHexChar(c)) {
                throw new SyntaxException(.src.getPosition(),
                        src.getCurrentLine(), "Invalid escape character syntax");
            }
            buffer.append(c);
            c = src.read();
            // The second hex digit is optional.
            if (RubyYaccLexer.isHexChar(c)) {
                buffer.append(c);
            } else {
                src.unread(c);
            }
            break;
        case 'M':
            if ((c = src.read()) != '-') {
                throw new SyntaxException(.src.getPosition(),
                        src.getCurrentLine(), "Invalid escape character syntax");
            }
            buffer.append(new byte[] { '\\''M''-' });
            escaped(srcbuffer);
            break;
        case 'C':
            if ((c = src.read()) != '-') {
                throw new SyntaxException(.src.getPosition(),
                        src.getCurrentLine(), "Invalid escape character syntax");
            }
            buffer.append(new byte[] { '\\''C''-' });
            escaped(srcbuffer);
            break;
        case 'c':
            buffer.append(new byte[] { '\\''c' });
            escaped(srcbuffer);
            break;
        case .:
            throw new SyntaxException(.src.getPosition(),
                    src.getCurrentLine(), "Invalid escape character syntax");
        default:
            // NOTE(review): joined with ||, this test can only be false when c
            // equals a backslash and the missing right-hand operand at the same
            // time, so the backslash is appended in nearly every case. Confirm
            // whether && was intended; behavior is left unchanged here.
            if (c != '\\' || c != ) {
                buffer.append('\\');
            }
            buffer.append(c);
        }
    }
New to GrepCode? Check out our FAQ X