Start line:  
End line:  

Snippet Preview

Snippet HTML Code

Stack Overflow Questions
BEGIN LICENSE BLOCK ***** Version: EPL 1.0/GPL 2.0/LGPL 2.1 The contents of this file are subject to the Eclipse Public License Version 1.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.eclipse.org/legal/epl-v10.html Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. Copyright (C) 2001 Alan Moore <alan_moore@gmx.net> Copyright (C) 2001-2002 Benoit Cerrina <b.cerrina@wanadoo.fr> Copyright (C) 2001-2004 Jan Arne Petersen <jpetersen@uni-bonn.de> Copyright (C) 2002-2004 Anders Bengtsson <ndrsbngtssn@yahoo.se> Copyright (C) 2004-2005 Thomas E Enebo <enebo@acm.org> Copyright (C) 2004 Stefan Matthias Aust <sma@3plus4.de> Copyright (C) 2005 David Corbin <dcorbin@users.sourceforge.net> Copyright (C) 2006 Nick Sieger <nicksieger@gmail.com> Copyright (C) 2006 Miguel Covarrubias <mlcovarrubias@gmail.com> Alternatively, the contents of this file may be used under the terms of either of the GNU General Public License Version 2 or later (the "GPL"), or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), in which case the provisions of the GPL or the LGPL are applicable instead of those above. If you wish to allow use of your version of this file only under the terms of either the GPL or the LGPL, and not to allow others to use your version of this file under the terms of the EPL, indicate your decision by deleting the provisions above and replace them with the notice and other provisions required by the GPL or the LGPL. If you do not delete the provisions above, a recipient may use your version of this file under the terms of any one of the EPL, the GPL or the LGPL. END LICENSE BLOCK ***
  
  package org.jruby;
  
  import static org.jruby.anno.FrameField.BACKREF;
  import static org.jruby.anno.FrameField.LASTLINE;
  
  import java.lang.ref.SoftReference;
  import java.util.Map;
  import java.util.concurrent.ConcurrentHashMap;
  
  import org.jcodings.specific.ASCIIEncoding;
  import org.jcodings.specific.USASCIIEncoding;
  import org.jcodings.specific.UTF8Encoding;
  import org.joni.Matcher;
  import org.joni.Option;
  import org.joni.Regex;
  import org.joni.Region;
  import org.joni.Syntax;
  import org.jruby.common.IRubyWarnings.ID;
  import org.jruby.parser.ReOptions;
  import org.jruby.runtime.ClassIndex;
  import org.jruby.runtime.encoding.EncodingCapable;
  import org.jruby.runtime.encoding.MarshalEncoding;
  
  @JRubyClass(name="Regexp")
  public class RubyRegexp extends RubyObject implements ReOptionsEncodingCapableMarshalEncoding {
      private Regex pattern;
      private ByteList str = .;
      private RegexpOptions options;
  
      public static final int ARG_ENCODING_FIXED     =   16;
      public static final int ARG_ENCODING_NONE      =   32;
  
      public void setLiteral() {
          .setLiteral(true);
      }
  
      public void clearLiteral() {
          .setLiteral(false);
      }
  
      public boolean isLiteral() {
         return .isLiteral();
     }
 
     public boolean isKCodeDefault() {
         return .isKcodeDefault();
     }
 
     public void setEncodingNone() {
         .setEncodingNone(true);
     }
     
     public void clearEncodingNone() {
         .setEncodingNone(false);
     }
 
     public boolean isEncodingNone() {
         return .isEncodingNone();
     }
 
     public KCode getKCode() {
         return .getKCode();
     }
 
     @Override
     public Encoding getEncoding() {
         return .getEncoding();
     }
 
     /**
      * Intentionally a no-op: the requested encoding is ignored and neither the
      * compiled pattern nor the source bytes are touched. The FIXMEs below record
      * the open questions.
      *
      * @param encoding ignored
      */
     @Override
     public void setEncoding(Encoding encoding) {
         // FIXME: Which encoding should be changed here?  
         // FIXME: transcode?
     }
 
     @Override
     public boolean shouldMarshalEncoding() {
         return getEncoding() != .;
     }
 
     @Override
     public Encoding getMarshalEncoding() {
         return getEncoding();
     }
 
     private static final class RegexpCache {
         private volatile SoftReference<Map<ByteListRegex>> cache = new SoftReference<Map<ByteListRegex>>(null);
         private Map<ByteListRegexget() {
             Map<ByteListRegexpatternCache = .get();
             if (patternCache == null) {
                 patternCache = new ConcurrentHashMap<ByteListRegex>(5);
                  = new SoftReference<Map<ByteListRegex>>(patternCache);
             }
             return patternCache;
         }
     }
 
     // Three independent caches, all keyed by the caller-supplied source bytes:
     // one for patterns compiled as-is, one for quoted patterns, one for preprocessed patterns.
     private static final RegexpCache patternCache = new RegexpCache();
     private static final RegexpCache quotedPatternCache = new RegexpCache();
     private static final RegexpCache preprocessedPatternCache = new RegexpCache();
 
     private static Regex makeRegexp(Ruby runtimeByteList bytesRegexpOptions optionsEncoding enc) {
         try {
             int p = bytes.getBegin();
             return new Regex(bytes.getUnsafeBytes(), pp + bytes.getRealSize(), options.toJoniOptions(), enc.runtime.getWarnings());
         } catch (Exception e) {
             if (runtime.is1_9()) {
                 raiseRegexpError19(runtimebytesencoptionse.getMessage());
             } else {
                 raiseRegexpError(runtimebytesencoptionse.getMessage());
             }
             return null// not reached
         }
     }
 
     static Regex getRegexpFromCache(Ruby runtimeByteList bytesEncoding encRegexpOptions options) {
         Map<ByteListRegexcache = .get();
         Regex regex = cache.get(bytes);
         if (regex != null && regex.getEncoding() == enc && regex.getOptions() == options.toJoniOptions()) return regex;
         regex = makeRegexp(runtimebytesoptionsenc);
         regex.setUserObject(bytes);
         cache.put(bytesregex);
         return regex;
     }
 
     static Regex getQuotedRegexpFromCache(Ruby runtimeByteList bytesEncoding encRegexpOptions options) {
         Map<ByteListRegexcache = .get();
         Regex regex = cache.get(bytes);
         if (regex != null && regex.getEncoding() == enc && regex.getOptions() == options.toJoniOptions()) return regex;
         ByteList quoted = quote(bytesenc);
         regex = makeRegexp(runtimequotedoptionsenc);
         regex.setUserObject(quoted);
         cache.put(bytesregex);
         return regex;
     }
 
     static Regex getQuotedRegexpFromCache19(Ruby runtimeByteList bytesRegexpOptions optionsboolean asciiOnly) {
         Map<ByteListRegexcache = .get();
         Regex regex = cache.get(bytes);
         Encoding enc = asciiOnly ? . : bytes.getEncoding();
         if (regex != null && regex.getEncoding() == enc && regex.getOptions() == options.toJoniOptions()) return regex;
         ByteList quoted = quote19(bytesasciiOnly);
         regex = makeRegexp(runtimequotedoptionsquoted.getEncoding());
         regex.setUserObject(quoted);
         cache.put(bytesregex);
         return regex;
     }
 
     private static Regex getPreprocessedRegexpFromCache(Ruby runtimeByteList bytesEncoding encRegexpOptions optionsErrorMode mode) {
         Map<ByteListRegexcache = .get();
         Regex regex = cache.get(bytes);
         if (regex != null && regex.getEncoding() == enc && regex.getOptions() == options.toJoniOptions()) return regex;
         ByteList preprocessed = preprocess(runtimebytesencnew Encoding[]{null}, .);
         regex = makeRegexp(runtimepreprocessedoptionsenc);
         regex.setUserObject(preprocessed);
         cache.put(bytesregex);
         return regex;
     }
 
     public static RubyClass createRegexpClass(Ruby runtime) {
         RubyClass regexpClass = runtime.defineClass("Regexp"runtime.getObject(), );
         runtime.setRegexp(regexpClass);
 
         regexpClass.index = .;
         regexpClass.setReifiedClass(RubyRegexp.class);
         
         regexpClass.kindOf = new RubyModule.JavaClassKindOf(RubyRegexp.class);
 
         regexpClass.defineConstant("IGNORECASE"runtime.newFixnum());
         regexpClass.defineConstant("EXTENDED"runtime.newFixnum());
         regexpClass.defineConstant("MULTILINE"runtime.newFixnum());
 
         if (runtime.is1_9()) {
             regexpClass.defineConstant("FIXEDENCODING"runtime.newFixnum());
             regexpClass.defineConstant("NOENCODING"runtime.newFixnum());
         }
 
         regexpClass.defineAnnotatedMethods(RubyRegexp.class);
         regexpClass.getSingletonClass().defineAlias("compile""new");
 
         return regexpClass;
     }
 
     private static ObjectAllocator REGEXP_ALLOCATOR = new ObjectAllocator() {
         @Override
         public IRubyObject allocate(Ruby runtimeRubyClass klass) {
             return new RubyRegexp(runtimeklass);
         }
     };
     
     public static int matcherSearch(Ruby runtimeMatcher matcherint startint rangeint option) {
         try {
             RubyThread thread = runtime.getCurrentContext().getThread();
             SearchMatchTask task = new SearchMatchTask(threadmatcherstartrangeoptionfalse);
             thread.executeBlockingTask(task);
             return task.retval;
         } catch (InterruptedException e) {
             throw runtime.newInterruptedRegexpError("Regexp Interrrupted");
         }
     }
     
     public static int matcherMatch(Ruby runtimeMatcher matcherint startint rangeint option) {
         try {
             RubyThread thread = runtime.getCurrentContext().getThread();
             SearchMatchTask task = new SearchMatchTask(threadmatcherstartrangeoptiontrue);
             thread.executeBlockingTask(task);
             return task.retval;
         } catch (InterruptedException e) {
             throw runtime.newInterruptedRegexpError("Regexp Interrrupted");
         }
     }
     
     private static class SearchMatchTask implements RubyThread.BlockingTask {
         int retval;
         final RubyThread thread;
         final Matcher matcher;
         final int start;
         final int range;
         final int option;
         final boolean match;
         
         SearchMatchTask(RubyThread threadMatcher matcherint startint rangeint optionboolean match) {
             this. = thread;
             this. = matcher;
             this. = start;
             this. = range;
             this. = option;
             this. = match;
         }
         
         @Override
         public void run() throws InterruptedException {
              =  ?
                     .matchInterruptible() :
                     .searchInterruptible();
         }
 
         @Override
         public void wakeup() {
             .getNativeThread().interrupt();
         }
     }
 
     @Override
     public int getNativeTypeIndex() {
         return .;
     }

    
     /**
      * Used by the allocator.
      */
 
     private RubyRegexp(Ruby runtimeRubyClass klass) {
         super(runtimeklass);
         this. = new RegexpOptions();
     }

    
     /**
      * Default constructor.
      */
 
     private RubyRegexp(Ruby runtime) {
         super(runtimeruntime.getRegexp());
         this. = new RegexpOptions();
     }
 
     private RubyRegexp(Ruby runtimeByteList str) {
         this(runtime);
         str.getClass();
         this. = str;
         this. = getRegexpFromCache(runtimestrgetEncoding(runtimestr), .);
     }
 
     private RubyRegexp(Ruby runtimeByteList strRegexpOptions options) {
         this(runtime);
         str.getClass();
 
         if (runtime.is1_9()) {
             initializeCommon19(strstr.getEncoding(), options);
         } else {
             this. = options;
             this. = str;
             this. = getRegexpFromCache(runtimestrgetEncoding(runtimestr), options);
         }
     }
 
     private Encoding getEncoding(Ruby runtimeByteList str) {
         if (runtime.is1_9()) return str.getEncoding();
 
         // Whatever $KCODE is we should use
         if (.isKcodeDefault()) return runtime.getKCode().getEncoding();
         
         return .getKCode().getEncoding();
     }
 
     // used only by the compiler/interpreter (will set the literal flag)
     public static RubyRegexp newRegexp(Ruby runtimeString patternRegexpOptions options) {
         return newRegexp(runtime, ByteList.create(pattern), options);
     }
 
     // used only by the compiler/interpreter (will set the literal flag)
     public static RubyRegexp newRegexp(Ruby runtimeByteList patternRegexpOptions options) {
         try {
             return new RubyRegexp(runtimepattern, (RegexpOptions)options.clone());
         } catch (RaiseException re) {
             throw runtime.newSyntaxError(re.getMessage());
         }
     }
 
     // used only by the compiler/interpreter (will set the literal flag)
     public static RubyRegexp newDRegexp(Ruby runtimeRubyString patternRegexpOptions options) {
         try {
             return new RubyRegexp(runtimepattern.getByteList(), (RegexpOptions)options.clone());
         } catch (RaiseException re) {
             throw runtime.newRegexpError(re.getMessage());
         }
     }
 
     // used only by the compiler/interpreter (will set the literal flag)
     public static RubyRegexp newDRegexp(Ruby runtimeRubyString patternint joniOptions) {
         try {
             RegexpOptions options = RegexpOptions.fromJoniOptions(joniOptions);
             return new RubyRegexp(runtimepattern.getByteList(), options);
         } catch (RaiseException re) {
             throw runtime.newRegexpError(re.getMessage());
         }
     }
 
     // used only by the compiler/interpreter (will set the literal flag)
     public static RubyRegexp newDRegexpEmbedded(Ruby runtimeRubyString patternint embeddedOptions) {
         try {
             RegexpOptions options = RegexpOptions.fromEmbeddedOptions(embeddedOptions);
             // FIXME: Massive hack (fix in DRegexpNode too for interpreter)
             if (pattern.getEncoding() == .) {
                 pattern.setEncoding(.);
             }
             return new RubyRegexp(runtimepattern.getByteList(), options);
         } catch (RaiseException re) {
             throw runtime.newRegexpError(re.getMessage());
         }
     }
     
     public static RubyRegexp newDRegexpEmbedded19(Ruby runtimeIRubyObject[] stringsint embeddedOptions) {
         try {
             RegexpOptions options = RegexpOptions.fromEmbeddedOptions(embeddedOptions);
             RubyString pattern = preprocessDRegexp(runtimestringsoptions);
             
             return new RubyRegexp(runtimepattern.getByteList(), options);
         } catch (RaiseException re) {
             throw runtime.newRegexpError(re.getMessage());
         }
         
     }
     
     public static RubyRegexp newRegexp(Ruby runtimeByteList pattern) {
         return new RubyRegexp(runtimepattern);
     }
 
     static RubyRegexp newRegexp(Ruby runtimeByteList strRegex pattern) {
         RubyRegexp regexp = new RubyRegexp(runtime);
         str.getClass();
         regexp.str = str;
         regexp.options = RegexpOptions.fromJoniOptions(pattern.getOptions());
         regexp.pattern = pattern;
         return regexp;
     }
     
     // internal usage (Complex/Rational)
     static RubyRegexp newDummyRegexp(Ruby runtimeRegex regex) {
         RubyRegexp regexp = new RubyRegexp(runtime);
         regexp.pattern = regex;
         regexp.str = .;
         regexp.options.setFixed(true);
         return regexp;
     }

    
     /**
      * rb_reg_options
      */
 
     public RegexpOptions getOptions() {
         check();
         return ;
     }
 
     public final Regex getPattern() {
         check();
         return ;
     }
 
     private static void encodingMatchError(Ruby runtimeRegex patternEncoding strEnc) {
         throw runtime.newEncodingCompatibilityError("incompatible encoding regexp match (" +
                 pattern.getEncoding() + " regexp with " + strEnc + " string)");
     }
 
     private Encoding checkEncoding(RubyString strboolean warn) {
         if (str.scanForCodeRange() == .) {
             throw getRuntime().newArgumentError("invalid byte sequence in " + str.getEncoding());
         }
         check();
         Encoding enc = str.getEncoding();
         if (!enc.isAsciiCompatible()) {
             if (enc != .getEncoding()) encodingMatchError(getRuntime(), enc);
         } else if (.isFixed()) {
             if (enc != .getEncoding() && 
                (!.getEncoding().isAsciiCompatible() ||
                str.scanForCodeRange() != .)) encodingMatchError(getRuntime(), enc);
             enc = .getEncoding();
         }
         if (warn && isEncodingNone() && enc != . && str.scanForCodeRange() != .) {
             getRuntime().getWarnings().warn(."regexp match /.../n against to " + enc + " string");
         }
         return enc;
     }
 
     public final Regex preparePattern(RubyString str) {
         check();
         Encoding enc = checkEncoding(strtrue);
         if (enc == .getEncoding()) return ;
         return getPreprocessedRegexpFromCache(getRuntime(), this.enc.);
     }
 
     static Regex preparePattern(Ruby runtimeRegex patternRubyString str) {
         if (str.scanForCodeRange() == .) {
             throw runtime.newArgumentError("invalid byte sequence in " + str.getEncoding());
         }
         Encoding enc = str.getEncoding();
         if (!enc.isAsciiCompatible()) {
             if (enc != pattern.getEncoding()) encodingMatchError(runtimepatternenc);
         }
         // TODO: check for isKCodeDefault() somehow
 //        if (warn && isEncodingNone() && enc != ASCIIEncoding.INSTANCE && str.scanForCodeRange() != StringSupport.CR_7BIT) {
 //            getRuntime().getWarnings().warn(ID.REGEXP_MATCH_AGAINST_STRING, "regexp match /.../n against to " + enc + " string");
 //        }
         if (enc == pattern.getEncoding()) return pattern;
         return getPreprocessedRegexpFromCache(runtime, (ByteList)pattern.getUserObject(), enc, RegexpOptions.fromJoniOptions(pattern.getOptions()), .);
     }
 
     // Selects how preprocessing failures are reported; the per-mode handling
     // lives in raisePreprocessError.
     private static enum ErrorMode {RAISE, PREPROCESS, DESC} 
 
     private static int raisePreprocessError(Ruby runtimeByteList strString errErrorMode mode) {
         switch (mode) {
         case :
             raiseRegexpError19(runtimestrstr.getEncoding(), .err);
         case :
             throw runtime.newArgumentError("regexp preprocess failed: " + err);
         case :
             // silent ?
         }
         return 0;
     }
 
     private static int readEscapedByte(Ruby runtimebyte[]toint toPbyte[]bytesint pint endByteList strErrorMode mode) {
         if (p == end || bytes[p++] != (byte)'\\'raisePreprocessError(runtimestr"too short escaped multibyte character"mode);
 
         boolean metaPrefix = falsectrlPrefix = false;
         int code = 0;
         while (true) {
             if (p == endraisePreprocessError(runtimestr"too short escape sequence"mode);
 
             switch (bytes[p++]) {
             case '\\'code = '\\'break;
             case 'n'code = '\n'break;
             case 't'code = '\t'break;
             case 'r'code = '\r'break;
             case 'f'code = '\f'break;
             case 'v'code = '\013'break;
             case 'a'code = '\007'break;
             case 'e'code = '\033'break;
 
             /* \OOO */
             case '0'case '1'case '2'case '3':
             case '4'case '5'case '6'case '7':
                 p--;
                 int olen = end < p + 3 ? end - p : 3;
                 code = StringSupport.scanOct(bytespolen);
                 p += StringSupport.octLength(bytespolen);
                 break;
 
             case 'x'/* \xHH */
                 int hlen = end < p + 2 ? end - p : 2;
                 code = StringSupport.scanHex(bytesphlen);
                 int len = StringSupport.hexLength(bytesphlen);
                 if (len < 1) raisePreprocessError(runtimestr"invalid hex escape"mode);
                 p += len;
                 break;
 
             case 'M'/* \M-X, \M-\C-X, \M-\cX */
                 if (metaPrefixraisePreprocessError(runtimestr"duplicate meta escape"mode);
                 metaPrefix = true;
                 if (p + 1 < end && bytes[p++] == (byte)'-' && (bytes[p] & 0x80) == 0) {
                     if (bytes[p] == (byte)'\\') {
                         p++;
                         continue;
                     } else {
                         code = bytes[p++] & 0xff;
                         break;
                     }
                 }
                 raisePreprocessError(runtimestr"too short meta escape"mode);
 
             case 'C'/* \C-X, \C-\M-X */
                 if (p == end || bytes[p++] != (byte)'-'raisePreprocessError(runtimestr"too short control escape"mode);
 
             case 'c'/* \cX, \c\M-X */
                 if (ctrlPrefixraisePreprocessError(runtimestr"duplicate control escape"mode);
                 ctrlPrefix = true;
                 if (p < end && (bytes[p] & 0x80) == 0) {
                     if (bytes[p] == (byte)'\\') {
                         p++;
                         continue;
                     } else {
                         code = bytes[p++] & 0xff;
                         break;
                     }
                 }
                 raisePreprocessError(runtimestr"too short control escape"mode);
             default:
                 raisePreprocessError(runtimestr"unexpected escape sequence"mode);
             } // switch
 
             if (code < 0 || code > 0xff) raisePreprocessError(runtimestr"invalid escape code"mode);
 
             if (ctrlPrefixcode &= 0x1f;
             if (metaPrefixcode |= 0x80;
 
             to[toP] = (byte)code;
             return p;
         } // while
     }

    
     /**
      * Unescape escaped non-ascii character at start position, appending all to the given bytelist if provided.
      *
      * @param runtime current runtime
      * @param to output bytelist; if null, no appending will be done
      * @param bytes incoming bytes
      * @param p start position
      * @param end end position
      * @param enc bytes' encoding
      * @param encp out param for fixed encoding
      * @param str original bytes wrapper
      * @param mode error mode
      * @return new position after performing unescaping
      */
 
     // MRI: unescape_escapted_nonascii
     private static int unescapeEscapedNonAscii(Ruby runtimeByteList tobyte[]bytesint pint endEncoding encEncoding[]encpByteList strErrorMode mode) {
         byte[]chBuf = new byte[enc.maxLength()];
         int chLen = 0;
 
         p = readEscapedByte(runtimechBufchLen++, bytespendstrmode);
         while (chLen < enc.maxLength() && StringSupport.preciseLength(encchBuf, 0, chLen) < -1) { // MBCLEN_NEEDMORE_P
             p = readEscapedByte(runtimechBufchLen++, bytespendstrmode);
         }
 
         int cl = StringSupport.preciseLength(encchBuf, 0, chLen);
         if (cl == -1) {
             raisePreprocessError(runtimestr"invalid multibyte escape"mode); // MBCLEN_INVALID_P
         }
 
         if (chLen > 1 || (chBuf[0] & 0x80) != 0) {
             if (to != nullto.append(chBuf, 0, chLen);
 
             if (encp[0] == null) {
                 encp[0] = enc;
             } else if (encp[0] != enc) {
                 raisePreprocessError(runtimestr"escaped non ASCII character in UTF-8 regexp"mode);
             }
         } else {
             if (to != null) Sprintf.sprintf(runtimeto"\\x%02X"chBuf[0] & 0xff);
         }
         return p;
     }
 
     private static void checkUnicodeRange(Ruby runtimeint codeByteList strErrorMode mode) {
         // Unicode is can be only 21 bits long, int is enough
         if ((0xd800 <= code && code <= 0xdfff) /* Surrogates */ || 0x10ffff < code) {
             raisePreprocessError(runtimestr"invalid Unicode range"mode);
         }
     }

    
     /**
      * Append the given utf8 characters to the buffer, if given, checking for errors along the way.
      *
      * @param runtime current runtime
      * @param to output buffer; if null, no appending will be done
      * @param code utf8 character code
      * @param enc output param for new encoding
      * @param str original wrapper of source bytes
      * @param mode error mode
      */
 
     private static void appendUtf8(Ruby runtimeByteList toint codeEncoding[] encByteList strErrorMode mode) {
         checkUnicodeRange(runtimecodestrmode);
 
         if (code < 0x80) {
             if (to != null) Sprintf.sprintf(runtimeto"\\x%02X"code);
         } else {
             if (to != null) {
                 to.ensure(to.getRealSize() + 6);
                 to.setRealSize(to.getRealSize() + Pack.utf8Decode(runtimeto.getUnsafeBytes(), to.getBegin() + to.getRealSize(), code));
             }
             if (enc[0] == null) {
                 enc[0] = .;
             } else if (!(enc[0] instanceof UTF8Encoding)) { // do not load the class if not used
                 raisePreprocessError(runtimestr"UTF-8 character in non UTF-8 regexp"mode);
             }
         }
     }
    
    
     /**
      * Unescape unicode characters at given offset, appending to the given out buffer if provided.
      *
      * @param runtime current runtime
      * @param to output buffer; if null, no appending will be done
      * @param bytes input bytes
      * @param p start position
      * @param end end position
      * @param encp out param for fixed encoding
      * @param str original bytes wrapper
      * @param mode error mode
      * @return new position after unescaping
      */
 
     private static int unescapeUnicodeList(Ruby runtimeByteList tobyte[]bytesint pint endEncoding[]encpByteList strErrorMode mode) {
         while (p < end && ..isSpace(bytes[p] & 0xff)) p++;
 
         boolean hasUnicode = false
         while (true) {
             int code = StringSupport.scanHex(bytespend - p);
             int len = StringSupport.hexLength(bytespend - p);
             if (len == 0) break;
             if (len > 6) raisePreprocessError(runtimestr"invalid Unicode range"mode);
             p += len;
             if (to != nullappendUtf8(runtimetocodeencpstrmode);
             hasUnicode = true;
             while (p < end && ..isSpace(bytes[p] & 0xff)) p++;
         }
 
         if (!hasUnicoderaisePreprocessError(runtimestr"invalid Unicode list"mode); 
         return p;
     }

    
     /**
      * Unescape unicode BMP char at given offset, appending to the specified buffer if non-null.
      *
      * @param runtime current runtime
      * @param to output buffer; if null, no appending will be done
      * @param bytes input bytes
      * @param p start position
      * @param end end position
      * @param encp out param for fixed encoding
      * @param str original bytes wrapper
      * @param mode error mode
      * @return new position after unescaping
      */
 
     private static int unescapeUnicodeBmp(Ruby runtimeByteList tobyte[] bytesint pint endEncoding[] encpByteList strErrorMode mode) {
         if (p + 4 > endraisePreprocessError(runtimestr"invalid Unicode escape"mode);
         int code = StringSupport.scanHex(bytesp, 4);
         int len = StringSupport.hexLength(bytesp, 4);
         if (len != 4) raisePreprocessError(runtimestr"invalid Unicode escape"mode);
         appendUtf8(runtimetocodeencpstrmode);
         return p + 4;
     }

    
     /**
      * Unescape non-ascii elements in the given string, appending the results to the given bytelist if provided.
      *
      * @param runtime current runtime
      * @param to output bytelist; if null, no appending will be done
      * @param bytes the bytes to unescape
      * @param p starting position
      * @param end ending position
      * @param enc bytes' encoding
      * @param encp out param for fixed encoding
      * @param str original wrapper for the bytes
      * @param mode error mode
      * @return whether any property elements were encountered while walking
      */
 
     private static boolean unescapeNonAscii(Ruby runtimeByteList tobyte[]bytesint pint endEncoding encEncoding[]encpByteList strErrorMode mode) {
         boolean hasProperty = false;
 
         while (p < end) {
             int cl = StringSupport.preciseLength(encbytespend);
             if (cl <= 0) raisePreprocessError(runtimestr"invalid multibyte character"mode);
             if (cl > 1 || (bytes[p] & 0x80) != 0) {
                 if (to != nullto.append(bytespcl);
                 p += cl;
                 if (encp[0] == null) {
                     encp[0] = enc;
                 } else if (encp[0] != enc) {
                     raisePreprocessError(runtimestr"non ASCII character in UTF-8 regexp"mode);
                 }
                 continue;
             }
             int c;
             switch (c = bytes[p++] & 0xff) {
             case '\\':
                 if (p == endraisePreprocessError(runtimestr"too short escape sequence"mode);
 
                 switch (c = bytes[p++] & 0xff) {
                 case '1'case '2'case '3':
                 case '4'case '5'case '6'case '7'/* \O, \OO, \OOO or backref */
                     if (StringSupport.scanOct(bytesp - 1, end - (p - 1)) <= 0177) {
                         if (to != nullto.append('\\').append(c);
                         break;
                     }
 
                 case '0'/* \0, \0O, \0OO */
                 case 'x'/* \xHH */
                 case 'c'/* \cX, \c\M-X */
                 case 'C'/* \C-X, \C-\M-X */
                 case 'M'/* \M-X, \M-\C-X, \M-\cX */
                     p = unescapeEscapedNonAscii(runtimetobytesp - 2, endencencpstrmode);
                     break;
 
                 case 'u':
                     if (p == endraisePreprocessError(runtimestr"too short escape sequence"mode);
                     if (bytes[p] == (byte)'{') { /* \\u{H HH HHH HHHH HHHHH HHHHHH ...} */
                         p++;
                         p = unescapeUnicodeList(runtimetobytespendencpstrmode);
                         if (p == end || bytes[p++] != (byte)'}'raisePreprocessError(runtimestr"invalid Unicode list"mode);
                     } else { /* \\uHHHH */
                         p = unescapeUnicodeBmp(runtimetobytespendencpstrmode);
                     }
                     break;
                 case 'p'/* \p{Hiragana} */
                     if (encp[0] == nullhasProperty = true;
                     if (to != nullto.append('\\').append(c);
                     break;
 
                 default:
                     if (to != nullto.append('\\').append(c);
                     break;
                 } // inner switch
                 break;
 
             default:
                 if (to != nullto.append(c);
             } // switch
         } // while
         return hasProperty;
     }


    
     /**
      * Preprocess the given string for use in regexp, raising errors for encoding incompatibilities that arise.
      * This version produces a new unescaped version of the string based on fixes performed while walking.
      *
      * @param runtime current runtime
      * @param str string to preprocess
      * @param enc string's encoding
      * @param fixedEnc new encoding after fixing
      * @param mode mode of errors
      * @return a new unescaped string
      */
 
     private static ByteList preprocess(Ruby runtimeByteList strEncoding encEncoding[]fixedEncErrorMode mode) {
         ByteList to = new ByteList(str.getRealSize());
 
         if (enc.isAsciiCompatible()) {
             fixedEnc[0] = null;
         } else {
             fixedEnc[0] = enc;
             to.setEncoding(enc);
         }
 
         boolean hasProperty = unescapeNonAscii(runtimetostr.getUnsafeBytes(), str.getBegin(), str.getBegin() + str.getRealSize(), encfixedEncstrmode);
         if (hasProperty && fixedEnc[0] == nullfixedEnc[0] = enc;
         if (fixedEnc[0] != nullto.setEncoding(fixedEnc[0]);
         return to;
     }

    
     /**
      * Preprocess the given string for use in regexp, raising errors for encoding incompatibilities that arise.
      * This version does not produce a new, unescaped version of the bytelist, and simply does the
      * string-walking portion of the logic.
      *
      * @param runtime current runtime
      * @param str string to preprocess
      * @param enc string's encoding
      * @param fixedEnc new encoding after fixing
      * @param mode mode of errors
      */
 
     private static void preprocessLight(Ruby runtimeByteList strEncoding encEncoding[]fixedEncErrorMode mode) {
         if (enc.isAsciiCompatible()) {
             fixedEnc[0] = null;
         } else {
             fixedEnc[0] = enc;
         }
 
         boolean hasProperty = unescapeNonAscii(runtimenullstr.getUnsafeBytes(), str.getBegin(), str.getBegin() + str.getRealSize(), encfixedEncstrmode);
         if (hasProperty && fixedEnc[0] == nullfixedEnc[0] = enc;
     }
 
     public static void preprocessCheck(Ruby runtimeByteList bytes) {
         preprocess(runtimebytesbytes.getEncoding(), new Encoding[]{null}, .);
     }
     
     // rb_reg_preprocess_dregexp
     public static RubyString preprocessDRegexp(Ruby runtimeIRubyObject[] stringsRegexpOptions options) {
         RubyString string = null;
         Encoding regexpEnc = null;
         Encoding[] fixedEnc = new Encoding[1];
         
         for (int i = 0; i < strings.lengthi++) {
             RubyString str = strings[i].convertToString();
             Encoding strEnc = str.getEncoding();
             
             if (options.isEncodingNone() && strEnc != .) {
                 if (str.scanForCodeRange() != .) {
                     throw runtime.newRegexpError("/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
                 }
                 strEnc = .;
             }
             
             // This used to call preprocess, but the resulting bytelist was not
             // used. Since the preprocessing error-checking can be done without
             // creating a new bytelist, I added a "light" path.
             RubyRegexp.preprocessLight(runtimestr.getByteList(), strEncfixedEnc..);
             
             if (fixedEnc[0] != null) {
                 if (regexpEnc != null && regexpEnc != fixedEnc[0]) {
                     throw runtime.newRegexpError("encoding mismatch in dynamic regexp: " + new String(regexpEnc.getName()) + " and " + new String(fixedEnc[0].getName()));
                 }
                 regexpEnc = fixedEnc[0];
             }
             
             if (string == null) {
                 string = (RubyString)str.dup();
             } else {
                 string.append19(str);
             }
         }
         
         if (regexpEnc != null) {
             string.setEncoding(regexpEnc);
         }
 
         return string;
     }
 
     private void check() {
         if ( == nullthrow getRuntime().newTypeError("uninitialized Regexp");
     }
 
     @JRubyMethod(name = "try_convert", meta = true, compat = .)
     public static IRubyObject try_convert(ThreadContext contextIRubyObject recvIRubyObject args) {
         return TypeConverter.convertToTypeWithCheck(argscontext.runtime.getRegexp(), "to_regexp");
     }

    
    /** Ruby-level Regexp.quote / Regexp.escape (MRI: rb_reg_s_quote). */
 
     @JRubyMethod(name = {"quote""escape"}, required = 1, optional = 1, meta = true, compat = .)
     public static RubyString quote(ThreadContext contextIRubyObject recvIRubyObject[] args) {
         Ruby runtime = context.runtime;
         final KCode code;
         if (args.length == 1 || args[1].isNil()) {
             code = runtime.getKCode();
         } else {
             code = KCode.create(runtimeargs[1].toString());
         }
 
         RubyString src = args[0].convertToString();
         RubyString dst = RubyString.newStringShared(runtimequote(src.getByteList(), code.getEncoding()));
         dst.infectBy(src);
         return dst;
     }
 
     @JRubyMethod(name = {"quote""escape"}, meta = true, compat = .)
     public static IRubyObject quote19(ThreadContext contextIRubyObject recvIRubyObject arg) {
         Ruby runtime = context.runtime;
         RubyString str = operandCheck(runtimearg);
         return RubyString.newStringShared(runtimequote19(str.getByteList(), str.isAsciiOnly()));
     }

    
    /** Byte-level regexp quoting (MRI: rb_reg_quote). */
 
     private static ByteList quote(ByteList bsEncoding enc) {
         int p = bs.getBegin();
         int end = p + bs.getRealSize();
         byte[]bytes = bs.getUnsafeBytes();
 
         metaFound: do {
             for(; p < endp++) {
                 int c = bytes[p] & 0xff;
                 int cl = enc.length(bytespend);
                 if (cl != 1) {
                     while (cl-- > 0 && p < endp++;
                     p--;
                     continue;
                 }
                 switch (c) {
                 case '['case ']'case '{'case '}':
                 case '('case ')'case '|'case '-':
                 case '*'case '.'case '\\':
                 case '?'case '+'case '^'case '$':
                 case ' 'case '#':
                 case '\t'case '\f'case '\n'case '\r':
                     break metaFound;
                 }
             }
             return bs;
         } while (false);
 
         ByteList result = new ByteList(end * 2);
         byte[]obytes = result.getUnsafeBytes();
         int op = p - bs.getBegin();
         System.arraycopy(bytesbs.getBegin(), obytes, 0, op);
 
         for(; p < endp++) {
             int c = bytes[p] & 0xff;
             int cl = enc.length(bytespend);
             if (cl != 1) {
                 while (cl-- > 0 && p < endobytes[op++] = bytes[p++];
                 p--;
                 continue;
             }
 
             switch (c) {
             case '['case ']'case '{'case '}':
             case '('case ')'case '|'case '-':
             case '*'case '.'case '\\':
             case '?'case '+'case '^'case '$':
             case '#'obytes[op++] = '\\'break;
             case ' 'obytes[op++] = '\\'obytes[op++] = ' 'continue;
             case '\t':obytes[op++] = '\\'obytes[op++] = 't'continue;
             case '\n':obytes[op++] = '\\'obytes[op++] = 'n'continue;
             case '\r':obytes[op++] = '\\'obytes[op++] = 'r'continue;
             case '\f':obytes[op++] = '\\'obytes[op++] = 'f'continue;
             }
             obytes[op++] = (byte)c;
         }
 
         result.setRealSize(op);
         return result;
     }
 
    // Byte value 11 (0x0B) is the ASCII vertical tab; Java has no '\v' character escape,
    // so the value is spelled out as a named constant.
    // NOTE(review): presumably used as a case label by the quoting code - confirm.
    private static final int QUOTED_V = 11;
    static ByteList quote19(ByteList bsboolean asciiOnly) {
        int p = bs.getBegin();
        int end = p + bs.getRealSize();
        byte[]bytes = bs.getUnsafeBytes();
        Encoding enc = bs.getEncoding();
        metaFound: do {
            while (p < end) {
                final int c;
                final int cl;
                if (enc.isAsciiCompatible()) {
                    cl = 1;
                    c = bytes[p] & 0xff;
                } else {
                    cl = StringSupport.preciseLength(encbytespend);
                    c = enc.mbcToCode(bytespend);
                }
                if (!Encoding.isAscii(c)) {
                    p += StringSupport.length(encbytespend);
                    continue;
                }
                
                switch (c) {
                case '['case ']'case '{'case '}':
                case '('case ')'case '|'case '-':
                case '*'case '.'case '\\':
                case '?'case '+'case '^'case '$':
                case ' 'case '#':
                case '\t'case '\f'case case '\n'case '\r':
                    break metaFound;
                }
                p += cl;
            }
            if (asciiOnly) {
                ByteList tmp = bs.shallowDup();
                tmp.setEncoding(.);
                return tmp;
            }
            return bs;
        } while (false);
        ByteList result = new ByteList(end * 2);
        result.setEncoding(asciiOnly ? . : bs.getEncoding());
        byte[]obytes = result.getUnsafeBytes();
        int op = p - bs.getBegin();
        System.arraycopy(bytesbs.getBegin(), obytes, 0, op);
        while (p < end) {
            final int c;
            final int cl;
            if (enc.isAsciiCompatible()) {
                cl = 1;
                c = bytes[p] & 0xff;