Start line:  
End line:  

Snippet Preview

Snippet HTML Code

Stack Overflow Questions
   /*
    * Copyright (C) 2008 The Guava Authors
    *
    * Licensed under the Apache License, Version 2.0 (the "License");
    * you may not use this file except in compliance with the License.
    * You may obtain a copy of the License at
    *
    * http://www.apache.org/licenses/LICENSE-2.0
    *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package com.google.common.base;
  
  import static com.google.common.base.Preconditions.checkArgument;
  import static com.google.common.base.Preconditions.checkNotNull;
  
  
  import java.util.Arrays;
  import java.util.BitSet;
  
  import  javax.annotation.CheckReturnValue;

Determines a true or false value for any Java char value, just as Predicate does for any Object. Also offers basic text processing methods based on this function. Implementations are strongly encouraged to be side-effect-free and immutable.

Throughout the documentation of this class, the phrase "matching character" is used to mean "any character c for which this.matches(c) returns true".

Note: This class deals only with char values; it does not understand supplementary Unicode code points in the range 0x10000 to 0x10FFFF. Such logical characters are encoded into a String using surrogate pairs, and a CharMatcher treats these just as two separate characters.

Example usages:

   String trimmed = WHITESPACE.trimFrom(userInput);
   if (ASCII.matchesAllOf(s)) { ... }

See the Guava User Guide article on CharMatcher.

Author(s):
Kevin Bourrillion
Since:
1.0
  
  @Beta // Possibly change from chars to code points; decide constants vs. methods
  @GwtCompatible(emulated = true)
  public abstract class CharMatcher implements Predicate<Character> {
    // Constants
    
Determines whether a character is a breaking whitespace (that is, a whitespace which can be interpreted as a break between words for formatting purposes). See WHITESPACE for a discussion of that term.

Since:
2.0
  
    public static final CharMatcher BREAKING_WHITESPACE = new CharMatcher() {
      @Override
      public boolean matches(char c) {
        switch (c) {
          case '\t':
          case '\n':
          case '\013':
          case '\f':
          case '\r':
          case ' ':
          case '\u0085':
          case '\u1680':
          case '\u2028':
          case '\u2029':
          case '\u205f':
          case '\u3000':
            return true;
          case '\u2007':
            return false;
          default:
            return c >= '\u2000' && c <= '\u200a';
        }
      }
  
      @Override
      public String toString() {
        return "CharMatcher.BREAKING_WHITESPACE";
      }
    };

  
Determines whether a character is ASCII, meaning that its code point is less than 128.
  
    public static final CharMatcher ASCII = inRange('\0''\u007f'"CharMatcher.ASCII");
 
   private static class RangesMatcher extends CharMatcher {
     private final char[] rangeStarts;
     private final char[] rangeEnds;
 
     RangesMatcher(String descriptionchar[] rangeStartschar[] rangeEnds) {
       super(description);
       this. = rangeStarts;
       this. = rangeEnds;
       checkArgument(rangeStarts.length == rangeEnds.length);
       for (int i = 0; i < rangeStarts.lengthi++) {
         checkArgument(rangeStarts[i] <= rangeEnds[i]);
         if (i + 1 < rangeStarts.length) {
           checkArgument(rangeEnds[i] < rangeStarts[i + 1]);
         }
       }
     }
 
     @Override
     public boolean matches(char c) {
       int index = Arrays.binarySearch(c);
       if (index >= 0) {
         return true;
       } else {
         index = ~index - 1;
         return index >= 0 && c <= [index];
       }
     }
   }
 
   // Must be in ascending order.
   // Each character here is used as the start of a range in DIGIT; the matching range end is
   // derived from it by the static initializer that fills NINES.
   private static final String ZEROES = "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6"
       + "\u0c66\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946\u19d0\u1b50\u1bb0"
       + "\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
 
   private static final String NINES;
   static {
     StringBuilder builder = new StringBuilder(.length());
     for (int i = 0; i < .length(); i++) {
       builder.append((char) (.charAt(i) + 9));
     }
      = builder.toString();
   }

  
Determines whether a character is a digit according to Unicode.
 
   public static final CharMatcher DIGIT = new RangesMatcher(
       "CharMatcher.DIGIT".toCharArray(), .toCharArray());

  
Determines whether a character is a digit according to Java's definition. If you only care to match ASCII digits, you can use inRange('0', '9').
 
   public static final CharMatcher JAVA_DIGIT = new CharMatcher("CharMatcher.JAVA_DIGIT") {
     @Override public boolean matches(char c) {
       return Character.isDigit(c);
     }
   };

  
Determines whether a character is a letter according to Java's definition. If you only care to match letters of the Latin alphabet, you can use inRange('a', 'z').or(inRange('A', 'Z')).
 
   public static final CharMatcher JAVA_LETTER = new CharMatcher("CharMatcher.JAVA_LETTER") {
     @Override public boolean matches(char c) {
       return Character.isLetter(c);
     }
   };

  
Determines whether a character is a letter or digit according to Java's definition (see Character.isLetterOrDigit(char)).
 
   public static final CharMatcher JAVA_LETTER_OR_DIGIT =
       new CharMatcher("CharMatcher.JAVA_LETTER_OR_DIGIT") {
     @Override public boolean matches(char c) {
       return Character.isLetterOrDigit(c);
     }
   };

  
Determines whether a character is upper case according to Java's definition.
 
   public static final CharMatcher JAVA_UPPER_CASE =
       new CharMatcher("CharMatcher.JAVA_UPPER_CASE") {
     @Override public boolean matches(char c) {
       return Character.isUpperCase(c);
     }
   };

  
Determines whether a character is lower case according to Java's definition.
 
   public static final CharMatcher JAVA_LOWER_CASE =
       new CharMatcher("CharMatcher.JAVA_LOWER_CASE") {
     @Override public boolean matches(char c) {
       return Character.isLowerCase(c);
     }
   };

  
Determines whether a character is an ISO control character as specified by Character.isISOControl(char).
 
   public static final CharMatcher JAVA_ISO_CONTROL =
       inRange('\u0000''\u001f')
       .or(inRange('\u007f''\u009f'))
       .withToString("CharMatcher.JAVA_ISO_CONTROL");

  
Determines whether a character is invisible; that is, if its Unicode category is any of SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and PRIVATE_USE according to ICU4J.
 
   // The first string holds the range starts and the second the matching inclusive range ends;
   // both are passed to RangesMatcher, which requires equal lengths and ascending,
   // non-overlapping ranges.
   public static final CharMatcher INVISIBLE = new RangesMatcher("CharMatcher.INVISIBLE", (
       "\u0000\u007f\u00ad\u0600\u06dd\u070f\u1680\u180e\u2000\u2028\u205f\u206a\u3000\ud800\ufeff"
       + "\ufff9\ufffa").toCharArray(), (
       "\u0020\u00a0\u00ad\u0604\u06dd\u070f\u1680\u180e\u200f\u202f\u2064\u206f\u3000\uf8ff\ufeff"
       + "\ufff9\ufffb").toCharArray());
 
   private static String showCharacter(char c) {
     String hex = "0123456789ABCDEF";
     char[] tmp = {'\\''u''\0''\0''\0''\0'};
     for (int i = 0; i < 4; i++) {
       tmp[5 - i] = hex.charAt(c & 0xF);
       c >>= 4;
     }
     return String.copyValueOf(tmp);
 
   }

  
Determines whether a character is single-width (not double-width). When in doubt, this matcher errs on the side of returning false (that is, it tends to assume a character is double-width).

Note: as the reference file evolves, we will modify this constant to keep it up to date.

 
   // The first array holds the range starts and the second the matching inclusive range ends;
   // RangesMatcher requires equal lengths and ascending, non-overlapping ranges.
   public static final CharMatcher SINGLE_WIDTH = new RangesMatcher("CharMatcher.SINGLE_WIDTH",
       "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(),
       "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray());

  
Matches any character.
 
   public static final CharMatcher ANY =
       new FastMatcher("CharMatcher.ANY") {
         @Override public boolean matches(char c) {
           return true;
         }
 
         @Override public int indexIn(CharSequence sequence) {
           return (sequence.length() == 0) ? -1 : 0;
         }
 
         @Override public int indexIn(CharSequence sequenceint start) {
           int length = sequence.length();
           Preconditions.checkPositionIndex(startlength);
           return (start == length) ? -1 : start;
         }
 
         @Override public int lastIndexIn(CharSequence sequence) {
           return sequence.length() - 1;
         }
 
         @Override public boolean matchesAllOf(CharSequence sequence) {
           checkNotNull(sequence);
           return true;
         }
 
         @Override public boolean matchesNoneOf(CharSequence sequence) {
           return sequence.length() == 0;
         }
 
         @Override public String removeFrom(CharSequence sequence) {
           checkNotNull(sequence);
           return "";
         }
 
         @Override public String replaceFrom(CharSequence sequencechar replacement) {
           char[] array = new char[sequence.length()];
           Arrays.fill(arrayreplacement);
           return new String(array);
         }
 
         @Override public String replaceFrom(CharSequence sequenceCharSequence replacement) {
           StringBuilder retval = new StringBuilder(sequence.length() * replacement.length());
           for (int i = 0; i < sequence.length(); i++) {
             retval.append(replacement);
           }
           return retval.toString();
         }
 
         @Override public String collapseFrom(CharSequence sequencechar replacement) {
           return (sequence.length() == 0) ? "" : String.valueOf(replacement);
         }
 
         @Override public String trimFrom(CharSequence sequence) {
           checkNotNull(sequence);
           return "";
         }
 
         @Override public int countIn(CharSequence sequence) {
           return sequence.length();
         }
 
         @Override public CharMatcher and(CharMatcher other) {
           return checkNotNull(other);
         }
 
         @Override public CharMatcher or(CharMatcher other) {
           checkNotNull(other);
           return this;
         }
 
         @Override public CharMatcher negate() {
           return ;
         }
       };

  
Matches no characters.
 
   public static final CharMatcher NONE =
       new FastMatcher("CharMatcher.NONE") {
         @Override public boolean matches(char c) {
           return false;
         }
 
         @Override public int indexIn(CharSequence sequence) {
           checkNotNull(sequence);
           return -1;
         }
 
         @Override public int indexIn(CharSequence sequenceint start) {
           int length = sequence.length();
           Preconditions.checkPositionIndex(startlength);
           return -1;
         }
 
         @Override public int lastIndexIn(CharSequence sequence) {
           checkNotNull(sequence);
           return -1;
         }
 
         @Override public boolean matchesAllOf(CharSequence sequence) {
           return sequence.length() == 0;
         }
 
         @Override public boolean matchesNoneOf(CharSequence sequence) {
           checkNotNull(sequence);
           return true;
         }
 
         @Override public String removeFrom(CharSequence sequence) {
           return sequence.toString();
         }
 
         @Override public String replaceFrom(CharSequence sequencechar replacement) {
           return sequence.toString();
         }
 
         @Override public String replaceFrom(CharSequence sequenceCharSequence replacement) {
           checkNotNull(replacement);
           return sequence.toString();
         }
 
         @Override public String collapseFrom(CharSequence sequencechar replacement) {
           return sequence.toString();
         }
 
         @Override public String trimFrom(CharSequence sequence) {
           return sequence.toString();
         }
 
         @Override
         public String trimLeadingFrom(CharSequence sequence) {
           return sequence.toString();
         }
 
         @Override
         public String trimTrailingFrom(CharSequence sequence) {
           return sequence.toString();
         }
 
         @Override public int countIn(CharSequence sequence) {
           checkNotNull(sequence);
           return 0;
         }
 
         @Override public CharMatcher and(CharMatcher other) {
           checkNotNull(other);
           return this;
         }
 
         @Override public CharMatcher or(CharMatcher other) {
           return checkNotNull(other);
         }
 
         @Override public CharMatcher negate() {
           return ;
         }
       };
 
   // Static factories
 
  
Returns a char matcher that matches only one specified character.
 
   public static CharMatcher is(final char match) {
     String description = "CharMatcher.is('" + showCharacter(match) + "')";
     return new FastMatcher(description) {
       @Override public boolean matches(char c) {
         return c == match;
       }
 
       @Override public String replaceFrom(CharSequence sequencechar replacement) {
         return sequence.toString().replace(matchreplacement);
       }
 
       @Override public CharMatcher and(CharMatcher other) {
         return other.matches(match) ? this : ;
       }
 
       @Override public CharMatcher or(CharMatcher other) {
         return other.matches(match) ? other : super.or(other);
       }
 
       @Override public CharMatcher negate() {
         return isNot(match);
       }
 
       @GwtIncompatible("java.util.BitSet")
       @Override
       void setBits(BitSet table) {
         table.set(match);
       }
     };
   }

  
Returns a char matcher that matches any character except the one specified.

To negate another CharMatcher, use negate().

 
   public static CharMatcher isNot(final char match) {
     String description = "CharMatcher.isNot('" + showCharacter(match) + "')";
     return new FastMatcher(description) {
       @Override public boolean matches(char c) {
         return c != match;
       }
 
       @Override public CharMatcher and(CharMatcher other) {
         return other.matches(match) ? super.and(other) : other;
       }
 
       @Override public CharMatcher or(CharMatcher other) {
         return other.matches(match) ?  : this;
       }
 
       @GwtIncompatible("java.util.BitSet")
       @Override
       void setBits(BitSet table) {
         table.set(0, match);
         table.set(match + 1, . + 1);
       }
 
       @Override public CharMatcher negate() {
         return is(match);
       }
     };
   }

  
Returns a char matcher that matches any character present in the given character sequence.
 
   public static CharMatcher anyOf(final CharSequence sequence) {
     switch (sequence.length()) {
       case 0:
         return ;
       case 1:
         return is(sequence.charAt(0));
       case 2:
         return isEither(sequence.charAt(0), sequence.charAt(1));
       default:
         // continue below to handle the general case
     }
     // TODO(user): is it potentially worth just going ahead and building a precomputed matcher?
     final char[] chars = sequence.toString().toCharArray();
     Arrays.sort(chars);
     StringBuilder description = new StringBuilder("CharMatcher.anyOf(\"");
     for (char c : chars) {
       description.append(showCharacter(c));
     }
     description.append("\")");
     return new CharMatcher(description.toString()) {
       @Override public boolean matches(char c) {
         return Arrays.binarySearch(charsc) >= 0;
       }
 
       @Override
       @GwtIncompatible("java.util.BitSet")
       void setBits(BitSet table) {
         for (char c : chars) {
           table.set(c);
         }
       }
     };
   }
 
   private static CharMatcher isEither(
       final char match1,
       final char match2) {
     String description = "CharMatcher.anyOf(\"" +
         showCharacter(match1) + showCharacter(match2) + "\")";
     return new FastMatcher(description) {
       @Override public boolean matches(char c) {
         return c == match1 || c == match2;
       }
 
       @GwtIncompatible("java.util.BitSet")
       @Override void setBits(BitSet table) {
         table.set(match1);
         table.set(match2);
       }
     };
   }

  
Returns a char matcher that matches any character not present in the given character sequence.
 
   public static CharMatcher noneOf(CharSequence sequence) {
     return anyOf(sequence).negate();
   }

  
Returns a char matcher that matches any character in a given range (both endpoints are inclusive). For example, to match any lowercase letter of the English alphabet, use CharMatcher.inRange('a', 'z').

Throws:
IllegalArgumentException if endInclusive < startInclusive
 
   public static CharMatcher inRange(final char startInclusivefinal char endInclusive) {
     checkArgument(endInclusive >= startInclusive);
     String description = "CharMatcher.inRange('" +
         showCharacter(startInclusive) + "', '" +
         showCharacter(endInclusive) + "')";
     return inRange(startInclusiveendInclusivedescription);
   }
 
   static CharMatcher inRange(final char startInclusivefinal char endInclusive,
       String description) {
     return new FastMatcher(description) {
       @Override public boolean matches(char c) {
         return startInclusive <= c && c <= endInclusive;
       }
 
       @GwtIncompatible("java.util.BitSet")
       @Override void setBits(BitSet table) {
         table.set(startInclusiveendInclusive + 1);
       }
     };
   }

  
Returns a matcher with identical behavior to the given Character-based predicate, but which operates on primitive char instances instead.
 
   public static CharMatcher forPredicate(final Predicate<? super Characterpredicate) {
     checkNotNull(predicate);
     if (predicate instanceof CharMatcher) {
       return (CharMatcherpredicate;
     }
     String description = "CharMatcher.forPredicate(" + predicate + ")";
     return new CharMatcher(description) {
       @Override public boolean matches(char c) {
         return predicate.apply(c);
       }
 
       @Override public boolean apply(Character character) {
         return predicate.apply(checkNotNull(character));
       }
     };
   }
 
   // State
   // Descriptive text for this matcher, assigned once by whichever constructor runs.
   // NOTE(review): presumably what toString() returns -- toString() is outside this view.
   final String description;
 
   // Constructors
 
  
Sets the toString() from the given description.
 
   CharMatcher(String description) {
     this. = description;
   }

  
Constructor for use by subclasses. When subclassing, you may want to override toString() to provide a useful description.
 
   protected CharMatcher() {
      = super.toString();
   }
 
   // Abstract methods
 
  
Determines a true or false value for the given character.
 
   /**
    * The single abstract operation of this class; the other methods visible in this file are
    * written in terms of it.
    *
    * @param c the character to test
    * @return {@code true} if this matcher accepts {@code c}
    */
   public abstract boolean matches(char c);
 
   // Non-static factories
 
  
Returns a matcher that matches any character not matched by this matcher.
 
   public CharMatcher negate() {
     return new NegatedMatcher(this);
   }
 
   private static class NegatedMatcher extends CharMatcher {
     final CharMatcher original;
 
     NegatedMatcher(String toStringCharMatcher original) {
       super(toString);
       this. = original;
     }
 
     NegatedMatcher(CharMatcher original) {
       this(original + ".negate()"original);
     }
 
     @Override public boolean matches(char c) {
       return !.matches(c);
     }
 
     @Override public boolean matchesAllOf(CharSequence sequence) {
       return .matchesNoneOf(sequence);
     }
 
     @Override public boolean matchesNoneOf(CharSequence sequence) {
       return .matchesAllOf(sequence);
     }
 
     @Override public int countIn(CharSequence sequence) {
       return sequence.length() - .countIn(sequence);
     }
 
     @GwtIncompatible("java.util.BitSet")
     @Override
     void setBits(BitSet table) {
       BitSet tmp = new BitSet();
       .setBits(tmp);
       tmp.flip(.. + 1);
       table.or(tmp);
     }
 
     @Override public CharMatcher negate() {
       return ;
     }
 
     @Override
     CharMatcher withToString(String description) {
       return new NegatedMatcher(description);
     }
   }

  
Returns a matcher that matches any character matched by both this matcher and other.
 
   public CharMatcher and(CharMatcher other) {
     return new And(thischeckNotNull(other));
   }
 
   private static class And extends CharMatcher {
     final CharMatcher first;
     final CharMatcher second;
 
     And(CharMatcher aCharMatcher b) {
       this(ab"CharMatcher.and(" + a + ", " + b + ")");
     }
 
     And(CharMatcher aCharMatcher bString description) {
       super(description);
        = checkNotNull(a);
        = checkNotNull(b);
     }
 
     @Override
     public boolean matches(char c) {
       return .matches(c) && .matches(c);
     }
 
     @GwtIncompatible("java.util.BitSet")
     @Override
     void setBits(BitSet table) {
       BitSet tmp1 = new BitSet();
       .setBits(tmp1);
       BitSet tmp2 = new BitSet();
       .setBits(tmp2);
       tmp1.and(tmp2);
       table.or(tmp1);
     }
 
     @Override
     CharMatcher withToString(String description) {
       return new And(description);
     }
   }

  
Returns a matcher that matches any character matched by either this matcher or other.
 
   public CharMatcher or(CharMatcher other) {
     return new Or(thischeckNotNull(other));
   }
 
   private static class Or extends CharMatcher {
     final CharMatcher first;
     final CharMatcher second;
 
     Or(CharMatcher aCharMatcher bString description) {
       super(description);
        = checkNotNull(a);
        = checkNotNull(b);
     }
 
     Or(CharMatcher aCharMatcher b) {
       this(ab"CharMatcher.or(" + a + ", " + b + ")");
     }
 
     @GwtIncompatible("java.util.BitSet")
     @Override
     void setBits(BitSet table) {
       .setBits(table);
       .setBits(table);
     }
 
     @Override
     public boolean matches(char c) {
       return .matches(c) || .matches(c);
     }
 
     @Override
     CharMatcher withToString(String description) {
       return new Or(description);
     }
   }

  
Returns a char matcher functionally equivalent to this one, but which may be faster to query than the original; your mileage may vary. Precomputation takes time and is likely to be worthwhile only if the precomputed matcher is queried many thousands of times.

This method has no effect (returns this) when called in GWT: it's unclear whether a precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a worthwhile tradeoff in a browser.

 
   public CharMatcher precomputed() {
     return Platform.precomputeCharMatcher(this);
   }

  
Subclasses should provide a new CharMatcher with the same characteristics as this, but with their toString method overridden with the new description.

This is unsupported by default.

 
   /**
    * Base implementation: always throws. The nested matcher classes in this file that support
    * relabelling override it.
    *
    * @param description the replacement description
    * @throws UnsupportedOperationException always, unless overridden
    */
   CharMatcher withToString(String description) {
     throw new UnsupportedOperationException();
   }
 
   private static final int DISTINCT_CHARS = . - . + 1;

  
This is the actual implementation of precomputed, but we bounce calls through a method on Platform so that we can have different behavior in GWT.

This implementation tries to be smart in a number of ways. It recognizes cases where the negation is cheaper to precompute than the matcher itself; it tries to build small hash tables for matchers that only match a few characters, and so on. In the worst-case scenario, it constructs an eight-kilobyte bit array and queries that. In many situations this produces a matcher which is faster to query than the original.

 
   @GwtIncompatible("java.util.BitSet")
     final BitSet table = new BitSet();
     setBits(table);
     int totalCharacters = table.cardinality();
     if (totalCharacters * 2 <= ) {
       return precomputedPositive(totalCharacterstable);
     } else {
       // TODO(user): is it worth it to worry about the last character of large matchers?
       table.flip(.. + 1);
       int negatedCharacters =  - totalCharacters;
       String suffix = ".negate()";
       String negatedDescription = .endsWith(suffix)
           ? .substring(0, .length() - suffix.length())
           :  + suffix;
       return new NegatedFastMatcher(toString(),
           precomputedPositive(negatedCharacterstablenegatedDescription));
     }
   }

  
A matcher for which precomputation will not yield any significant benefit.
 
   abstract static class FastMatcher extends CharMatcher {
     FastMatcher() {
       super();
     }
 
     FastMatcher(String description) {
       super(description);
     }
 
     @Override
     public final CharMatcher precomputed() {
       return this;
     }
 
     @Override
     public CharMatcher negate() {
       return new NegatedFastMatcher(this);
     }
   }
 
   static final class NegatedFastMatcher extends NegatedMatcher {
     NegatedFastMatcher(CharMatcher original) {
       super(original);
     }
 
     NegatedFastMatcher(String toStringCharMatcher original) {
       super(toStringoriginal);
     }
 
     @Override
     public final CharMatcher precomputed() {
       return this;
     }
 
     @Override
     CharMatcher withToString(String description) {
       return new NegatedFastMatcher(description);
     }
   }

  
Helper method for precomputedInternal that doesn't test if the negation is cheaper.
 
   @GwtIncompatible("java.util.BitSet")
   private static CharMatcher precomputedPositive(
       int totalCharacters,
       BitSet table,
       String description) {
     switch (totalCharacters) {
       case 0:
         return ;
       case 1:
         return is((chartable.nextSetBit(0));
       case 2:
         char c1 = (chartable.nextSetBit(0);
         char c2 = (chartable.nextSetBit(c1 + 1);
         return isEither(c1c2);
       default:
         return isSmall(totalCharacterstable.length())
             ? SmallCharMatcher.from(tabledescription)
             : new BitSetMatcher(tabledescription);
     }
   }
 
   private static boolean isSmall(int totalCharactersint tableLength) {
     return totalCharacters <= .
         && tableLength > (totalCharacters * 4 * .);
         // err on the side of BitSetMatcher
   }
 
   @GwtIncompatible("java.util.BitSet")
   private static class BitSetMatcher extends FastMatcher {
     private final BitSet table;
 
     private BitSetMatcher(BitSet tableString description) {
       super(description);
       if (table.length() + . < table.size()) {
         table = (BitSettable.clone();
         // If only we could actually call BitSet.trimToSize() ourselves...
       }
       this. = table;
     }
 
     @Override public boolean matches(char c) {
       return .get(c);
     }
 
     @Override
     void setBits(BitSet bitSet) {
       bitSet.or();
     }
   }

  
Sets bits in table matched by this matcher.
 
   @GwtIncompatible("java.util.BitSet")
   void setBits(BitSet table) {
     for (int c = .c >= .c--) {
       if (matches((charc)) {
         table.set(c);
       }
     }
   }
 
   // Text processing routines
 
  
Returns true if a character sequence contains at least one matching character. Equivalent to !matchesNoneOf(sequence).

The default implementation iterates over the sequence, invoking matches for each character, until this returns true or the end is reached.

Parameters:
sequence the character sequence to examine, possibly empty
Returns:
true if this matcher matches at least one character in the sequence
Since:
8.0
 
   public boolean matchesAnyOf(CharSequence sequence) {
     return !matchesNoneOf(sequence);
   }

  
Returns true if a character sequence contains only matching characters.

The default implementation iterates over the sequence, invoking matches for each character, until this returns false or the end is reached.

Parameters:
sequence the character sequence to examine, possibly empty
Returns:
true if this matcher matches every character in the sequence, including when the sequence is empty
 
   public boolean matchesAllOf(CharSequence sequence) {
     for (int i = sequence.length() - 1; i >= 0; i--) {
       if (!matches(sequence.charAt(i))) {
         return false;
       }
     }
     return true;
   }

  
Returns true if a character sequence contains no matching characters. Equivalent to !matchesAnyOf(sequence).

The default implementation iterates over the sequence, invoking matches for each character, until this returns true or the end is reached.

Parameters:
sequence the character sequence to examine, possibly empty
Returns:
true if this matcher matches no character in the sequence, including when the sequence is empty
 
   public boolean matchesNoneOf(CharSequence sequence) {
     return indexIn(sequence) == -1;
   }

  
Returns the index of the first matching character in a character sequence, or -1 if no matching character is present.

The default implementation iterates over the sequence in forward order calling matches for each character.

Parameters:
sequence the character sequence to examine from the beginning
Returns:
an index, or -1 if no character matches
 
   public int indexIn(CharSequence sequence) {
     int length = sequence.length();
     for (int i = 0; i < lengthi++) {
       if (matches(sequence.charAt(i))) {
         return i;
       }
     }
     return -1;
   }

  
Returns the index of the first matching character in a character sequence, starting from a given position, or -1 if no character matches after that position.

The default implementation iterates over the sequence in forward order, beginning at start, calling matches for each character.

Parameters:
sequence the character sequence to examine
start the first index to examine; must be nonnegative and no greater than sequence.length()
Returns:
the index of the first matching character, guaranteed to be no less than start, or -1 if no character matches
Throws:
IndexOutOfBoundsException if start is negative or greater than sequence.length()
  public int indexIn(CharSequence sequenceint start) {
    int length = sequence.length();
    Preconditions.checkPositionIndex(startlength);
    for (int i = starti < lengthi++) {
      if (matches(sequence.charAt(i))) {
        return i;
      }
    }
    return -1;
  }

  
Returns the index of the last matching character in a character sequence, or -1 if no matching character is present.

The default implementation iterates over the sequence in reverse order calling matches for each character.

Parameters:
sequence the character sequence to examine from the end
Returns:
an index, or -1 if no character matches
  public int lastIndexIn(CharSequence sequence) {
    for (int i = sequence.length() - 1; i >= 0; i--) {
      if (matches(sequence.charAt(i))) {
        return i;
      }
    }
    return -1;
  }

  
Returns the number of matching characters found in a character sequence.
  public int countIn(CharSequence sequence) {
    int count = 0;
    for (int i = 0; i < sequence.length(); i++) {
      if (matches(sequence.charAt(i))) {
        count++;
      }
    }
    return count;
  }

  
Returns a string containing all non-matching characters of a character sequence, in order. For example:
   CharMatcher.is('a').removeFrom("bazaar")
... returns "bzr".
  /**
   * Removes every matching character by compacting a copy of the input in place.
   *
   * @param sequence the character sequence to filter
   * @return the input's string form if nothing matches, otherwise a new string without the
   *     matching characters
   */
  @CheckReturnValue
  public String removeFrom(CharSequence sequence) {
    String string = sequence.toString();
    int pos = indexIn(string);
    if (pos == -1) {
      // Nothing to remove: return the string form unchanged.
      return string;
    }
    char[] chars = string.toCharArray();
    // spread is how far retained characters are shifted left; it starts at 1 for the match
    // found at pos and grows by one for each further match.
    int spread = 1;
    // This unusual loop comes from extensive benchmarking
    OUT: while (true) {
      // Step past the match that was just found.
      pos++;
      while (true) {
        if (pos == chars.length) {
          break OUT;
        }
        if (matches(chars[pos])) {
          break;
        }
        // Retained character: move it left over the gap left by removed ones.
        chars[pos - spread] = chars[pos];
        pos++;
      }
      spread++;
    }
    // pos equals chars.length here, so the retained prefix has length pos - spread.
    return new String(chars, 0, pos - spread);
  }

  
Returns a string containing all matching characters of a character sequence, in order. For example:
   CharMatcher.is('a').retainFrom("bazaar")
... returns "aaa".
  @CheckReturnValue
  public String retainFrom(CharSequence sequence) {
    return negate().removeFrom(sequence);
  }

  
Returns a string copy of the input character sequence, with each character that matches this matcher replaced by a given replacement character. For example:
   CharMatcher.is('a').replaceFrom("radar", 'o')
... returns "rodor".

The default implementation uses indexIn(CharSequence) to find the first matching character, then iterates the remainder of the sequence calling matches(char) for each character.

Parameters:
sequence the character sequence to replace matching characters in
replacement the character to append to the result string in place of each matching character in sequence
Returns:
the new string
  @CheckReturnValue
  public String replaceFrom(CharSequence sequencechar replacement) {
    String string = sequence.toString();
    int pos = indexIn(string);
    if (pos == -1) {
      return string;
    }
    char[] chars = string.toCharArray();
    chars[pos] = replacement;
    for (int i = pos + 1; i < chars.lengthi++) {
      if (matches(chars[i])) {
        chars[i] = replacement;
      }
    }
    return new String(chars);
  }

  
Returns a string copy of the input character sequence, with each character that matches this matcher replaced by a given replacement sequence. For example:
   CharMatcher.is('a').replaceFrom("yaha", "oo")
... returns "yoohoo".

Note: If the replacement is a fixed string with only one character, you are better off calling replaceFrom(CharSequence, char) directly.

Parameters:
sequence the character sequence to replace matching characters in
replacement the characters to append to the result string in place of each matching character in sequence
Returns:
the new string
  @CheckReturnValue
  public String replaceFrom(CharSequence sequenceCharSequence replacement) {
    int replacementLen = replacement.length();
    if (replacementLen == 0) {
      return removeFrom(sequence);
    }
    if (replacementLen == 1) {
      return replaceFrom(sequencereplacement.charAt(0));
    }
    String string = sequence.toString();
    int pos = indexIn(string);
    if (pos == -1) {
      return string;
    }
    int len = string.length();
    StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
    int oldpos = 0;
    do {
      buf.append(stringoldpospos);
      buf.append(replacement);
      oldpos = pos + 1;
      pos = indexIn(stringoldpos);
    } while (pos != -1);
    buf.append(stringoldposlen);
    return buf.toString();
  }

  
Returns a substring of the input character sequence that omits all characters this matcher matches from the beginning and from the end of the string. For example:
   CharMatcher.anyOf("ab").trimFrom("abacatbab")
... returns "cat".

Note that:

   CharMatcher.inRange('\0', ' ').trimFrom(str)
... is equivalent to String.trim().
  @CheckReturnValue
  public String trimFrom(CharSequence sequence) {
    int len = sequence.length();
    int first;
    int last;
    for (first = 0; first < lenfirst++) {
      if (!matches(sequence.charAt(first))) {
        break;
      }
    }
    for (last = len - 1; last > firstlast--) {
      if (!matches(sequence.charAt(last))) {
        break;
      }
    }
    return sequence.subSequence(firstlast + 1).toString();
  }

  
Returns a substring of the input character sequence that omits all characters this matcher matches from the beginning of the string. For example:
 CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")
... returns "catbab".
  @CheckReturnValue
  public String trimLeadingFrom(CharSequence sequence) {
    int len = sequence.length();
    for (int first = 0; first < lenfirst++) {
      if (!matches(sequence.charAt(first))) {
        return sequence.subSequence(firstlen).toString();
      }
    }
    return "";
  }

  
Returns a substring of the input character sequence that omits all characters this matcher matches from the end of the string. For example:
 CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")
... returns "abacat".
  @CheckReturnValue
  public String trimTrailingFrom(CharSequence sequence) {
    int len = sequence.length();
    for (int last = len - 1; last >= 0; last--) {
      if (!matches(sequence.charAt(last))) {
        return sequence.subSequence(0, last + 1).toString();
      }
    }
    return "";
  }

  
Returns a string copy of the input character sequence, with each group of consecutive characters that match this matcher replaced by a single replacement character. For example:
   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')
... returns "b-p-r".

The default implementation uses indexIn(CharSequence) to find the first matching character, then iterates the remainder of the sequence calling matches(char) for each character.

Parameters:
sequence the character sequence to replace matching groups of characters in
replacement the character to append to the result string in place of each group of matching characters in sequence
Returns:
the new string
  @CheckReturnValue
  public String collapseFrom(CharSequence sequencechar replacement) {
    // This implementation avoids unnecessary allocation.
    int len = sequence.length();
    for (int i = 0; i < leni++) {
      char c = sequence.charAt(i);
      if (matches(c)) {
        if (c == replacement
            && (i == len - 1 || !matches(sequence.charAt(i + 1)))) {
          // a no-op replacement
          i++;
        } else {
          StringBuilder builder = new StringBuilder(len)
              .append(sequence.subSequence(0, i))
              .append(replacement);
          return finishCollapseFrom(sequencei + 1, lenreplacementbuildertrue);
        }
      }
    }
    // no replacement needed
    return sequence.toString();
  }

  
Collapses groups of matching characters exactly as collapseFrom does, except that groups of matching characters at the start or end of the sequence are removed without replacement.
  @CheckReturnValue
  public String trimAndCollapseFrom(CharSequence sequencechar replacement) {
    // This implementation avoids unnecessary allocation.
    int len = sequence.length();
    int first;
    int last;
    for (first = 0; first < len && matches(sequence.charAt(first)); first++) {}
    for (last = len - 1; last > first && matches(sequence.charAt(last)); last--) {}
    return (first == 0 && last == len - 1)
        ? collapseFrom(sequencereplacement)
        : finishCollapseFrom(
              sequencefirstlast + 1, replacement,
              new StringBuilder(last + 1 - first),
              false);
  }
      CharSequence sequenceint startint endchar replacement,
      StringBuilder builderboolean inMatchingGroup) {
    for (int i = starti < endi++) {
      char c = sequence.charAt(i);
      if (matches(c)) {
        if (!inMatchingGroup) {
          builder.append(replacement);
          inMatchingGroup = true;
        }
      } else {
        builder.append(c);
        inMatchingGroup = false;
      }
    }
    return builder.toString();
  }
  // Predicate interface

  
Equivalent to matches; provided only to satisfy the Predicate interface. When using a reference of type CharMatcher, invoke matches directly instead.
  @Override public boolean apply(Character character) {
    return matches(character);
  }

  
Returns a string representation of this CharMatcher, such as CharMatcher.or(WHITESPACE, JAVA_DIGIT).
  /**
   * Returns a string representation of this {@code CharMatcher}, such as
   * {@code CharMatcher.or(WHITESPACE, JAVA_DIGIT)}.
   */
  public String toString() {
    // NOTE(review): the returned expression is missing from this statement, so it does not
    // compile as written. Presumably it names a field holding the matcher's description;
    // that field is not visible here — restore it from the original source.
    return ;
  }

  
A special-case CharMatcher for Unicode whitespace characters that is extremely efficient both in space required and in time to check for matches.

Implementation details: it turns out that all current (early 2012) Unicode whitespace characters are unique modulo 79, so we can construct a lookup table of exactly 79 entries, index it by the character code mod 79, and check whether the entry stored there is the character being tested. The table begins with U+0001 rather than U+0000 so that the null character is not listed as whitespace.

Other things we tried that did not prove to be beneficial, mostly due to speed concerns:
* Binary search into the sorted list of characters, i.e., what CharMatcher.anyOf() does
* Perfect hash function into a table of size 26 (using an offset table and a special Jenkins hash function)
* Perfect-ish hash function that required two lookups into a single table of size 26.
* Using a power-of-2 sized hash table (size 64) with linear probing.

--Christopher Swenson, February 2012.
  // Lookup table of exactly 79 chars, meant to be indexed by (c % 79): each whitespace character
  // c is stored at index c % 79 (e.g. U+0020 at index 32, U+00A0 at index 2), and every other
  // slot holds a filler. Index 0 holds U+0001 instead of U+0000 so that the null character,
  // which also maps to index 0, never compares equal to its slot.
  private static final String WHITESPACE_TABLE = "\u0001\u0000\u00a0\u0000\u0000\u0000\u0000\u0000"
      + "\u0000\u0009\n\u000b\u000c\r\u0000\u0000\u2028\u2029\u0000\u0000\u0000\u0000\u0000\u202f"
      + "\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0020\u0000\u0000\u0000\u0000\u0000"
      + "\u0000\u0000\u0000\u0000\u0000\u3000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000"
      + "\u0000\u0000\u0085\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a"
      + "\u0000\u0000\u0000\u0000\u0000\u205f\u1680\u0000\u0000\u180e\u0000\u0000\u0000";

  
Determines whether a character is whitespace according to the latest Unicode standard, as illustrated here. This is not the same definition used by other Java APIs. (See a comparison of several definitions of "whitespace".)

Note: as the Unicode definition evolves, we will modify this constant to keep it up to date.

  public static final CharMatcher WHITESPACE = new FastMatcher("CharMatcher.WHITESPACE") {
    @Override public boolean matches(char c) {
      return .charAt(c % 79) == c;
    }
  };