Start line:  
End line:  

Snippet Preview

Snippet HTML Code

Stack Overflow Questions
   /*
    * Copyright (c) 2010 Mozilla Foundation
    * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla 
    * Foundation, and Opera Software ASA.
    *
    * Permission is hereby granted, free of charge, to any person obtaining a 
    * copy of this software and associated documentation files (the "Software"), 
    * to deal in the Software without restriction, including without limitation 
    * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
   * and/or sell copies of the Software, and to permit persons to whom the 
   * Software is furnished to do so, subject to the following conditions:
   *
   * The above copyright notice and this permission notice shall be included in 
   * all copies or substantial portions of the Software.
   *
   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
   * DEALINGS IN THE SOFTWARE.
   */
  
  package org.whattf.checker;
  
  
  
  
  
  public class XmlPiChecker extends Checker implements LexicalHandler {
  
      // Names of the five predefined XML entities, each including its
      // terminating semicolon.
      private static final char[][] NAMES = { "amp;".toCharArray(),
              "lt;".toCharArray(), "gt;".toCharArray(), "quot;".toCharArray(),
              "apos;".toCharArray(), };
  
      // Replacement text for each entry of NAMES, listed in the same order:
      // ampersand, less-than, greater-than, double quote, apostrophe.
      private static final char[][] VALUES = { { '\u0026' }, { '\u003c' },
              { '\u003e' }, { '\u0022' }, { '\'' }, };
  
      // Bit mask with every bit set except bit 0.
      private static final int DATA_AND_RCDATA_MASK = ~1;
  
      // Integer state identifiers (0-13). Presumably these are the states of
      // the pseudo-attribute state machine further down in this class, whose
      // accompanying note says it was adapted from
      // nu.validator.htmlparser.impl.Tokenizer -- TODO confirm against the
      // usage sites.
      private static final int BEFORE_ATTRIBUTE_NAME = 0;
  
      private static final int ATTRIBUTE_NAME = 1;
  
      private static final int AFTER_ATTRIBUTE_NAME = 2;
  
      private static final int BEFORE_ATTRIBUTE_VALUE = 3;
  
      private static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED = 4;
  
      private static final int ATTRIBUTE_VALUE_SINGLE_QUOTED = 5;
  
      private static final int ATTRIBUTE_VALUE_UNQUOTED = 6;
  
      private static final int AFTER_ATTRIBUTE_VALUE_QUOTED = 7;
  
      private static final int CONSUME_CHARACTER_REFERENCE = 8;
  
      private static final int CONSUME_NCR = 9;
  
      private static final int CHARACTER_REFERENCE_LOOP = 10;
  
      private static final int HEX_NCR_LOOP = 11;
  
      // NOTE(review): "NRC" looks like a transposition of "NCR" (compare
      // HEX_NCR_LOOP above); renaming requires touching every usage site.
      private static final int DECIMAL_NRC_LOOP = 12;
  
      private static final int HANDLE_NCR_VALUE = 13;
  
      // Presumably the increment used when a character buffer has to grow;
      // the growth code is not next to this declaration -- TODO confirm.
      private static final int BUFFER_GROW_BY = 1024;
  
      // U+FFFD REPLACEMENT CHARACTER as a one-element array.
      private static final char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
  
      // Evaluates to 0xD7C0. This is the customary constant for deriving a
      // UTF-16 lead surrogate from a supplementary code point
      // (LEAD_OFFSET + (codePoint >> 10)) -- presumably used that way here;
      // verify at the usage site.
      private static final int LEAD_OFFSET = (0xD800 - (0x10000 >> 10));
  
      // Character buffer with an initial capacity of 64 chars, and its
      // companion length field (presumably the number of chars in use).
      private char[] strBuf = new char[64];
  
      private int strBufLen;
  
      // Second character buffer with an initial capacity of 1024 chars, and
      // its companion length field.
      private char[] longStrBuf = new char[1024];
  
      private int longStrBufLen;
  
      // Fixed-size scratch arrays: one char, and two chars (presumably a
      // single BMP character and a surrogate pair respectively).
      private final char[] bmpChar = new char[1];
  
      private final char[] astralChar = new char[2];
  
     // Mutable working state. Judging by the names this belongs to
     // character-reference parsing; the code that reads and writes these
     // fields is not adjacent to the declarations -- TODO confirm.
     private int entCol;
 
     private int lo;
 
     private int hi;
 
     private int candidate;
 
     private int strBufMark;
 
     private int prevValue;
 
     private int value;
 
     private boolean seenDigits;
 
     private char additional;
 
     private boolean alreadyWarnedAboutPrivateUseCharacters;
 
     // Presumably the pseudo-attributes collected so far from the PI data and
     // the name of the pseudo-attribute currently being parsed -- verify.
     private AttributesImpl attributes;
 
     private String attributeName;
 
     // Document-level flags and the most recent PI target. Presumably:
     // inside a DTD, an element has already started, and an XSLT
     // xml-stylesheet PI has already been seen -- verify against the
     // handler methods.
     private boolean inDoctype;
 
     private boolean alreadyHasElement;
 
     private String piTarget = null;
 
     private boolean hasXsltPi;
 
     private enum PseudoAttrName {
         HREF, TYPE, TITLE, MEDIA, CHARSET, ALTERNATE, INVALID;
         private static PseudoAttrName toCaps(String str) {
             try {
                 if (!str.toLowerCase().equals(str)) {
                     return ;
                 }
                 return valueOf(newAsciiUpperCaseStringFromString(str));
             } catch (Exception ex) {
                 return ;
             }
         }
     }
 
     public XmlPiChecker() {
         super();
          = false;
          = false;
          = false;
     }
 
     public void startDTD(String nameString publicIdString systemId)
             throws SAXException {
          = true;
     }
 
     public void endDTD() throws SAXException {
          = false;
     }
 
     public void startEntity(String namethrows SAXException {
     }
 
     public void endEntity(String namethrows SAXException {
     }
 
     /**
      * Does nothing; the body is empty.
      *
      * @throws SAXException never thrown by this implementation
      */
     public void startCDATA() throws SAXException {
     }
 
     /**
      * Does nothing; the body is empty.
      *
      * @throws SAXException never thrown by this implementation
      */
     public void endCDATA() throws SAXException {
     }
 
     public void comment(char[] chint startint lenthrows SAXException {
     }
 
     @Override public void startDocument() throws SAXException {
          = false;
          = false;
          = false;
     }
 
     @Override public void startElement(String uriString localName,
             String qNameAttributes attsthrows SAXException {
          = true;
     }
 
     @Override public void processingInstruction(String targetString data)
             throws SAXException {
          = target;
         if ("xml-stylesheet".equals()) {
             checkXmlStylesheetPiData(data);
         }
     }
 
     private void errBadPseudoAttrDatatype(DatatypeException e,
             Class<?> datatypeClassString attrNameString attrValue)
             throws SAXExceptionClassNotFoundException {
         if (getErrorHandler() != null) {
             Html5DatatypeException ex5 = (Html5DatatypeExceptione;
             boolean warning = ex5.isWarning() ? true : false;
             DatatypeMismatchException bpe = new DatatypeMismatchException(
                     "Bad value \u201c" + attrValue + "\u201d for \u201c"
                             +  + "\u201d pseudo-attribute \u201c"
                             + attrName + "\u201d. "
                             + e.getMessage(),
                     getDocumentLocator(), datatypeClasswarning);
             getErrorHandler().error(bpe);
         }
     }
 
     private void errAttributeWithNoValue() throws SAXException {
         err("Found \u201c" +  + "\u201d pseudo-attribute \u201c"
                 + 
                 + "\u201d without a value. All pseudo-attributes in \u201c"
                 +  + "\u201d instructions must have values.");
     }
 
     private void errAttributeValueContainsLt() throws SAXException {
         err("Found \u201c"
                 + 
                 + "\u201d pseudo-attribute \u201c"
                 + 
                 + "\u201d with the character \u201c<\u201d in its value. All pseudo-attribute values in \u201c"
                 + 
                 + "\u201d instructions must not contain the character \u201c<\u201d.");
     }
 
     private void errUpperCaseXinHexNcr() throws SAXException {
         err("In XML documents, a hexadecimal character reference must begin with "
                 + "\u201c&#x\u201d (lowercase \u201cx\u201d), not \u201c&#X\u201d (uppercase \u201cX\u201d).");
     }
 
     private void checkXmlStylesheetPiData(String datathrows SAXException {
         boolean hasHref = false;
         boolean hasTitle = false;
         boolean hasMedia = false;
         boolean hasCharset = false;
         boolean hasAlternate = false;
         boolean hasNonEmptyTitle = false;
         boolean alternateIsYes = false;
         boolean badDatatype = false;
         if () {
             warn("An \u201cxml-stylesheet\u201d instruction should not be used within a \u201cDOCTYPE\u201d declaration.");
         }
         if () {
             err("Any \u201cxml-stylesheet\u201d instruction in a document must occur before any elements in the document. "
                     + "Suppressing any further errors for this \u201cxml-stylesheet\u201d instruction.");
             return;
         }
         if (!"".equals(data)) {
             Html5DatatypeLibrary dl = new Html5DatatypeLibrary();
             AttributesImpl patts = getPseudoAttributesFromPiData(data);
             String attrName;
             String attrValue;
             for (int i = 0; i < patts.getLength(); i++) {
                 attrName = patts.getQName(i);
                 attrValue = patts.getValue(i);
                 switch (PseudoAttrName.toCaps(attrName)) {
                     case :
                         hasHref = true;
                         if (attrValue == null) {
                             break;
                         }
                         try {
                             IriRef ir = (IriRefdl.createDatatype("iri-ref");
                             ir.checkValid(attrValue);
                         } catch (DatatypeException e) {
                             try {
                                 errBadPseudoAttrDatatype(eIriRef.class,
                                         "href"attrValue);
                             } catch (ClassNotFoundException ce) {
                             }
                         }
                         break;
                     case :
                         if (attrValue == null) {
                             break;
                         }
                         try {
                             MimeType mt = (MimeTypedl.createDatatype("mime-type");
                             mt.checkValid(attrValue);
                             attrValue = newAsciiLowerCaseStringFromString(attrValue);
                         } catch (DatatypeException e) {
                             badDatatype = true;
                             try {
                                 errBadPseudoAttrDatatype(eMimeType.class,
                                         "type"attrValue);
                             } catch (ClassNotFoundException ce) {
                             }
                         }
                         if (!badDatatype) {
                             if (attrValue.matches("application/xml(;.*)?")
                                     || attrValue.matches("text/xml(;.*)?")
                                     || attrValue.matches("application/xslt+xml(;.*)?")
                                     || attrValue.matches("text/xsl(;.*)?")
                                     || attrValue.matches("text/xslt(;.*)?")) {
                                 if (!attrValue.matches("text/xsl(;.*)?")) {
                                     warn("For indicating XSLT, \u201ctext/xsl\u201d is the only MIME type for the "
                                             + "\u201cxml-stylesheet\u201d pseudo-attribute \u201ctype\u201d that is supported across browsers.");
                                 }
                                 if () {
                                     warn("Browsers do not support multiple \u201cxml-stylesheet\u201d instructions with a "
                                             + "\u201ctype\u201d value that indicates XSLT.");
                                 }
                                  = true;
                             } else if (!attrValue.matches("^text/css(;.*)?$")) {
                                 warn("\u201ctext/css\u201d and \u201ctext/xsl\u201d are the only MIME types for the "
                                         + "\u201cxml-stylesheet\u201d pseudo-attribute \u201ctype\u201d that are supported across browsers.");
                             }
                         }
                         break;
                     case :
                         hasTitle = true;
                         if (attrValue == null) {
                             break;
                         }
                         if (!"".equals(attrValue)) {
                             hasNonEmptyTitle = true;
                         }
                         break;
                     case :
                         hasMedia = true;
                         if (attrValue == null) {
                             break;
                         }
                         try {
                             MediaQuery mq = (MediaQuerydl.createDatatype("media-query");
                             mq.checkValid(attrValue);
                         } catch (DatatypeException e) {
                             try {
                                 errBadPseudoAttrDatatype(eMediaQuery.class,
                                         "media"attrValue);
                             } catch (ClassNotFoundException ce) {
                             }
                         }
                         break;
                     case :
                         hasCharset = true;
                         if (attrValue == null) {
                             break;
                         }
                         try {
                             Charset c = (Charsetdl.createDatatype("charset");
                             c.checkValid(attrValue);
                         } catch (DatatypeException e) {
                             try {
                                 errBadPseudoAttrDatatype(eCharset.class,
                                         "charset"attrValue);
                             } catch (ClassNotFoundException ce) {
                             }
                         }
                         break;
                     case :
                         hasAlternate = true;
                         if (attrValue == null) {
                             break;
                         }
                         if ("yes".equals(attrValue)) {
                             alternateIsYes = true;
                         } else if (!"no".equals(attrValue)) {
                             err("The value of the \u201cxml-stylesheet\u201d pseudo-attribute \u201calternate\u201d "
                                     + "must be either \u201cyes\u201d or \u201cno\u201d.");
                         }
                         break;
                     default:
                         err("Pseudo-attribute \u201c"
                                 + attrName
                                 + "\u201D not allowed in \u201cxml-stylesheet\u201d instruction.");
                         break;
                 }
             }
             if (alternateIsYes && !hasNonEmptyTitle) {
                 err("An \u201cxml-stylesheet\u201d instruction with an \u201calternate\u201d pseudo-attribute "
                         + "whose value is \u201cyes\u201d must also have a \u201ctitle\u201d pseudo-attribute with a non-empty value.");
             }
         }
         if (!hasHref) {
             err("\u201cxml-stylesheet\u201d instruction lacks \u201chref\u201d pseudo-attribute. "
                     + "The \u201chref\u201d pseudo-attribute is required in all \u201cxml-stylesheet\u201d instructions.");
         }
         if ( && (hasTitle || hasMedia || hasCharset || hasAlternate)) {
             warn("When processing \u201cxml-stylesheet\u201d instructions, browsers ignore the pseudo-attributes "
                     + "\u201ctitle\u201d, \u201cmedia\u201d, \u201ccharset\u201d, and \u201calternate\u201d.");
         } else if (hasCharset) {
             warn("Some browsers ignore the value of the \u201cxml-stylesheet\u201d pseudo-attribute \u201ccharset\u201d.");
         }
     }

    
     /**
      * Collect a set of attributes and values from the data part of a PI.
      *
      * The bulk of this method and associated methods that follow it here are
      * copied from the nu.validator.htmlparser.impl.Tokenizer class, with
      * appropriate modifications.
      *
      * @see nu.validator.htmlparser.impl.Tokenizer
      * @see nu.validator.htmlparser.impl.ErrorReportingTokenizer
      */
 
             throws SAXException {
 
         int state = ;
         int returnState = ;
         char c = '\u0000';
         int pos = -1;
         int endPos = buf.length();
         boolean reconsume = false;
          = null;
          = null;
         stateloop: for (;;) {
             switch (state) {
                 case :
                     beforeattributenameloop: for (;;) {
                         if (reconsume) {
                             reconsume = false;
                         } else {
                             if (++pos == endPos) {
                                 break stateloop;
                             }
                             c = buf.charAt(pos);
                         }
                         /*
                          * Consume the next input character:
                          */
                         switch (c) {
                             case '\n':
                             case ' ':
                             case '\t':
                                 continue;
                             case '/':
                             case '>':
                             case '\"':
                             case '\'':
                             case '<':
                             case '=':
                                 /*
                                  * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
                                  * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
                                  * SIGN (=) Parse error.
                                  */
                                 errBadCharBeforeAttributeNameOrNull(c);
                                 /*
                                  * Treat it as per the "anything else" entry
                                  * below.
                                  */
                             default:
                                 /*
                                  * Anything else Start a new attribute in the
                                  * current tag token.
                                  */
                                 /*
                                  * Set that attribute's name to the current
                                  * input character,
                                  */
                                 clearStrBufAndAppendCurrentC(c);
                                 /*
                                  * and its value to the empty string.
                                  */
                                 // Will do later.
                                 /*
                                  * Switch to the attribute name state.
                                  */
                                 state = ;
                                 break beforeattributenameloop;
                             // continue stateloop;
                         }
                     }
                     // FALLTHRU DON'T REORDER
                 case :
                     attributenameloop: for (;;) {
                         if (++pos == endPos) {
                             attributeNameComplete();
                             addAttributeWithoutValue();
                             break stateloop;
                         }
                         c = buf.charAt(pos);
                         /*
                          * Consume the next input character:
                          */
                         switch (c) {
                             case '\n':
                             case ' ':
                             case '\t':
                                 attributeNameComplete();
                                 state = ;
                                 continue stateloop;
                             case '=':
                                 /*
                                  * U+003D EQUALS SIGN (=) Switch to the before
                                  * attribute value state.
                                  */
                                 attributeNameComplete();
                                 state = ;
                                 break attributenameloop;
                             // continue stateloop;
                             case '\"':
                             case '\'':
                             case '<':
                                 /*
                                  * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
                                  * (') U+003C LESS-THAN SIGN (<) Parse error.
                                  */
                                 errQuoteOrLtInAttributeNameOrNull(c);
                                 /*
                                  * Treat it as per the "anything else" entry
                                  * below.
                                  */
                             default:
                                 /*
                                  * Anything else Append the current input
                                  * character to the current attribute's name.
                                  */
                                 appendStrBuf(c);
                                 /*
                                  * Stay in the attribute name state.
                                  */
                                 continue;
                         }
                     }
                     // FALLTHRU DON'T REORDER
                 case :
                     beforeattributevalueloop: for (;;) {
                         if (++pos == endPos) {
                             addAttributeWithoutValue();
                             break stateloop;
                         }
                         c = buf.charAt(pos);
                         /*
                          * Consume the next input character:
                          */
                         switch (c) {
                             case '\n':
                             case ' ':
                             case '\t':
                                 continue;
                             case '"':
                                 /*
                                  * U+0022 QUOTATION MARK (") Switch to the
                                  * attribute value (double-quoted) state.
                                  */
                                 clearLongStrBufForNextState();
                                 state = ;
                                 break beforeattributevalueloop;
                             // continue stateloop;
                             case '&':
                                 /*
                                  * U+0026 AMPERSAND (&) Switch to the attribute
                                  * value (unquoted) state and reconsume this
                                  * input character.
                                  */
                                 clearLongStrBuf();
                                 state = ;
                                 reconsume = true;
                                 continue stateloop;
                             case '\'':
                                 /*
                                  * U+0027 APOSTROPHE (') Switch to the attribute
                                  * value (single-quoted) state.
                                  */
                                 clearLongStrBufForNextState();
                                 state = ;
                                 continue stateloop;
                             case '<':
                             case '=':
                             case '`':
                                 /*
                                  * U+003C LESS-THAN SIGN (<) U+003D EQUALS SIGN
                                  * (=) U+0060 GRAVE ACCENT (`)
                                  */
                                 errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c);
                                 /*
                                  * Treat it as per the "anything else" entry
                                  * below.
                                  */
                             default:
                                 /*
                                  * Anything else Append the current input
                                  * character to the current attribute's value.
                                  */
                                 clearLongStrBufAndAppendCurrentC(c);
                                 /*
                                  * Switch to the attribute value (unquoted)
                                  * state.
                                  */
 
                                 state = ;
                                 continue stateloop;
                         }
                     }
                     // FALLTHRU DON'T REORDER
                 case :
                     attributevaluedoublequotedloop: for (;;) {
                         if (reconsume) {
                             reconsume = false;
                         } else {
                             if (++pos == endPos) {
                                 addAttributeWithoutValue();
                                 break stateloop;
                             }
                             c = buf.charAt(pos);
                         }
                         /*
                          * Consume the next input character:
                          */
                         switch (c) {
                             case '"':
                                 /*
                                  * U+0022 QUOTATION MARK (") Switch to the after
                                  * attribute value (quoted) state.
                                  */
                                 addAttributeWithValue();
 
                                 state = ;
                                 break attributevaluedoublequotedloop;
                             // continue stateloop;
                             case '&':
                                 /*
                                  * U+0026 AMPERSAND (&) Switch to the character
                                  * reference in attribute value state, with the
                                  * additional allowed character being U+0022
                                  * QUOTATION MARK (").
                                  */
                                 clearStrBufAndAppendCurrentC(c);
                                 returnState = state;
                                 state = ;
                                 continue stateloop;
                             case '\n':
                                 appendLongStrBufLineFeed();
                                 continue;
                             default:
                                 /*
                                  * Anything else Append the current input
                                  * character to the current attribute's value.
                                  */
                                 appendLongStrBuf(c);
                                 /*
                                  * Stay in the attribute value (double-quoted)
                                  * state.
                                  */
                                 continue;
                         }
                     }
                     // FALLTHRU DON'T REORDER
                 case :
                     for (;;) {
                         if (++pos == endPos) {
                             break stateloop;
                         }
                         c = buf.charAt(pos);
                         /*
                          * Consume the next input character:
                          */
                         switch (c) {
                             case '\n':
                             case ' ':
                             case '\t':
                                 state = ;
                                 continue stateloop;
                             default:
                                 /*
                                  * Anything else Parse error.
                                  */
                                 errNoSpaceBetweenAttributes();
                                 /*
                                  * Reconsume the character in the before
                                  * attribute name state.
                                  */
                                 state = ;
                                 reconsume = true;
                                 continue stateloop;
                         }
                     }
                     // FALLTHRU DON'T REORDER
                 case :
                     errUnquotedAttributeValOrNull();
                     for (;;) {
                         if (reconsume) {
                             reconsume = false;
                         } else {
                             if (++pos == endPos) {
                                 addAttributeWithValue();
                                 break stateloop;
                             }
                             c = buf.charAt(pos);
                         }
                         /*
                          * Consume the next input character:
                          */
                         switch (c) {
                             case '\n':
                             case ' ':
                             case '\t':
                                 addAttributeWithValue();
                                 state = ;
                                 continue stateloop;
                             case '&':
                                 /*
                                  * U+0026 AMPERSAND (&) Switch to the character
                                  * reference in attribute value state, with the
                                  * additional allowed character being U+003E
                                  * GREATER-THAN SIGN (>)
                                  */
                                 clearStrBufAndAppendCurrentC(c);
                                 returnState = state;
                                 state = ;
                                 continue stateloop;
                             case '>':
                                 /*
                                  * U+003E GREATER-THAN SIGN (>) Emit the current
                                  * tag token.
                                  */
                                 // addAttributeWithValue();
                                 // state = emitCurrentTagToken(false, pos);
                                 // if (shouldSuspend) {
                                 // break stateloop;
                                 // }
                                 /*
                                  * Switch to the data state.
                                  */
                                 continue stateloop;
                             case '<':
                             case '\"':
                             case '\'':
                             case '=':
                             case '`':
                                 /*
                                  * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
                                  * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
                                  * SIGN (=) U+0060 GRAVE ACCENT (`) Parse error.
                                  */
                                 // errUnquotedAttributeValOrNull(c);
                                 /*
                                  * Treat it as per the "anything else" entry
                                  * below.
                                  */
                                 // fall through
                             default:
                                 /*
                                  * Anything else Append the current input
                                  * character to the current attribute's value.
                                  */
                                 appendLongStrBuf(c);
                                 /*
                                  * Stay in the attribute value (unquoted) state.
                                  */
                                 continue;
                         }
                     }
                     // XXX reorder point
                 case :
                     for (;;) {
                         if (++pos == endPos) {
                             addAttributeWithoutValue();
                             break stateloop;
                         }
                         c = buf.charAt(pos);
                         /*
                          * Consume the next input character:
                          */
                         switch (c) {
                             case '\n':
                             case ' ':
                             case '\t':
                                 continue;
                             case '=':
                                 /*
                                  * U+003D EQUALS SIGN (=) Switch to the before
                                  * attribute value state.
                                  */
                                 state = ;
                                 continue stateloop;
                             case '\"':
                             case '\'':
                             case '<':
                                 errQuoteOrLtInAttributeNameOrNull(c);
                                 /*
                                  * Treat it as per the "anything else" entry
                                  * below.
                                  */
                             default:
                                 addAttributeWithoutValue();
                                 /*
                                  * Anything else Start a new attribute in the
                                  * current tag token.
                                  */
                                 /*
                                  * Set that attribute's name to the current
                                  * input character,
                                  */
                                 clearStrBufAndAppendCurrentC(c);
                                 /*
                                  * and its value to the empty string.
                                  */
                                 // Will do later.
                                 /*
                                  * Switch to the attribute name state.
                                  */
                                 state = ;
                                 continue stateloop;
                         }
                     }
                     // XXX reorder point
                 case :
                     attributevaluesinglequotedloop: for (;;) {
                         if (reconsume) {
                             reconsume = false;
                         } else {
                             if (++pos == endPos) {
                                 addAttributeWithoutValue();
                                 break stateloop;
                             }
                             c = buf.charAt(pos);
                         }
                         /*
                          * Consume the next input character:
                          */
                         switch (c) {
                             case '\'':
                                 /*
                                  * U+0027 APOSTROPHE (') Switch to the after
                                  * attribute value (quoted) state.
                                  */
                                 addAttributeWithValue();
                                 state = ;
                                 continue stateloop;
                             case '&':
                                 /*
                                  * U+0026 AMPERSAND (&) Switch to the character
                                  * reference in attribute value state, with the
                                  * additional allowed character being U+0027
                                  * APOSTROPHE (').
                                  */
                                 clearStrBufAndAppendCurrentC(c);
                                 returnState = state;
                                 state = ;
                                 break attributevaluesinglequotedloop;
                             // continue stateloop;
                             case '\n':
                                 appendLongStrBufLineFeed();
                                 continue;
                             default:
                                 /*
                                  * Anything else Append the current input
                                  * character to the current attribute's value.
                                  */
                                 appendLongStrBuf(c);
                                 /*
                                  * Stay in the attribute value (single-quoted)
                                  * state.
                                  */
                                 continue;
                         }
                     }
                     // FALLTHRU DON'T REORDER
                 case :
                     if (++pos == endPos) {
                         break stateloop;
                     }
                     c = buf.charAt(pos);
                     /*
                      * Unlike the definition in the spec, this state does not
                      * return a value and never requires the caller to
                      * backtrack. This state takes care of emitting characters
                      * or appending to the current attribute value. It also
                      * takes care of that in the case when consuming the
                      * character reference fails.
                      */
                     /*
                      * This section defines how to consume a character
                      * reference. This definition is used when parsing character
                      * references in text and in attributes.
                      * 
                      * The behavior depends on the identity of the next
                      * character (the one immediately after the U+0026 AMPERSAND
                      * character):
                      */
                     switch (c) {
                         case '#':
                             /*
                              * U+0023 NUMBER SIGN (#) Consume the U+0023 NUMBER
                              * SIGN.
                              */
                             appendStrBuf('#');
                             state = ;
                             continue stateloop;
                         default:
                             if (c == ) {
                                 emitOrAppendStrBuf(returnState);
                                 state = returnState;
                                 reconsume = true;
                                 continue stateloop;
                             }
                              = -1;
                              = 0;
                              = (. - 1);
                              = -1;
                              = 0;
                             state = ;
                             reconsume = true;
                             // FALL THROUGH continue stateloop;
                     }
                     // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
                 case :
                     outer: for (;;) {
                         if (reconsume) {
                             reconsume = false;
                         } else {
                             if (++pos == endPos) {
                                 break stateloop;
                             }
                             c = buf.charAt(pos);
                         }
                         ++;
                         /*
                          * Consume the maximum number of characters possible,
                          * with the consumed characters matching one of the
                          * identifiers in the first column of the named
                          * character references table (in a case-sensitive
                          * manner).
                          */
                         hiloop: for (;;) {
                             if ( == -1) {
                                 break hiloop;
                             }
                             if ( == [].length) {
                                 break hiloop;
                             }
                             if ( > [].length) {
                                 break outer;
                             } else if (c < [][]) {
                                 --;
                             } else {
                                 break hiloop;
                             }
                         }
 
                         loloop: for (;;) {
                             if ( < ) {
                                 break outer;
                             }
                             if ( == [].length) {
                                  = ;
                                  = ;
                                 ++;
                             } else if ( > [].length) {
                                 break outer;
                             } else if (c > [][]) {
                                 ++;
                             } else {
                                 break loloop;
                             }
                         }
                         if ( < ) {
                             break outer;
                         }
                         appendStrBuf(c);
                         continue;
                     }
 
                     if ( == -1) {
                         /*
                          * If no match can be made, then this is a parse error.
                          */
                         errNoNamedCharacterMatch();
                         emitOrAppendStrBuf(returnState);
                         state = returnState;
                         reconsume = true;
                         continue stateloop;
                     } else {
                         char[] candidateArr = [];
                         if (candidateArr[candidateArr.length - 1] != ';') {
                             /*
                              * If the last character matched is not a U+003B
                              * SEMICOLON (;), there is a parse error.
                              */
                             if ((returnState & ) != 0) {
                                 /*
                                  * If the entity is being consumed as part of an
                                  * attribute, and the last character matched is
                                  * not a U+003B SEMICOLON (;),
                                  */
                                 char ch;
                                 if ( == ) {
                                     ch = c;
                                 } else {
                                     // if (strBufOffset != -1) {
                                     // ch = buf[strBufOffset + strBufMark];
                                     // } else {
                                     ch = [];
                                     // }
                                 }
                                 if ((ch >= '0' && ch <= '9')
                                         || (ch >= 'A' && ch <= 'Z')
                                         || (ch >= 'a' && ch <= 'z')) {
                                     /*
                                     * and the next character is in the range
                                     * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
                                     * U+0041 LATIN CAPITAL LETTER A to U+005A
                                     * LATIN CAPITAL LETTER Z, or U+0061 LATIN
                                     * SMALL LETTER A to U+007A LATIN SMALL
                                     * LETTER Z, then, for historical reasons,
                                     * all the characters that were matched
                                     * after the U+0026 AMPERSAND (&) must be
                                     * unconsumed, and nothing is returned.
                                     */
                                    errNoNamedCharacterMatch();
                                    appendStrBufToLongStrBuf();
                                    state = returnState;
                                    reconsume = true;
                                    continue stateloop;
                                }
                            }
                            if ((returnState & ) != 0) {
                                errUnescapedAmpersandInterpretedAsCharacterReference();
                            }
                        }
                        /*
                         * Otherwise, return a character token for the character
                         * corresponding to the entity name (as given by the
                         * second column of the named character references
                         * table).
                         */
                        char[] val = [];
                        emitOrAppend(valreturnState);
                        // this is so complicated!
                        if ( < ) {
                            // if (strBufOffset != -1) {
                            // if ((returnState & (~1)) != 0) {
                            // for (int i = strBufMark; i < strBufLen; i++) {
                            // appendLongStrBuf(buf[strBufOffset + i]);
                            // }
                            // } else {
                            // tokenHandler.characters(buf, strBufOffset
                            // + strBufMark, strBufLen
                            // - strBufMark);
                            // }
                            // } else {
                            if ((returnState & ) != 0) {
                                for (int i = i < i++) {
                                    appendLongStrBuf([i]);
                                }
                            }
                            // }
                        }
                        state = returnState;
                        reconsume = true;
                        continue stateloop;
                        /*
                         * If the markup contains I'm &notit; I tell you, the
                         * entity is parsed as "not", as in, I'm ¬it; I tell
                         * you. But if the markup was I'm &notin; I tell you,
                         * the entity would be parsed as "notin;", resulting in
                         * I'm ∉ I tell you.
                         */
                    }
                    // XXX reorder point
                case :
                    if (++pos == endPos) {
                        break stateloop;
                    }
                    c = buf.charAt(pos);
                     = -1;
                     = 0;
                     = false;
                    /*
                     * The behavior further depends on the character after the
                     * U+0023 NUMBER SIGN:
                     */
                    switch (c) {
                        case 'x':
                            /*
                             * U+0078 LATIN SMALL LETTER X U+0058 LATIN CAPITAL
                             * LETTER X Consume the X.
                             * 
                             * Follow the steps below, but using the range of
                             * characters U+0030 DIGIT ZERO through to U+0039
                             * DIGIT NINE, U+0061 LATIN SMALL LETTER A through
                             * to U+0066 LATIN SMALL LETTER F, and U+0041 LATIN
                             * CAPITAL LETTER A, through to U+0046 LATIN CAPITAL
                             * LETTER F (in other words, 0-9, A-F, a-f).
                             * 
                             * When it comes to interpreting the number,
                             * interpret it as a hexadecimal number.
                             */
                            appendStrBuf(c);
                            state = ;
                            continue stateloop;
                        case 'X':
                            /*
                             * XML requires a lowercase 'x' for hex character
                             * refs
                             */
                            errUpperCaseXinHexNcr();
                            appendStrBuf(c);
                            state = ;
                            continue stateloop;
                        default:
                            /*
                             * Anything else Follow the steps below, but using
                             * the range of characters U+0030 DIGIT ZERO through
                             * to U+0039 DIGIT NINE (i.e. just 0-9).
                             * 
                             * When it comes to interpreting the number,
                             * interpret it as a decimal number.
                             */
                            state = ;
                            reconsume = true;
                            // FALL THROUGH continue stateloop;
                    }
                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
                case :
                    decimalloop: for (;;) {
                        if (reconsume) {
                            reconsume = false;
                        } else {
                            if (++pos == endPos) {
                                break stateloop;
                            }
                            c = buf.charAt(pos);
                        }
                        // Deal with overflow gracefully
                        if ( < ) {