Start line:  
End line:  

Snippet Preview

Snippet HTML Code

Stack Overflow Questions
   /* XmlParser.java -- 
    Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
    Portions Copyright 2006-2007 Henri Sivonen
    Portions Copyright 2007-2008 Mozilla Foundation
   
    This file is part of GNU JAXP.
   
    GNU JAXP is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.
  
   GNU JAXP is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.
  
   You should have received a copy of the GNU General Public License
   along with GNU JAXP; see the file COPYING.  If not, write to the
   Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.
  
   Linking this library statically or dynamically with other modules is
   making a combined work based on this library.  Thus, the terms and
   conditions of the GNU General Public License cover the whole
   combination.
  
   As a special exception, the copyright holders of this library give you
   permission to link this library with independent modules to produce an
   executable, regardless of the license terms of these independent
   modules, and to copy and distribute the resulting executable under
   terms of your choice, provided that you also meet, for each linked
   independent module, the terms and conditions of the license of that
   module.  An independent module is a module which is not derived from
   or based on this library.  If you modify this library, you may extend
   this exception to your version of the library, but you are not
   obligated to do so.  If you do not wish to do so, delete this
   exception statement from your version.
  
   Partly derived from code which carried the following notice:
  
   Copyright (c) 1997, 1998 by Microstar Software Ltd.
  
   AElfred is free for both commercial and non-commercial use and
   redistribution, provided that Microstar's copyright and disclaimer are
   retained intact.  You are free to modify AElfred for your own use and
   to redistribute AElfred with your modifications, provided that the
   modifications are clearly documented.
  
   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   merchantability or fitness for a particular purpose.  Please use it AT
   YOUR OWN RISK.
   */
  
  package nu.validator.gnu.xml.aelfred2;
  
  import java.io.Reader;
  import java.util.HashMap;
  
  
  
  // Organized imports -- 2005-08-20 hsivonen
  // Removed unused imports -- 2015-03-03 rwhogg
  
  /**
   * Parse XML documents and return parse events through call-backs. Use the
   * <code>SAXDriver</code> class as your entry point, as all internal parser
   * interfaces are subject to change.
   *
   * @author Written by David Megginson &lt;dmeggins@microstar.com&gt;
   *         (version 1.2a with bugfixes)
   * @author Updated by David Brownell &lt;dbrownell@users.sourceforge.net&gt;
   * @author Modified by Henri Sivonen &lt;hsivonen@iki.fi&gt;
   * @see SAXDriver
   */
  
  final class XmlParser {
  
      // //////////////////////////////////////////////////////////////////////
      // Constants.
      // //////////////////////////////////////////////////////////////////////
  
     private static final int SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00;
 
     private static final char[] NEW_LINE_ARR = {'\n'};
     
     //
     // Constants for element content type.
     //
 
    
Constant: an element has not been declared.

 
     public final static int CONTENT_UNDECLARED = 0;

    
Constant: the element has a content model of ANY.

 
     public final static int CONTENT_ANY = 1;

    
Constant: the element has declared content of EMPTY.

 
     public final static int CONTENT_EMPTY = 2;

    
Constant: the element has mixed content.

 
     public final static int CONTENT_MIXED = 3;

    
Constant: the element has element content.

 
     public final static int CONTENT_ELEMENTS = 4;
 
     //
     // Constants for the entity type.
     //
 
    
Constant: the entity has not been declared.

 
     public final static int ENTITY_UNDECLARED = 0;

    
Constant: the entity is internal.

 
     public final static int ENTITY_INTERNAL = 1;

    
Constant: the entity is external, non-parsable data.

 
     public final static int ENTITY_NDATA = 2;

    
Constant: the entity is external XML data.

 
     public final static int ENTITY_TEXT = 3;
 
     //
     // Attribute type constants are interned literal strings.
     //
 
     //
     // Constants for attribute default value.
     //
 
    
Constant: the attribute is not declared.

 
     public final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30;

    
 
     public final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31;

    
Constant: the attribute was declared #IMPLIED.

 
     public final static int ATTRIBUTE_DEFAULT_IMPLIED = 32;

    
Constant: the attribute was declared #REQUIRED.

 
     public final static int ATTRIBUTE_DEFAULT_REQUIRED = 33;

    
 
     public final static int ATTRIBUTE_DEFAULT_FIXED = 34;
 
     //
     // Constants for input.
     //
     private final static int INPUT_NONE = 0;
 
     private final static int INPUT_INTERNAL = 1;
 
     private final static int INPUT_READER = 5;
 
     //
     // Flags for reading literals.
     //
     // expand general entity refs (attribute values in dtd and content)
     private final static int LIT_ENTITY_REF = 2;
 
     // normalize this value (space chars) (attributes, public ids)
     private final static int LIT_NORMALIZE = 4;
 
     // literal is an attribute value
     private final static int LIT_ATTRIBUTE = 8;
 
     // don't expand parameter entities
     private final static int LIT_DISABLE_PE = 16;
 
     // don't expand [or parse] character refs
     private final static int LIT_DISABLE_CREF = 32;
 
     // don't parse general entity refs
     private final static int LIT_DISABLE_EREF = 64;
 
     // literal is a public ID value
     private final static int LIT_PUBID = 256;
 
     // Emit warnings for relative URIs with no base URI.
     static boolean uriWarnings;
     static {
         String key = "gnu.xml.aelfred2.XmlParser.uriWarnings";
         try {
              = "true".equals(System.getProperty(key));
         } catch (SecurityException e) {
              = false;
         }
     }
 
     //
     // The current XML handler interface.
     //
     private SAXDriver handler;
 
     //
     // I/O information.
     //
     private Reader reader// current reader
 
     private InputStream is// current input stream
 
     private int line// current line number
 
     private int linePrev// the line of the previous character -- hsivonen
 
     // 2007-09-28
 
     private int column// current column number
 
     private int columnPrev// the column of the previous character -- hsivonen
 
     // 2007-09-28
 
     private boolean nextCharOnNewLine// indicates whether the next character
 
     // is on the next line -- hsivonen
     // 2007-09-28
 
     private int sourceType// type of input source
 
     private LinkedList<InputinputStack// stack of input soruces
 
     private String characterEncoding// current character encoding
 
     private int currentByteCount// bytes read from current source
 
     //
     // Buffers for decoded but unparsed character input.
     //
     private char[] readBuffer;
 
     private int readBufferPos;
 
     private int readBufferLength;
 
     private int readBufferOverflow// overflow from last data chunk.
 
     //
     // Buffer for undecoded raw byte input.
     //
 //    private final static int READ_BUFFER_MAX = 16384;
     private final static int READ_BUFFER_MAX = 60;
 
     private byte[] rawReadBuffer;
 
     //
     // Buffer for attribute values, char refs, DTD stuff.
     //
     private static int DATA_BUFFER_INITIAL = 4096;
 
     private char[] dataBuffer;
 
     private int dataBufferPos;
 
     //
     // Buffer for parsed names.
     //
     private static int NAME_BUFFER_INITIAL = 1024;
 
     private char[] nameBuffer;
 
     private int nameBufferPos;
 
     //
     // Save any standalone flag
     //
     private boolean docIsStandalone;
 
     //
     // Hashtables for DTD information on elements, entities, and notations.
     // Populated until we start ignoring decls (because of skipping a PE)
     //
     private HashMap<StringElementDeclelementInfo;
 
     private HashMap<StringEntityInfoentityInfo;
 
     private HashMap<StringStringnotationInfo;
 
     private boolean skippedPE;
 
     //
     // Element type currently in force.
     //
     private String currentElement;
 
     private int currentElementContent;
 
     //
     // Stack of entity names, to detect recursion.
     //
     private LinkedList<StringentityStack;
 
     //
     // PE expansion is enabled in most chunks of the DTD, not all.
     // When it's enabled, literals are treated differently.
     //
     private boolean inLiteral;
 
     private boolean expandPE;
 
     private boolean peIsError;
 
     //
     // can't report entity expansion inside two constructs:
     // - attribute expansions (internal entities only)
     // - markup declarations (parameter entities only)
     //
     private boolean doReport;
 
     //
     // Symbol table, for caching interned names.
     //
     // These show up wherever XML names or nmtokens are used: naming elements,
     // attributes, PIs, notations, entities, and enumerated attribute values.
     //
     // NOTE: This hashtable doesn't grow. The default size is intended to be
     // rather large for most documents. Example: one snapshot of the DocBook
     // XML 4.1 DTD used only about 350 such names. As a rule, only pathological
     // documents (ones that don't reuse names) should ever see much collision.
     //
     // Be sure that SYMBOL_TABLE_LENGTH always stays prime, for best hashing.
     // "2039" keeps the hash table size at about two memory pages on typical
     // 32 bit hardware.
     //
     private final static int SYMBOL_TABLE_LENGTH = 2039;
 
     private Object[][] symbolTable;
 
     //
     // Hash table of attributes found in current start tag.
     //
     private String[] tagAttributes;
 
     private int tagAttributePos;
 
     //
     // Utility flag: are we in CDATA? If so, whitespace isn't ignorable.
     // 
     private boolean inCDATA;
 
     //
     // Xml version.
     //  
     private static final int XML_10 = 0;
 
     private static final int XML_11 = 1;
 
     private int xmlVersion = ;
 
     //
     // Normalization checking
     //
 
 
 
     // ////////////////////////////////////////////////////////////////////
     // Constructors.
     // //////////////////////////////////////////////////////////////////////
 
    
     /**
      * Construct a new parser with no associated handler.
      */

 
     // package private
     XmlParser() {
         // Intentionally empty: the constructor itself performs no setup.
     }

    
     /**
      * Set the handler that will receive parsing events.
      *
      * @param handler The handler to receive callback events.
      * @see #doParse
      */
 
     // package private
     void setHandler(SAXDriver handler) {
         this. = handler;
     }

    
     /**
      * Parse an XML document from the character stream, byte stream, or URI
      * that you provide (in that order of preference). Any URI that you
      * supply will become the base URI for resolving relative URIs, and may
      * be used to acquire a reader or byte stream.
      *
      * <p>Only one thread at a time may use this parser; since it is private
      * to this package, post-parse cleanup is done by the caller, which
      * MUST NOT REUSE the parser (just null it).
      *
      * @param systemId Absolute URI of the document; should never be null,
      *        but may be so iff a reader <em>or</em> a stream is provided.
      * @param publicId The public identifier of the document, or null.
      * @param reader A character stream; must be null if stream isn't.
      * @param stream A byte input stream; must be null if reader isn't.
      * @param encoding The suggested encoding, or null if unknown.
      * @exception java.lang.Exception Basically SAXException or IOException
      */
 
     // package private
     // Runs one complete parse: rejects a missing callback handler,
     // predeclares the five built-in entities, pushes the document input,
     // parses it, and closes the supplied reader and stream in the finally
     // block.
     // NOTE(review): this copy of the source has lost several identifiers
     // and separators (e.g. the operand before "== null", the target of
     // "= false" and the receiver of ".startDocument()"); restore them from
     // the upstream file before compiling.
     void doParse(String systemIdString publicIdReader reader,
             InputStream streamString encodingthrows Exception {
         if ( == null) {
             throw new IllegalStateException("no callback handler");
         }
 
          = false;
         initializeVariables();
 
         // predeclare the built-in entities here (replacement texts)
         // we don't need to intern(), since we're guaranteed literals
         // are always (globally) interned.
         setInternalEntity("amp""&#38;");
         setInternalEntity("lt""&#60;");
         setInternalEntity("gt""&#62;");
         setInternalEntity("apos""&#39;");
         setInternalEntity("quot""&#34;");
 
         try {
             // pushURL first to ensure locator is correct in startDocument
             // ... it might report an IO or encoding exception.
             // NOTE(review): the code below calls startDocument() before
             // pushURL(), which contradicts the comment above -- confirm.
             .startDocument();
             pushURL(false"[document]",
                     // default baseURI: null
                     new ExternalIdentifiers(publicIdsystemIdnull), reader,
                     streamencodingfalse);
 
             parseDocument();
         } catch (EOFException e) {
             // empty input
             fatal("Empty document, with no root element.");
         } finally {
             // Best-effort cleanup: failures while closing are ignored.
             if (reader != null) {
                 try {
                     reader.close();
                 } catch (IOException e) {
                     /* ignore */
                 }
             }
             if (stream != null) {
                 try {
                     stream.close();
                 } catch (IOException e) {
                     /* ignore */
                 }
             }
             // NOTE(review): the closed object was stripped from this copy;
             // presumably one of the parser's own input fields.
             if ( != null) {
                 try {
                     .close();
                 } catch (IOException e) {
                     /* ignore */
                 }
             }
         }
     }
 
     // ////////////////////////////////////////////////////////////////////
     // Error reporting.
     // ////////////////////////////////////////////////////////////////////
 
    
     /**
      * Report an error.
      *
      * @param message The error message.
      * @param textFound The text that caused the error (or null).
      * @param textExpected The text that was expected (or null).
      * @see SAXDriver#error
      * @see #line
      */
 
     private void fatal(String messageString textFoundString textExpected)
             throws SAXException {
         // smart quotes -- 2005-08-20 hsivonen
         if (textFound != null) {
             message = message + " (found \u201C" + textFound + "\u201D)";
         }
         if (textExpected != null) {
             message = message + " (expected \u201C" + textExpected + "\u201D)";
         }
         .fatal(message);
 
         // "can't happen"
         throw new SAXException(message);
     }

    
     /**
      * Report a serious error.
      *
      * @param message The error message.
      * @param textFound The character that caused the error.
      * @param textExpected The text that was expected (or null).
      */
 
     private void fatal(String messagechar textFoundString textExpected)
             throws SAXException {
         fatal(messagenew Character(textFound).toString(), textExpected);
     }

    
     /**
      * Report typical case fatal errors.
      */
 
     private void fatal(String messagethrows SAXException {
         .fatal(message);
     }

    
     /**
      * Report non-fatal errors.
      */
 
     private void err(String messagethrows SAXException {
         .verror(message);
     }
 
     // ////////////////////////////////////////////////////////////////////
     // Major syntactic productions.
     // ////////////////////////////////////////////////////////////////////
 
    
     /**
      * Parse an XML document.
      *
      * <pre>
      *   [1] document ::= prolog element Misc*
      * </pre>
      *
      * <p>This is the top-level parsing function for a single XML document.
      * As a minimum, a well-formed document must have a document element,
      * and a valid document must have a prolog (one with doctype) as well.
      */

 
     // Parses the prolog and the root element, then any trailing Misc.
     // Reaching end of input during the trailing part is the normal exit;
     // any further character is reported as a fatal error.
     // NOTE(review): separators and some identifiers were lost in this copy
     // (e.g. the fatal(...) argument lists and the receivers of ".end()").
     private void parseDocument() throws Exception {
         try { // added by MHK
             boolean sawDTD = parseProlog();
             require('<');
             // The argument is true when no DOCTYPE was seen.
             parseElement(!sawDTD);
         } catch (EOFException ee) { // added by MHK
             fatal("premature end of file""[EOF]"null);
         }
 
         try {
             parseMisc(); // skip all white, PIs, and comments
             char c = readCh(); // if this doesn't throw an exception...
             fatal("unexpected characters after document end"cnull);
         } catch (EOFException e) {
             // Expected path: end of input after the document element.
             // NOTE(review): the two objects whose end() is called were
             // stripped from this copy; their identity cannot be told here.
             if ( != null) {
                 .end();
             }
             if ( != null) {
                 .end();
             }
             return;
         }
     }
 
     static final char[] startDelimComment = { '<''!''-''-' };
 
     static final char[] endDelimComment = { '-''-' };

    
     /**
      * Skip a comment.
      *
      * <pre>
      *   [15] Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* "--&gt;"
      * </pre>
      *
      * <p>(The <code>&lt;!--</code> has already been read.)
      */

 
     private void parseComment() throws Exception {
         boolean saved = ;
 
          = false;
         parseUntil();
         require('>');
          = saved;
         .comment(, 0, );
          = 0;
     }
 
     static final char[] startDelimPI = { '<''?' };
 
     static final char[] endDelimPI = { '?''>' };

    
     /**
      * Parse a processing instruction and do a call-back.
      *
      * <pre>
      *   [16] PI ::= '&lt;?' PITarget
      *      (S (Char* - (Char* '?&gt;' Char*)))?
      *      '?&gt;'
      *   [17] PITarget ::= Name - ( ('X'|'x') ('M'|'m') ('L'|'l') )
      * </pre>
      *
      * <p>(The <code>&lt;?</code> has already been read.)
      */

 
     private void parsePI() throws SAXExceptionIOException {
         String name;
         boolean saved = ;
 
          = false;
         name = readNmtoken(true);
         // NE08
         if (name.indexOf(':') >= 0) {
             fatal("Illegal character(':') in processing instruction name ",
                     namenull);
         }
         if ("xml".equalsIgnoreCase(name)) {
             fatal("Illegal processing instruction target"namenull);
         }
         if (!tryRead()) {
             requireWhitespace();
             parseUntil();
         }
          = saved;
     }
 
     static final char[] endDelimCDATA = { ']'']''>' };
 
     private boolean isDirtyCurrentElement;
 
     private boolean alreadyWarnedAboutPrivateUseCharacters;
 
     private char prev;

    
     /**
      * Parse a CDATA section.
      *
      * <pre>
      *   [18] CDSect ::= CDStart CData CDEnd
      *   [19] CDStart ::= '&lt;![CDATA['
      *   [20] CData ::= (Char* - (Char* ']]&gt;' Char*))
      *   [21] CDEnd ::= ']]&gt;'
      * </pre>
      *
      * <p>(The '&lt;![CDATA[' has already been read.)
      */

 
     private void parseCDSect() throws Exception {
         parseUntil();
         dataBufferFlush();
     }

    
     /**
      * Parse the prolog of an XML document.
      *
      * <pre>
      *   [22] prolog ::= XMLDecl? Misc* (Doctypedecl Misc*)?
      * </pre>
      *
      * <p>We do not look for the XML declaration here, because it was
      * handled by pushURL ().
      *
      * @return true if a DTD was read.
      * @see #pushURL
      */
 
     private boolean parseProlog() throws Exception {
         parseMisc();
 
         if (tryRead("<!DOCTYPE")) {
             parseDoctypedecl();
             parseMisc();
             return true;
         }
         return false;
     }
 
     private void checkLegalVersion(String versionthrows SAXException {
         int len = version.length();
         for (int i = 0; i < leni++) {
             char c = version.charAt(i);
             if (('0' <= c) && (c <= '9')) {
                 continue;
             }
             if ((c == '_') || (c == '.') || (c == ':') || (c == '-')) {
                 continue;
             }
             if (('a' <= c) && (c <= 'z')) {
                 continue;
             }
             if (('A' <= c) && (c <= 'Z')) {
                 continue;
             }
             fatal("illegal character in version"version"1.0");
         }
     }

    
     /**
      * Parse the XML declaration.
      *
      * <pre>
      *   [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?&gt;'
      *   [24] VersionInfo ::= S 'version' Eq
      *      ("'" VersionNum "'" | '"' VersionNum '"' )
      *   [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')*
      *   [32] SDDecl ::= S 'standalone' Eq
      *      ( "'" ('yes' | 'no') "'" | '"' ("yes" | "no") '"' )
      *   [80] EncodingDecl ::= S 'encoding' Eq
      *      ( '"' EncName '"' | "'" EncName "'" )
      *   [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
      * </pre>
      *
      * <p>(The <code>&lt;?xml</code> and whitespace have already been read.)
      *
      * @return the encoding in the declaration, uppercased; or null
      * @see #parseTextDecl(java.lang.String)
      * @see #setupDecoding
      */
 
     // Reads the mandatory version, then the optional encoding and
     // standalone pseudo-attributes, then the closing "?>". Returns the
     // declared encoding name, or null when no encoding was declared.
     // NOTE(review): this copy has lost identifiers and separators (e.g.
     // the three flags OR-ed into "flags", the operands of "= ;", and
     // arguments of draconianInputStreamReader); restore from upstream.
     private String parseXMLDecl(String encodingthrows SAXException,
             IOException {
         String version;
         String encodingName = null;
         String standalone = null;
         int flags =  |  | ;
 
         // Read the version.
         require("version");
         parseEq();
         checkLegalVersion(version = readLiteral(flags));
         // Only "1.0" is accepted; "1.1" gets its own error message.
         if (!version.equals("1.0")) {
             if (version.equals("1.1")) {
                 fatal("XML 1.1 not supported."); // 2006-04-24 hsivonen
             } else {
                 fatal("illegal XML version"version"1.0"); // removed 1.1
                 // -- 2006-04-24
                 // hsivonen
             }
         } else {
              = ;
         }
         // Try reading an encoding declaration.
         boolean white = tryWhitespace();
 
         if (tryRead("encoding")) {
             if (!white) {
                 fatal("whitespace required before 'encoding='");
             }
             parseEq();
             encodingName = readLiteral(flags);
             checkEncodingLiteral(encodingName); // 2006-04-28 hsivonen
             if ( == null) {
                 draconianInputStreamReader(encodingNametrue);
             } else {
                 checkEncodingMatch(encodingencodingName);
             }
         }
 
         // Try reading a standalone declaration
         if (encodingName != null) {
             white = tryWhitespace();
         } else {
             // No encoding declaration: fall back to UTF-8 when no external
             // encoding was supplied either.
             if (encoding == null) {
                 draconianInputStreamReader("UTF-8"false); // 2006-04-24
                 // hsivonen
             }
             warnAboutLackOfEncodingDecl(encoding);
         }
         if (tryRead("standalone")) {
             if (!white) {
                 fatal("whitespace required before 'standalone='");
             }
             parseEq();
             standalone = readLiteral(flags);
             if ("yes".equals(standalone)) {
                  = true;
             } else if (!"no".equals(standalone)) {
                 fatal("standalone flag must be 'yes' or 'no'");
             }
         }
 
         skipWhitespace();
         require("?>");
 
         return encodingName;
     }
 
     // hsivonen 2006-04-28
     private void checkEncodingLiteral(String encodingNamethrows SAXException {
         if (encodingName == null) {
             return;
         }
         if (encodingName.length() == 0) {
             fatal("The empty string does not a legal encoding name.");
         }
         char c = encodingName.charAt(0);
         if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')))) {
             fatal("The encoding name must start with an ASCII letter.");
         }
         for (int i = 1; i < encodingName.length(); i++) {
             c = encodingName.charAt(i);
             if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))
                     || ((c >= '0') && (c <= '9')) || (c == '.') || (c == '_') || (c == '-'))) {
                 fatal("Illegal character in encoding name: U+"
                         + Integer.toHexString(c) + ".");
             }
         }
     }

    
     /**
      * Parse a text declaration.
      *
      * <pre>
      *   [79] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?&gt;'
      *   [80] EncodingDecl ::= S 'encoding' Eq
      *      ( '"' EncName '"' | "'" EncName "'" )
      *   [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
      * </pre>
      *
      * <p>(The <code>&lt;?xml</code> and whitespace have already been read.)
      *
      * @return the encoding in the declaration, uppercased; or null
      * @see #parseXMLDecl(java.lang.String)
      * @see #setupDecoding
      */
 
     // Reads an optional version, then the mandatory encoding
     // pseudo-attribute and the closing "?>". Returns the declared
     // encoding name.
     // NOTE(review): this copy has lost identifiers and separators (the
     // flags OR-ed into "flags", the operand before "== null", and call
     // arguments); restore from upstream before compiling.
     private String parseTextDecl(String encodingthrows SAXException,
             IOException {
         String encodingName = null;
         int flags =  |  | ;
 
         // Read an optional version.
         if (tryRead("version")) {
             String version;
             parseEq();
             checkLegalVersion(version = readLiteral(flags));
             // Only "1.0" is accepted; "1.1" gets its own error message.
             if (!version.equals("1.0")) {
                 if (version.equals("1.1")) {
                     fatal("XML 1.1 not supported."); // 2006-04-24 hsivonen
                 } else {
                     fatal("illegal XML version"version"1.0"); // removed
                     // 1.1 --
                     // 2006-04-24
                     // hsivonen
                 }
             }
             requireWhitespace();
         }
 
         // Read the encoding.
         require("encoding");
         parseEq();
         encodingName = readLiteral(flags);
         checkEncodingLiteral(encodingName); // 2006-04-28 hsivonen
         if ( == null) {
             draconianInputStreamReader(encodingNametrue);
         } else {
             checkEncodingMatch(encodingencodingName);
         }
         skipWhitespace();
         require("?>");
 
         return encodingName;
     }
 
     // Compares the encoding declared in the document ("detected") with the
     // one in use. With no external encoding ("used" is null) a mismatch is
     // fatal; with a non-empty external encoding a mismatch only produces a
     // warning.
     // NOTE(review): the receiver of equalsIgnoreCase, the receiver of
     // warn(), and possibly a trailing fatal(...) argument were stripped
     // from this copy.
     private void checkEncodingMatch(String usedString detected)
             throws SAXException {
         // method added -- 2006-02-03 hsivonen
         if (used == null) {
             if (!.equalsIgnoreCase(detected)) {
                 fatal(
                         "Declared character encoding was not the one sniffed from the BOM.",
                         detected);
             }
         } else {
             // An empty "used" string is treated as "no external
             // information" and never triggers the warning.
             if (!"".equals(used) && !used.equalsIgnoreCase(detected)) {
                 .warn("External encoding information specified "
                         + used
                         + ", but XML declaration specified "
                         + detected
                         + ". Allowing external to override per RFC 3023. The well-formedness status of this document may change when decoupled from the external character encoding information.");
             }
         }
     }
 
     private void draconianInputStreamReader(String encoding,
             InputStream streamboolean requireAsciiSuperset)
             throws SAXExceptionIOException {
         draconianInputStreamReader(encodingstreamrequireAsciiSuperset,
                 encoding);
     }
 
     // Looks up the named encoding, reports problems with the name
     // (not an ASCII superset when one is required, unregistered,
     // non-preferred, or outside a short list of common encodings), and
     // creates an InputStreamReader over the stream with the encoding's
     // decoder. An unsupported encoding is reported as fatal.
     // NOTE(review): this copy has lost identifiers and separators (the
     // first two assignments, the receivers of warn(), the arguments of
     // onMalformedInput/onUnmappableCharacter, and the assigned field).
     private void draconianInputStreamReader(String encoding,
             InputStream streamboolean requireAsciiSupersetString actualName)
             throws SAXExceptionIOException {
         // method added -- 2005-08-21 hsivonen
         // revised -- 2008-03-17 hsivonen
          = ;
          = Encoding.toAsciiLowerCase(actualName);
         // All comparisons below use the ASCII-lowercased name.
         encoding = Encoding.toAsciiLowerCase(encoding);
         try {
             Encoding cs = Encoding.forName(encoding);
             String canonName = cs.getCanonName();
             if (requireAsciiSuperset) {
                 if (!cs.isAsciiSuperset()) {
                     fatal("The encoding \u201C"
                             + actualName
                             + "\u201D is not an ASCII superset and, therefore, cannot be used in an internal encoding declaration.");
                 }
             }
             if (!cs.isRegistered()) {
                 if (encoding.startsWith("x-")) {
                     err("The encoding \u201C"
                             + actualName
                             + "\u201D is not an IANA-registered encoding. (Charmod C022)");                    
                 } else {
                     err("The encoding \u201C"
                             + actualName
                             + "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. (Charmod C023)");
                 }
             } else if (!canonName.equals(encoding)) {
                 err("The encoding \u201C"
                         + actualName
                         + "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C"
                         + canonName + "\u201D. (Charmod C024)");
             }
            // Warn for anything outside this short list of encodings.
            if (!("utf-8".equals(encoding) || "utf-16".equals(encoding)
                    || "utf-16be".equals(encoding)
                    || "utf-16le".equals(encoding)
                    || "iso-8859-1".equals(encoding) || "us-ascii".equals(encoding))) {
                .warn("XML processors are required to support the UTF-8 and UTF-16 character encodings. The encoding was \u201C"
                        + actualName
                        + "\u201D instead, which is an incompatibility risk.");
            }
            Encoding htmlActual = cs.getActualHtmlEncoding();
            if (htmlActual != null) {
                .warn("Documents encoded as \u201C"
                        + htmlActual.getCanonName()
                        + "\u201D are often mislabeled as \u201C"
                        + actualName
                        + "\u201D, which is the declared encoding of this document.");
            }
            CharsetDecoder decoder = cs.newDecoder();
            // NOTE(review): the action constants passed here were stripped.
            decoder.onMalformedInput(.);
            decoder.onUnmappableCharacter(.);
            this. = new InputStreamReader(streamdecoder);
        } catch (UnsupportedCharsetException e) {
            fatal("Unsupported character encoding \u201C" + actualName
                    + "\u201D.");
        }
    }

    
    /**
     * Parse miscellaneous markup outside the document element and DOCTYPE
     * declaration.
     *
     * <pre>
     *   [27] Misc ::= Comment | PI | S
     * </pre>
     */
 
    private void parseMisc() throws Exception {
        while (true) {
            skipWhitespace();
            if (tryRead()) {
                parsePI();
            } else if (tryRead()) {
                parseComment();
            } else {
                return;
            }
        }
    }

    
    /**
     * Parse a document type declaration.
     *
     * <pre>
     *   [28] doctypedecl ::= '&lt;!DOCTYPE' S Name (S ExternalID)? S?
     *      ('[' (markupdecl | PEReference | S)* ']' S?)? '&gt;'
     * </pre>
     *
     * <p>(The <code>&lt;!DOCTYPE</code> has already been read.)
     */

    // Reads the root element name and external identifiers, parses the
    // internal subset when one is present, then parses an external subset
    // (declared by system id, or supplied by getExternalSubset) before
    // signalling the end of the DOCTYPE.
    // NOTE(review): this copy has lost many identifiers and separators
    // (the flag pairs assigned around skipWhitespace()/parseMarkupdecl(),
    // handler receivers, and call arguments); restore from upstream.
    private void parseDoctypedecl() throws Exception {
        String rootName;
        ExternalIdentifiers ids;
        // Read the document type name.
        requireWhitespace();
        rootName = readNmtoken(true);
        // Read the External subset's IDs
        skipWhitespace();
        ids = readExternalIds(falsetrue);
        // report (a) declaration of name, (b) lexical info (ids)
        .doctypeDecl(rootNameids.publicIdids.systemId);
        // Internal subset is parsed first, if present
        skipWhitespace();
        if (tryRead('[')) {
            // loop until the subset ends
            while (true) {
                 =  = true;
                skipWhitespace();
                 =  = false;
                if (tryRead(']')) {
                    break// end of subset
                } else {
                    // WFC, PEs in internal subset (only between decls)
                     =  = true;
                    parseMarkupdecl();
                     =  = false;
                }
            }
        }
        skipWhitespace();
        require('>');
        // Read the external subset, if any
        InputSource subset;
        // Only ask for a substitute subset when none was declared.
        if (ids.systemId == null) {
            subset = .getExternalSubset(rootName.getSystemId());
        } else {
            subset = null;
        }
        if ((ids.systemId != null) || (subset != null)) {
            // Push a ">" sentinel that the loop below reads to detect the
            // end of the external subset.
            pushString(null">");
            // NOTE: [dtd] is so we say what SAX2 expects,
            // though it's misleading (subset, not entire dtd)
            if (ids.systemId != null) {
                pushURL(true"[dtd]"idsnullnullnulltrue);
            } else {
                .warn("modifying document by adding external subset");
                pushURL(true"[dtd]"new ExternalIdentifiers(
                        subset.getPublicId(), subset.getSystemId(), null),
                        subset.getCharacterStream(), subset.getByteStream(),
                        subset.getEncoding(), false);
            }
            // Loop until we end up back at '>'
            while (true) {
                 =  = true;
                skipWhitespace();
                 =  = false;
                if (tryRead('>')) {
                    break;
                } else {
                     = true;
                    parseMarkupdecl();
                     = false;
                }
            }
            // the ">" string isn't popped yet
            if (.size() != 1) {
                fatal("external subset has unmatched '>'");
            }
        }
        // done dtd
        .endDoctype();
         = false;
         = true;
    }

    
Parse a markup declaration in the internal or external DTD subset.
   [29] markupdecl ::= elementdecl | Attlistdecl | EntityDecl
      | NotationDecl | PI | Comment
   [30] extSubsetDecl ::= (markupdecl | conditionalSect
      | PEReference | S) *
 

Reading top-level PE references is handled as a lexical issue by the caller, as is whitespace.

    /**
     * Parses one markup declaration by matching its opening keyword and
     * delegating to the corresponding parse method; anything that matches
     * no branch is a fatal error.
     *
     * NOTE(review): field names and two tryRead() arguments are missing in
     * this copy of the file; restore them from the upstream source.
     *
     * @throws Exception propagated from the helper calls made here
     */
    private void parseMarkupdecl() throws Exception {
        char[] saved = null;
        boolean savedPE = ;
        // prevent "<%foo;" and ensures saved entity is right
        // (the '<' is required and then pushed back so that the keyword
        // matches below see it again)
        require('<');
        unread('<');
         = false;
        // Every branch does the same bookkeeping after its keyword matches:
        // capture a char[] into 'saved', restore the flag from 'savedPE',
        // then hand off to the specific parser.
        if (tryRead("<!ELEMENT")) {
            saved = ;
             = savedPE;
            parseElementDecl();
        } else if (tryRead("<!ATTLIST")) {
            saved = ;
             = savedPE;
            parseAttlistDecl();
        } else if (tryRead("<!ENTITY")) {
            saved = ;
             = savedPE;
            parseEntityDecl();
        } else if (tryRead("<!NOTATION")) {
            saved = ;
             = savedPE;
            parseNotationDecl();
        // NOTE(review): tryRead() argument missing; presumably the
        // processing-instruction opener, since parsePI() follows.
        } else if (tryRead()) {
            saved = ;
             = savedPE;
            parsePI();
        // NOTE(review): tryRead() argument missing; presumably the comment
        // opener, since parseComment() follows.
        } else if (tryRead()) {
            saved = ;
             = savedPE;
            parseComment();
        } else if (tryRead("<![")) {
            saved = ;
             = savedPE;
            // A non-empty collection (name missing here) is what permits a
            // conditional section; otherwise it is reported as illegal in
            // the internal subset.
            if (.size() > 0) {
                parseConditionalSect(saved);
            } else {
                fatal("conditional sections illegal in internal subset");
            }
        } else {
            fatal("expected markup declaration");
        }
        // VC: Proper Decl/PE Nesting
        // Reference comparison against the array captured at the start of
        // the declaration; a mismatch is a validity error, not a fatal one.
        if ( != saved) {
            .verror("Illegal Declaration/PE nesting");
        }
    }

    
Parse an element, with its tags.
   [39] element ::= EmptyElementTag | STag content ETag
   [40] STag ::= '<' Name (S Attribute)* S? '>'
   [44] EmptyElementTag ::= '<' Name (S Attribute)* S? '/>'
 

(The '<' has already been read.)

NOTE: this method actually chains onto parseContent(), if necessary, and parseContent() will take care of calling parseETag().

    /**
     * Parses an element whose '<' has already been consumed: reads the
     * name and attributes, supplies defaulted attributes, reports the
     * start (and, for an empty-element tag, the end) of the element, and
     * chains to parseContent() for a non-empty element.
     *
     * NOTE(review): the signature and several field names are damaged in
     * this copy of the file; restore them from the upstream source.
     *
     * @param maybeGetSubset when true, getExternalSubset() is consulted and
     *        any subset it returns is parsed before the element continues
     * @throws Exception propagated from the helper calls made here
     */
    private void parseElement(boolean maybeGetSubsetthrows Exception {
        String gi;
        char c;
        // Remember two pieces of enclosing state; both are written back at
        // the end of this method.
        int oldElementContent = ;
        String oldElement = ;
        ElementDecl element;
        // This is the (global) counter for the
        // array of specified attributes.
         = 0;
        // Read the element type name.
        gi = readNmtoken(true);
        // If we saw no DTD, and this is the document root element,
        // let the application modify the input stream by providing one.
        if (maybeGetSubset) {
            InputSource subset = .getExternalSubset(gi,
                    .getSystemId());
            if (subset != null) {
                String publicId = subset.getPublicId();
                String systemId = subset.getSystemId();
                .warn("modifying document by adding DTD");
                .doctypeDecl(gipublicIdsystemId);
                // Push a ">" string first; the loop below stops when it
                // reads that '>' after the pushed [dtd] input is consumed.
                pushString(null">");
                // NOTE: [dtd] is so we say what SAX2 expects,
                // though it's misleading (subset, not entire dtd)
                pushURL(true"[dtd]"new ExternalIdentifiers(publicId,
                        systemIdnull), subset.getCharacterStream(),
                        subset.getByteStream(), subset.getEncoding(), false);
                // Loop until we end up back at '>'
                while (true) {
                     =  = true;
                    skipWhitespace();
                     =  = false;
                    if (tryRead('>')) {
                        break;
                    } else {
                         = true;
                        parseMarkupdecl();
                         = false;
                    }
                }
                // the ">" string isn't popped yet
                if (.size() != 1) {
                    fatal("external subset has unmatched '>'");
                }
                .endDoctype();
            }
        }
        // Determine the current content type.
         = gi;
        element = .get(gi);
         = getContentType(element);
        // Read the attributes, if any.
        // After this loop, "c" is the closing delimiter.
        // 'white' records whether whitespace preceded the character just
        // read; an attribute not preceded by whitespace is a fatal error.
        boolean white = tryWhitespace();
        c = readCh();
        while ((c != '/') && (c != '>')) {
            unread(c);
            if (!white) {
                fatal("need whitespace between attributes");
            }
            parseAttribute(gi);
            white = tryWhitespace();
            c = readCh();
        }
        // Supply any defaulted attributes.
        Iterator<Stringatts = declaredAttributes(element);
        if (atts != null) {
            String aname;
            loop: while (atts.hasNext()) {
                aname = atts.next();
                // See if it was specified.
                // NOTE(review): this is a reference (==) comparison, so it
                // relies on both names being the same String instance.
                for (int i = 0; i < i++) {
                    if ([i] == aname) {
                        continue loop;
                    }
                }
                // ... or has a default
                String value = getAttributeDefaultValue(gianame);
                if (value == null) {
                    continue;
                }
                // Reported with a final argument of false, whereas
                // parseAttribute() reports its attributes with true.
                .attribute(anamevaluefalse);
            }
        }
        // Figure out if this is a start tag
        // or an empty element, and dispatch an
        // event accordingly.
        // (the attribute loop only exits with c equal to '>' or '/')
        switch (c) {
            case '>':
                .startElement(gi);
                parseContent();
                break;
            case '/':
                require('>');
                .startElement(gi);
                .endElement(gi);
                break;
        }
        // Restore the previous state.
         = oldElement;
         = oldElementContent;
    }

    
Parse an attribute assignment.
   [41] Attribute ::= Name Eq AttValue
 

Parameters:
name The name of the attribute's element.
See also:
SAXDriver.attribute(java.lang.String,java.lang.String,boolean)
    /**
     * Parses one attribute assignment (name, '=', quoted value), rejects a
     * duplicate name, reports the attribute, and records the name in the
     * array of attributes specified on the current tag.
     *
     * NOTE(review): the signature, the flag constants and several field
     * names are damaged in this copy of the file; restore them from the
     * upstream source.
     *
     * @param name the name of the element the attribute belongs to
     * @throws Exception propagated from the helper calls made here
     */
    private void parseAttribute(String namethrows Exception {
        String aname;
        String type;
        String value;
        int flags =  | ;
        // Read the attribute name.
        aname = readNmtoken(true);
        type = getAttributeType(nameaname);
        // Parse '='
        parseEq();
        // Read the value, normalizing whitespace
        // unless it is CDATA.
        // Both branches make the same decision (CDATA or unknown type reads
        // the literal with the base flags, anything else adds one more
        // flag). They differ only in comparing 'type' by reference or by
        // equals(). NOTE(review): the guard condition is missing here;
        // presumably it says whether strings are interned -- confirm.
        if (.) {
            if ((type == "CDATA") || (type == null)) {
                value = readLiteral(flags);
            } else {
                value = readLiteral(flags | );
            }
        } else {
            if ((type == null) || type.equals("CDATA")) {
                value = readLiteral(flags);
            } else {
                value = readLiteral(flags | );
            }
        }
        // WFC: no duplicate attributes
        for (int i = 0; i < i++) {
            if (aname.equals([i])) {
                fatal("duplicate attribute"anamenull);
            }
        }
        // Inform the handler about the
        // attribute.
        .attribute(anamevaluetrue);
         = 0;
        // Note that the attribute has been
        // specified.
        // When the array is full, allocate one of twice the size and copy
        // the existing entries across before appending.
        if ( == .) {
            String newAttrib[] = new String[. * 2];
            System.arraycopy(, 0, newAttrib, 0, );
             = newAttrib;
        }
        [++] = aname;
    }

    
Parse an equals sign surrounded by optional whitespace.
   [25] Eq ::= S? '=' S?
 
    private void parseEq() throws SAXExceptionIOException {
        skipWhitespace();
        require('=');
        skipWhitespace();
    }

    
Parse an end tag.
   [42] ETag ::= '</' Name S? '>'
 

NOTE: parseContent() chains to here; the "</" has already been read.

    /**
     * Parses the remainder of an end tag: a required token, optional
     * whitespace, and the closing '>'.
     *
     * @throws Exception propagated from the helper calls made here
     */
    private void parseETag() throws Exception {
        // NOTE(review): the argument to require() is missing in this copy
        // of the file; presumably the name of the currently open element --
        // confirm against the upstream source.
        require();
        skipWhitespace();
        require('>');
        // not re-reporting any SAXException re bogus end tags,
        // even though that diagnostic might be clearer ...
    }

    
Parse the content of an element.
   [43] content ::= (element | CharData | Reference
      | CDSect | PI | Comment)*
   [67] Reference ::= EntityRef | CharRef
 

NOTE: consumes the ETag.

    private void parseContent() throws Exception {
        char c;
        while (true) {
            // consume characters (or ignorable whitspace) until delimiter
            parseCharData();
            // Handle delimiters
            c = readCh();
            switch (c) {
                case '&'// Found "&"
                    c = readCh();
                    if (c == '#') {
                        parseCharRef();
                    } else {
                        unread(c);