mirror of
https://github.com/moparisthebest/davmail
synced 2024-08-13 16:53:51 -04:00
cbeef8c00d
git-svn-id: http://svn.code.sf.net/p/davmail/code/trunk@1877 3d1905a2-6b24-0410-a738-b14d5a86fcbd
2445 lines
86 KiB
Java
2445 lines
86 KiB
Java
/* Woodstox XML processor
|
|
*
|
|
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
|
|
*
|
|
* Licensed under the License specified in file LICENSE, included with
|
|
* the source code.
|
|
* You may not use this file except in compliance with the License.
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
package com.ctc.wstx.sr;
|
|
|
|
import java.io.FileNotFoundException;
|
|
import java.io.IOException;
|
|
import java.net.URL;
|
|
import java.text.MessageFormat;
|
|
import java.util.Collections;
|
|
import java.util.HashMap;
|
|
import java.util.Map;
|
|
|
|
import javax.xml.stream.Location;
|
|
import javax.xml.stream.XMLInputFactory;
|
|
import javax.xml.stream.XMLReporter;
|
|
import javax.xml.stream.XMLResolver;
|
|
import javax.xml.stream.XMLStreamException;
|
|
|
|
import org.codehaus.stax2.XMLReporter2;
|
|
import org.codehaus.stax2.XMLStreamLocation2;
|
|
import org.codehaus.stax2.validation.XMLValidationProblem;
|
|
|
|
import com.ctc.wstx.api.ReaderConfig;
|
|
import com.ctc.wstx.cfg.ErrorConsts;
|
|
import com.ctc.wstx.cfg.InputConfigFlags;
|
|
import com.ctc.wstx.cfg.ParsingErrorMsgs;
|
|
import com.ctc.wstx.cfg.XmlConsts;
|
|
import com.ctc.wstx.dtd.MinimalDTDReader;
|
|
import com.ctc.wstx.ent.EntityDecl;
|
|
import com.ctc.wstx.ent.IntEntity;
|
|
import com.ctc.wstx.exc.*;
|
|
import com.ctc.wstx.io.DefaultInputResolver;
|
|
import com.ctc.wstx.io.WstxInputData;
|
|
import com.ctc.wstx.io.WstxInputLocation;
|
|
import com.ctc.wstx.io.WstxInputSource;
|
|
import com.ctc.wstx.util.ExceptionUtil;
|
|
import com.ctc.wstx.util.SymbolTable;
|
|
import com.ctc.wstx.util.TextBuffer;
|
|
|
|
/**
|
|
* Abstract base class that defines some basic functionality that all
|
|
* Woodstox reader classes (main XML reader, DTD reader) extend from.
|
|
*/
|
|
|
|
public abstract class StreamScanner
|
|
extends WstxInputData
|
|
implements InputProblemReporter,
|
|
InputConfigFlags, ParsingErrorMsgs
|
|
{
|
|
|
|
// // // Some well-known chars:
|
|
|
|
/**
|
|
* Last (highest) char code of the three, LF, CR and NULL
|
|
*/
|
|
public final static char CHAR_CR_LF_OR_NULL = (char) 13;
|
|
|
|
public final static int INT_CR_LF_OR_NULL = 13;
|
|
|
|
/**
|
|
* Character that allows quick check of whether a char can potentially
|
|
* be some kind of markup, WRT input stream processing;
|
|
* has to contain linefeeds, &, < and > (">" only matters when
|
|
* quoting text, as part of "]]>")
|
|
*/
|
|
protected final static char CHAR_FIRST_PURE_TEXT = (char) ('>' + 1);
|
|
|
|
|
|
/**
|
|
* First character in Unicode (ie one with lowest id) that is legal
|
|
* as part of a local name (all valid name chars minus ':'). Used
|
|
* for doing quick check for local name end; usually name ends in
|
|
* a whitespace or equals sign.
|
|
*/
|
|
protected final static char CHAR_LOWEST_LEGAL_LOCALNAME_CHAR = '-';
|
|
|
|
/*
|
|
///////////////////////////////////////////////////////////
|
|
// Character validity constants, structs
|
|
///////////////////////////////////////////////////////////
|
|
*/
|
|
|
|
/**
|
|
* We will only use validity array for first 256 characters, mostly
|
|
* because after those characters it's easier to do fairly simple
|
|
* block checks.
|
|
*/
|
|
private final static int VALID_CHAR_COUNT = 0x100;
|
|
|
|
private final static byte NAME_CHAR_INVALID_B = (byte) 0;
|
|
private final static byte NAME_CHAR_ALL_VALID_B = (byte) 1;
|
|
private final static byte NAME_CHAR_VALID_NONFIRST_B = (byte) -1;
|
|
|
|
private final static byte[] sCharValidity = new byte[VALID_CHAR_COUNT];
|
|
|
|
static {
|
|
/* First, since all valid-as-first chars are also valid-as-other chars,
|
|
* we'll initialize common chars:
|
|
*/
|
|
sCharValidity['_'] = NAME_CHAR_ALL_VALID_B;
|
|
for (int i = 0, last = ('z' - 'a'); i <= last; ++i) {
|
|
sCharValidity['A' + i] = NAME_CHAR_ALL_VALID_B;
|
|
sCharValidity['a' + i] = NAME_CHAR_ALL_VALID_B;
|
|
}
|
|
for (int i = 0xC0; i < 0xF6; ++i) { // not all are fully valid, but
|
|
sCharValidity[i] = NAME_CHAR_ALL_VALID_B;
|
|
}
|
|
// ... now we can 'revert' ones not fully valid:
|
|
sCharValidity[0xD7] = NAME_CHAR_INVALID_B;
|
|
sCharValidity[0xF7] = NAME_CHAR_INVALID_B;
|
|
|
|
/* And then we can proceed with ones only valid-as-other.
|
|
*/
|
|
sCharValidity['-'] = NAME_CHAR_VALID_NONFIRST_B;
|
|
sCharValidity['.'] = NAME_CHAR_VALID_NONFIRST_B;
|
|
sCharValidity[0xB7] = NAME_CHAR_VALID_NONFIRST_B;
|
|
for (int i = '0'; i <= '9'; ++i) {
|
|
sCharValidity[i] = NAME_CHAR_VALID_NONFIRST_B;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Public identifiers only use 7-bit ascii range.
|
|
*/
|
|
private final static int VALID_PUBID_CHAR_COUNT = 0x80;
|
|
private final static byte[] sPubidValidity = new byte[VALID_PUBID_CHAR_COUNT];
|
|
// private final static byte PUBID_CHAR_INVALID_B = (byte) 0;
|
|
private final static byte PUBID_CHAR_VALID_B = (byte) 1;
|
|
static {
|
|
for (int i = 0, last = ('z' - 'a'); i <= last; ++i) {
|
|
sPubidValidity['A' + i] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity['a' + i] = PUBID_CHAR_VALID_B;
|
|
}
|
|
for (int i = '0'; i <= '9'; ++i) {
|
|
sPubidValidity[i] = PUBID_CHAR_VALID_B;
|
|
}
|
|
|
|
// 3 main white space types are valid
|
|
sPubidValidity[0x0A] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity[0x0D] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity[0x20] = PUBID_CHAR_VALID_B;
|
|
|
|
// And many of punctuation/separator ascii chars too:
|
|
sPubidValidity['-'] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity['\''] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity['('] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity[')'] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity['+'] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity[','] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity['.'] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity['/'] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity[':'] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity['='] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity['?'] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity[';'] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity['!'] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity['*'] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity['#'] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity['@'] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity['$'] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity['_'] = PUBID_CHAR_VALID_B;
|
|
sPubidValidity['%'] = PUBID_CHAR_VALID_B;
|
|
}
|
|
|
|
/*
|
|
///////////////////////////////////////////////////////////
|
|
// Basic configuration
|
|
///////////////////////////////////////////////////////////
|
|
*/
|
|
|
|
/**
|
|
* Copy of the configuration object passed by the factory.
|
|
* Contains immutable settings for this reader (or in case
|
|
* of DTD parsers, reader that uses it)
|
|
*/
|
|
protected final ReaderConfig mConfig;
|
|
|
|
// // // Various extracted settings:
|
|
|
|
/**
|
|
* If true, Reader is namespace aware, and should do basic checks
|
|
* (usually enforcing limitations on having colons in names)
|
|
*/
|
|
protected final boolean mCfgNsEnabled;
|
|
|
|
// Extracted standard on/off settings:
|
|
|
|
/**
|
|
* note: left non-final on purpose: sub-class may need to modify
|
|
* the default value after construction.
|
|
*/
|
|
protected boolean mCfgReplaceEntities;
|
|
|
|
/*
|
|
///////////////////////////////////////////////////////////
|
|
// Symbol handling, if applicable
|
|
///////////////////////////////////////////////////////////
|
|
*/
|
|
|
|
final SymbolTable mSymbols;
|
|
|
|
/**
|
|
* Local full name for the event, if it has one (note: element events
|
|
* do NOT use this variable; those names are stored in element stack):
|
|
* target for processing instructions.
|
|
*<p>
|
|
* Currently used for proc. instr. target, and entity name (at least
|
|
* when current entity reference is null).
|
|
*<p>
|
|
* Note: this variable is generally not cleared, since it comes from
|
|
* a symbol table, ie. this won't be the only reference.
|
|
*/
|
|
protected String mCurrName;
|
|
|
|
/*
|
|
///////////////////////////////////////////////////////////
|
|
// Input handling
|
|
///////////////////////////////////////////////////////////
|
|
*/
|
|
|
|
/**
|
|
* Currently active input source; contains link to parent (nesting) input
|
|
* sources, if any.
|
|
*/
|
|
protected WstxInputSource mInput;
|
|
|
|
/**
|
|
* Top-most input source this reader can use; due to input source
|
|
* chaining, this is not necessarily the root of all input; for example,
|
|
* external DTD subset reader's root input still has original document
|
|
* input as its parent.
|
|
*/
|
|
protected final WstxInputSource mRootInput;
|
|
|
|
/**
|
|
* Custom resolver used to handle external entities that are to be expanded
|
|
* by this reader (external param/general entity expander)
|
|
*/
|
|
XMLResolver mEntityResolver = null;
|
|
|
|
/**
|
|
* This is the current depth of the input stack (same as what input
|
|
* element stack would return as its depth).
|
|
* It is used to enforce input scope constraints for nesting of
|
|
* elements (for xml reader) and dtd declaration (for dtd reader)
|
|
* with regards to input block (entity expansion) boundaries.
|
|
*<p>
|
|
* Basically this value is compared to {@link #mInputTopDepth}, which
|
|
* indicates what was the depth at the point where the currently active
|
|
* input scope/block was started.
|
|
*/
|
|
protected int mCurrDepth = 0;
|
|
|
|
protected int mInputTopDepth = 0;
|
|
|
|
/**
|
|
* Flag that indicates whether linefeeds in the input data are to
|
|
* be normalized or not.
|
|
* Xml specs mandate that the line feeds are only normalized
|
|
* when they are from the external entities (main doc, external
|
|
* general/parsed entities), so normalization has to be
|
|
* suppressed when expanding internal general/parsed entities.
|
|
*/
|
|
protected boolean mNormalizeLFs;
|
|
|
|
/*
|
|
///////////////////////////////////////////////////////////
|
|
// Buffer(s) for local name(s) and text content
|
|
///////////////////////////////////////////////////////////
|
|
*/
|
|
|
|
/**
|
|
* Temporary buffer used if local name can not be just directly
|
|
* constructed from input buffer (name is on a boundary or such).
|
|
*/
|
|
protected char[] mNameBuffer = null;
|
|
|
|
/*
|
|
///////////////////////////////////////////////////////////
|
|
// Information about starting location of event
|
|
// Reader is pointing to; updated on-demand
|
|
///////////////////////////////////////////////////////////
|
|
*/
|
|
|
|
// // // Location info at point when current token was started
|
|
|
|
/**
|
|
* Total number of characters read before start of current token.
|
|
* For big (gigabyte-sized) sizes are possible, needs to be long,
|
|
* unlike pointers and sizes related to in-memory buffers.
|
|
*/
|
|
protected long mTokenInputTotal = 0;
|
|
|
|
/**
|
|
* Input row on which current token starts, 1-based
|
|
*/
|
|
protected int mTokenInputRow = 1;
|
|
|
|
/**
|
|
* Column on input row that current token starts; 0-based (although
|
|
* in the end it'll be converted to 1-based)
|
|
*/
|
|
protected int mTokenInputCol = 0;
|
|
|
|
/*
|
|
///////////////////////////////////////////////////////////
|
|
// XML document information (from doc decl if one
|
|
// was found) common to all entities (main xml
|
|
// document, external DTD subset)
|
|
///////////////////////////////////////////////////////////
|
|
*/
|
|
|
|
/**
|
|
* Input stream encoding, if known (passed in, or determined by
|
|
* auto-detection); null if not.
|
|
*/
|
|
String mDocInputEncoding = null;
|
|
|
|
/**
|
|
* Character encoding from xml declaration, if any; null if no
|
|
* declaration, or it didn't specify encoding.
|
|
*/
|
|
String mDocXmlEncoding = null;
|
|
|
|
/**
|
|
* XML version as declared by the document; one of constants
|
|
* from {@link XmlConsts} (like {@link XmlConsts#XML_V_10}).
|
|
*/
|
|
protected int mDocXmlVersion = XmlConsts.XML_V_UNKNOWN;
|
|
|
|
/**
|
|
* Cache of internal character entities;
|
|
*/
|
|
protected Map mCachedEntities;
|
|
|
|
/**
|
|
* Flag for whether or not character references should be treated as entities
|
|
*/
|
|
protected boolean mCfgTreatCharRefsAsEntities;
|
|
|
|
/**
|
|
* Entity reference stream currently points to.
|
|
*/
|
|
protected EntityDecl mCurrEntity;
|
|
|
|
/*
|
|
///////////////////////////////////////////////////////////
|
|
// Life-cycle
|
|
///////////////////////////////////////////////////////////
|
|
*/
|
|
|
|
/**
|
|
* Constructor used when creating a complete new (main-level) reader that
|
|
* does not share its input buffers or state with another reader.
|
|
*/
|
|
protected StreamScanner(WstxInputSource input, ReaderConfig cfg,
|
|
XMLResolver res)
|
|
{
|
|
super();
|
|
mInput = input;
|
|
// 17-Jun-2004, TSa: Need to know root-level input source
|
|
mRootInput = input;
|
|
|
|
mConfig = cfg;
|
|
mSymbols = cfg.getSymbols();
|
|
int cf = cfg.getConfigFlags();
|
|
mCfgNsEnabled = (cf & CFG_NAMESPACE_AWARE) != 0;
|
|
mCfgReplaceEntities = (cf & CFG_REPLACE_ENTITY_REFS) != 0;
|
|
|
|
mNormalizeLFs = mConfig.willNormalizeLFs();
|
|
mInputBuffer = null;
|
|
mInputPtr = mInputEnd = 0;
|
|
mEntityResolver = res;
|
|
|
|
mCfgTreatCharRefsAsEntities = mConfig.willTreatCharRefsAsEnts();
|
|
mCachedEntities = mCfgTreatCharRefsAsEntities ? new HashMap() : Collections.EMPTY_MAP;
|
|
}
|
|
|
|
/*
|
|
///////////////////////////////////////////////////////////
|
|
// Package API
|
|
///////////////////////////////////////////////////////////
|
|
*/
|
|
|
|
/**
|
|
* Method that returns location of the last character returned by this
|
|
* reader; that is, location "one less" than the currently pointed to
|
|
* location.
|
|
*/
|
|
protected WstxInputLocation getLastCharLocation()
|
|
{
|
|
return mInput.getLocation(mCurrInputProcessed + mInputPtr - 1,
|
|
mCurrInputRow,
|
|
mInputPtr - mCurrInputRowStart);
|
|
}
|
|
|
|
protected URL getSource() {
|
|
return mInput.getSource();
|
|
}
|
|
|
|
protected String getSystemId() {
|
|
return mInput.getSystemId();
|
|
}
|
|
|
|
/*
|
|
///////////////////////////////////////////////////////////
|
|
// Partial LocationInfo implementation (not implemented
|
|
// by this base class, but is by some sub-classes)
|
|
///////////////////////////////////////////////////////////
|
|
*/
|
|
|
|
/**
|
|
* Returns location of last properly parsed token; as per StAX specs,
|
|
* apparently needs to be the end of current event, which is the same
|
|
* as the start of the following event (or EOF if that's next).
|
|
*/
|
|
public abstract Location getLocation();
|
|
|
|
public XMLStreamLocation2 getStartLocation()
|
|
{
|
|
// note: +1 is used as columns are 1-based...
|
|
return mInput.getLocation(mTokenInputTotal, mTokenInputRow,
|
|
mTokenInputCol + 1);
|
|
}
|
|
|
|
public XMLStreamLocation2 getCurrentLocation()
|
|
{
|
|
return mInput.getLocation(mCurrInputProcessed + mInputPtr,
|
|
mCurrInputRow,
|
|
mInputPtr - mCurrInputRowStart + 1);
|
|
}
|
|
|
|
/*
|
|
///////////////////////////////////////////////////////////
|
|
// InputProblemReporter implementation
|
|
///////////////////////////////////////////////////////////
|
|
*/
|
|
|
|
public WstxException throwWfcException(String msg, boolean deferErrors)
|
|
throws WstxException
|
|
{
|
|
WstxException ex = constructWfcException(msg);
|
|
if (!deferErrors) {
|
|
throw ex;
|
|
}
|
|
return ex;
|
|
}
|
|
|
|
public void throwParseError(String msg) throws XMLStreamException
|
|
{
|
|
throwParseError(msg, null, null);
|
|
}
|
|
|
|
/**
|
|
* Throws generic parse error with specified message and current parsing
|
|
* location.
|
|
*<p>
|
|
* Note: public access only because core code in other packages needs
|
|
* to access it.
|
|
*/
|
|
public void throwParseError(String format, Object arg, Object arg2)
|
|
throws XMLStreamException
|
|
{
|
|
String msg = (arg != null || arg2 != null) ?
|
|
MessageFormat.format(format, new Object[] { arg, arg2 }) : format;
|
|
throw constructWfcException(msg);
|
|
}
|
|
|
|
public void reportProblem(String probType, String format, Object arg, Object arg2)
|
|
throws XMLStreamException
|
|
{
|
|
XMLReporter rep = mConfig.getXMLReporter();
|
|
if (rep != null) {
|
|
_reportProblem(rep, probType,
|
|
MessageFormat.format(format, new Object[] { arg, arg2 }), null);
|
|
}
|
|
}
|
|
|
|
public void reportProblem(Location loc, String probType,
|
|
String format, Object arg, Object arg2)
|
|
throws XMLStreamException
|
|
{
|
|
XMLReporter rep = mConfig.getXMLReporter();
|
|
if (rep != null) {
|
|
String msg = (arg != null || arg2 != null) ?
|
|
MessageFormat.format(format, new Object[] { arg, arg2 }) : format;
|
|
_reportProblem(rep, probType, msg, loc);
|
|
}
|
|
}
|
|
|
|
protected void _reportProblem(XMLReporter rep, String probType, String msg, Location loc)
|
|
throws XMLStreamException
|
|
{
|
|
if (loc == null) {
|
|
loc = getLastCharLocation();
|
|
}
|
|
_reportProblem(rep, new XMLValidationProblem(loc, msg, XMLValidationProblem.SEVERITY_ERROR, probType));
|
|
}
|
|
|
|
protected void _reportProblem(XMLReporter rep, XMLValidationProblem prob)
|
|
throws XMLStreamException
|
|
{
|
|
if (rep != null) {
|
|
Location loc = prob.getLocation();
|
|
if (loc == null) {
|
|
loc = getLastCharLocation();
|
|
prob.setLocation(loc);
|
|
}
|
|
// Backwards-compatibility fix: add non-null type, if missing:
|
|
if (prob.getType() == null) {
|
|
prob.setType(ErrorConsts.WT_VALIDATION);
|
|
}
|
|
// [WSTX-154]: was catching and dropping thrown exception: shouldn't.
|
|
// [WTSX-157]: need to support XMLReporter2
|
|
if (rep instanceof XMLReporter2) {
|
|
((XMLReporter2) rep).report(prob);
|
|
} else {
|
|
rep.report(prob.getMessage(), prob.getType(), prob, loc);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
*<p>
|
|
* Note: this is the base implementation used for implementing
|
|
* <code>ValidationContext</code>
|
|
*/
|
|
public void reportValidationProblem(XMLValidationProblem prob)
|
|
throws XMLStreamException
|
|
{
|
|
// !!! TBI: Fail-fast vs. deferred modes?
|
|
/* For now let's implement basic functionality: warnings get
|
|
* reported via XMLReporter, errors and fatal errors result in
|
|
* immediate exceptions.
|
|
*/
|
|
/* 27-May-2008, TSa: [WSTX-153] Above is incorrect: as per Stax
|
|
* javadocs for XMLReporter, both warnings and non-fatal errors
|
|
* (which includes all validation errors) should be reported via
|
|
* XMLReporter interface, and only fatals should cause an
|
|
* immediate stream exception (by-passing reporter)
|
|
*/
|
|
if (prob.getSeverity() > XMLValidationProblem.SEVERITY_ERROR) {
|
|
throw WstxValidationException.create(prob);
|
|
}
|
|
XMLReporter rep = mConfig.getXMLReporter();
|
|
if (rep != null) {
|
|
_reportProblem(rep, prob);
|
|
} else {
|
|
/* If no reporter, regular non-fatal errors are to be reported
|
|
* as exceptions as well, for backwards compatibility
|
|
*/
|
|
if (prob.getSeverity() >= XMLValidationProblem.SEVERITY_ERROR) {
|
|
throw WstxValidationException.create(prob);
|
|
}
|
|
}
|
|
}
|
|
|
|
public void reportValidationProblem(String msg, int severity)
|
|
throws XMLStreamException
|
|
{
|
|
reportValidationProblem(new XMLValidationProblem(getLastCharLocation(),
|
|
msg, severity));
|
|
}
|
|
|
|
public void reportValidationProblem(String msg)
|
|
throws XMLStreamException
|
|
{
|
|
reportValidationProblem(new XMLValidationProblem(getLastCharLocation(),
|
|
msg,
|
|
XMLValidationProblem.SEVERITY_ERROR));
|
|
}
|
|
|
|
public void reportValidationProblem(Location loc, String msg)
|
|
throws XMLStreamException
|
|
{
|
|
reportValidationProblem(new XMLValidationProblem(loc, msg));
|
|
}
|
|
|
|
public void reportValidationProblem(String format, Object arg, Object arg2)
|
|
throws XMLStreamException
|
|
{
|
|
reportValidationProblem(MessageFormat.format(format, new Object[] { arg, arg2 }));
|
|
}
|
|
|
|
/*
|
|
///////////////////////////////////////////////////////////
|
|
// Other error reporting methods
|
|
///////////////////////////////////////////////////////////
|
|
*/
|
|
|
|
protected WstxException constructWfcException(String msg)
|
|
{
|
|
return new WstxParsingException(msg, getLastCharLocation());
|
|
}
|
|
|
|
/**
|
|
* Construct and return a {@link XMLStreamException} to throw
|
|
* as a result of a failed Typed Access operation (but one not
|
|
* caused by a Well-Formedness Constraint or Validation Constraint
|
|
* problem)
|
|
*/
|
|
/*
|
|
protected WstxException _constructTypeException(String msg)
|
|
{
|
|
// Hmmh. Should there be a distinct sub-type?
|
|
return new WstxParsingException(msg, getLastCharLocation());
|
|
}
|
|
*/
|
|
|
|
protected WstxException constructFromIOE(IOException ioe)
|
|
{
|
|
return new WstxIOException(ioe);
|
|
}
|
|
|
|
protected WstxException constructNullCharException()
|
|
{
|
|
return new WstxUnexpectedCharException("Illegal character (NULL, unicode 0) encountered: not valid in any content",
|
|
getLastCharLocation(), CHAR_NULL);
|
|
}
|
|
|
|
protected void throwUnexpectedChar(int i, String msg)
|
|
throws WstxException
|
|
{
|
|
char c = (char) i;
|
|
String excMsg = "Unexpected character "+getCharDesc(c)+msg;
|
|
throw new WstxUnexpectedCharException(excMsg, getLastCharLocation(), c);
|
|
}
|
|
|
|
protected void throwNullChar()
|
|
throws WstxException
|
|
{
|
|
throw constructNullCharException();
|
|
}
|
|
|
|
protected void throwInvalidSpace(int i)
|
|
throws WstxException
|
|
{
|
|
throwInvalidSpace(i, false);
|
|
}
|
|
|
|
protected WstxException throwInvalidSpace(int i, boolean deferErrors)
|
|
throws WstxException
|
|
{
|
|
char c = (char) i;
|
|
WstxException ex;
|
|
if (c == CHAR_NULL) {
|
|
ex = constructNullCharException();
|
|
} else {
|
|
String msg = "Illegal character ("+getCharDesc(c)+")";
|
|
if (mXml11) {
|
|
msg += " [note: in XML 1.1, it could be included via entity expansion]";
|
|
}
|
|
ex = new WstxUnexpectedCharException(msg, getLastCharLocation(), c);
|
|
}
|
|
if (!deferErrors) {
|
|
throw ex;
|
|
}
|
|
return ex;
|
|
}
|
|
|
|
protected void throwUnexpectedEOF(String msg)
|
|
throws WstxException
|
|
{
|
|
throw new WstxEOFException("Unexpected EOF"
|
|
+(msg == null ? "" : msg),
|
|
getLastCharLocation());
|
|
}
|
|
|
|
/**
|
|
* Similar to {@link #throwUnexpectedEOF}, but only indicates ending
|
|
* of an input block. Used when reading a token that can not span
|
|
* input block boundaries (ie. can not continue past end of an
|
|
* entity expansion).
|
|
*/
|
|
protected void throwUnexpectedEOB(String msg)
|
|
throws WstxException
|
|
{
|
|
throw new WstxEOFException("Unexpected end of input block"
|
|
+(msg == null ? "" : msg),
|
|
getLastCharLocation());
|
|
}
|
|
|
|
protected void throwFromIOE(IOException ioe)
|
|
throws WstxException
|
|
{
|
|
throw new WstxIOException(ioe);
|
|
}
|
|
|
|
protected void throwFromStrE(XMLStreamException strex)
|
|
throws WstxException
|
|
{
|
|
if (strex instanceof WstxException) {
|
|
throw (WstxException) strex;
|
|
}
|
|
WstxException newEx = new WstxException(strex);
|
|
ExceptionUtil.setInitCause(newEx, strex);
|
|
throw newEx;
|
|
}
|
|
|
|
/**
|
|
* Method called to report an error, when caller's signature only
|
|
* allows runtime exceptions to be thrown.
|
|
*/
|
|
protected void throwLazyError(Exception e)
|
|
{
|
|
if (e instanceof XMLStreamException) {
|
|
WstxLazyException.throwLazily((XMLStreamException) e);
|
|
}
|
|
ExceptionUtil.throwRuntimeException(e);
|
|
}
|
|
|
|
protected String tokenTypeDesc(int type)
|
|
{
|
|
return ErrorConsts.tokenTypeDesc(type);
|
|
}
|
|
|
|
/*
|
|
///////////////////////////////////////////////////////////
|
|
// Input buffer handling
|
|
///////////////////////////////////////////////////////////
|
|
*/
|
|
|
|
/**
|
|
* Returns current input source this source uses.
|
|
*<p>
|
|
* Note: public only because some implementations are on different
|
|
* package.
|
|
*/
|
|
public final WstxInputSource getCurrentInput() {
|
|
return mInput;
|
|
}
|
|
|
|
protected final int inputInBuffer() {
|
|
return mInputEnd - mInputPtr;
|
|
}
|
|
|
|
protected final int getNext()
|
|
throws XMLStreamException
|
|
{
|
|
if (mInputPtr >= mInputEnd) {
|
|
if (!loadMore()) {
|
|
return -1;
|
|
}
|
|
}
|
|
return (int) mInputBuffer[mInputPtr++];
|
|
}
|
|
|
|
/**
|
|
* Similar to {@link #getNext}, but does not advance pointer
|
|
* in input buffer.
|
|
*<p>
|
|
* Note: this method only peeks within current input source;
|
|
* it does not close it and check nested input source (if any).
|
|
* This is necessary when checking keywords, since they can never
|
|
* cross input block boundary.
|
|
*/
|
|
protected final int peekNext()
|
|
throws XMLStreamException
|
|
{
|
|
if (mInputPtr >= mInputEnd) {
|
|
if (!loadMoreFromCurrent()) {
|
|
return -1;
|
|
}
|
|
}
|
|
return (int) mInputBuffer[mInputPtr];
|
|
}
|
|
|
|
protected final char getNextChar(String errorMsg)
|
|
throws XMLStreamException
|
|
{
|
|
if (mInputPtr >= mInputEnd) {
|
|
loadMore(errorMsg);
|
|
}
|
|
return mInputBuffer[mInputPtr++];
|
|
}
|
|
|
|
/**
|
|
* Similar to {@link #getNextChar}, but will not read more characters
|
|
* from parent input source(s) if the current input source doesn't
|
|
* have more content. This is often needed to prevent "runaway" content,
|
|
* such as comments that start in an entity but do not have matching
|
|
* close marker inside entity; XML specification specifically states
|
|
* such markup is not legal.
|
|
*/
|
|
protected final char getNextCharFromCurrent(String errorMsg)
|
|
throws XMLStreamException
|
|
{
|
|
if (mInputPtr >= mInputEnd) {
|
|
loadMoreFromCurrent(errorMsg);
|
|
}
|
|
return mInputBuffer[mInputPtr++];
|
|
}
|
|
|
|
/**
|
|
* Method that will skip through zero or more white space characters,
|
|
* and return either the character following white space, or -1 to
|
|
* indicate EOF (end of the outermost input source)/
|
|
*/
|
|
protected final int getNextAfterWS()
|
|
throws XMLStreamException
|
|
{
|
|
if (mInputPtr >= mInputEnd) {
|
|
if (!loadMore()) {
|
|
return -1;
|
|
}
|
|
}
|
|
char c = mInputBuffer[mInputPtr++];
|
|
while (c <= CHAR_SPACE) {
|
|
// Linefeed?
|
|
if (c == '\n' || c == '\r') {
|
|
skipCRLF(c);
|
|
} else if (c != CHAR_SPACE && c != '\t') {
|
|
throwInvalidSpace(c);
|
|
}
|
|
// Still a white space?
|
|
if (mInputPtr >= mInputEnd) {
|
|
if (!loadMore()) {
|
|
return -1;
|
|
}
|
|
}
|
|
c = mInputBuffer[mInputPtr++];
|
|
}
|
|
return (int) c;
|
|
}
|
|
|
|
protected final char getNextCharAfterWS(String errorMsg)
|
|
throws XMLStreamException
|
|
{
|
|
if (mInputPtr >= mInputEnd) {
|
|
loadMore(errorMsg);
|
|
}
|
|
|
|
char c = mInputBuffer[mInputPtr++];
|
|
while (c <= CHAR_SPACE) {
|
|
// Linefeed?
|
|
if (c == '\n' || c == '\r') {
|
|
skipCRLF(c);
|
|
} else if (c != CHAR_SPACE && c != '\t') {
|
|
throwInvalidSpace(c);
|
|
}
|
|
|
|
// Still a white space?
|
|
if (mInputPtr >= mInputEnd) {
|
|
loadMore(errorMsg);
|
|
}
|
|
c = mInputBuffer[mInputPtr++];
|
|
}
|
|
return c;
|
|
}
|
|
|
|
protected final char getNextInCurrAfterWS(String errorMsg)
|
|
throws XMLStreamException
|
|
{
|
|
return getNextInCurrAfterWS(errorMsg, getNextCharFromCurrent(errorMsg));
|
|
}
|
|
|
|
protected final char getNextInCurrAfterWS(String errorMsg, char c)
|
|
throws XMLStreamException
|
|
{
|
|
while (c <= CHAR_SPACE) {
|
|
// Linefeed?
|
|
if (c == '\n' || c == '\r') {
|
|
skipCRLF(c);
|
|
} else if (c != CHAR_SPACE && c != '\t') {
|
|
throwInvalidSpace(c);
|
|
}
|
|
|
|
// Still a white space?
|
|
if (mInputPtr >= mInputEnd) {
|
|
loadMoreFromCurrent(errorMsg);
|
|
}
|
|
c = mInputBuffer[mInputPtr++];
|
|
}
|
|
return c;
|
|
}
|
|
|
|
/**
|
|
* Method called when a CR has been spotted in input; checks if next
|
|
* char is LF, and if so, skips it. Note that next character has to
|
|
* come from the current input source, to qualify; it can never come
|
|
* from another (nested) input source.
|
|
*
|
|
* @return True, if passed in char is '\r' and next one is '\n'.
|
|
*/
|
|
protected final boolean skipCRLF(char c)
|
|
throws XMLStreamException
|
|
{
|
|
boolean result;
|
|
|
|
if (c == '\r' && peekNext() == '\n') {
|
|
++mInputPtr;
|
|
result = true;
|
|
} else {
|
|
result = false;
|
|
}
|
|
++mCurrInputRow;
|
|
mCurrInputRowStart = mInputPtr;
|
|
return result;
|
|
}
|
|
|
|
protected final void markLF() {
|
|
++mCurrInputRow;
|
|
mCurrInputRowStart = mInputPtr;
|
|
}
|
|
|
|
protected final void markLF(int inputPtr) {
|
|
++mCurrInputRow;
|
|
mCurrInputRowStart = inputPtr;
|
|
}
|
|
|
|
/**
|
|
* Method to push back last character read; can only be called once,
|
|
* that is, no more than one char can be guaranteed to be succesfully
|
|
* returned.
|
|
*/
|
|
protected final void pushback() { --mInputPtr; }
|
|
|
|
/*
|
|
///////////////////////////////////////////////////////////
|
|
// Sub-class overridable input handling methods
|
|
///////////////////////////////////////////////////////////
|
|
*/
|
|
|
|
/**
|
|
* Method called when an entity has been expanded (new input source
|
|
* has been created). Needs to initialize location information and change
|
|
* active input source.
|
|
*
|
|
* @param entityId Name of the entity being expanded
|
|
*/
|
|
protected void initInputSource(WstxInputSource newInput, boolean isExt,
|
|
String entityId)
|
|
throws XMLStreamException
|
|
{
|
|
mInput = newInput;
|
|
// Let's make sure new input will be read next time input is needed:
|
|
mInputPtr = 0;
|
|
mInputEnd = 0;
|
|
/* Plus, reset the input location so that'll be accurate for
|
|
* error reporting etc.
|
|
*/
|
|
mInputTopDepth = mCurrDepth;
|
|
mInput.initInputLocation(this, mCurrDepth);
|
|
|
|
/* 21-Feb-2006, TSa: Linefeeds are NOT normalized when expanding
|
|
* internal entities (XML, 2.11)
|
|
*/
|
|
if (isExt) {
|
|
mNormalizeLFs = true;
|
|
} else {
|
|
mNormalizeLFs = false;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Method that will try to read one or more characters from currently
|
|
* open input sources; closing input sources if necessary.
|
|
*
|
|
* @return true if reading succeeded (or may succeed), false if
|
|
* we reached EOF.
|
|
*/
|
|
protected boolean loadMore()
|
|
throws XMLStreamException
|
|
{
|
|
WstxInputSource input = mInput;
|
|
do {
|
|
/* Need to make sure offsets are properly updated for error
|
|
* reporting purposes, and do this now while previous amounts
|
|
* are still known.
|
|
*/
|
|
mCurrInputProcessed += mInputEnd;
|
|
mCurrInputRowStart -= mInputEnd;
|
|
int count;
|
|
try {
|
|
count = input.readInto(this);
|
|
if (count > 0) {
|
|
return true;
|
|
}
|
|
input.close();
|
|
} catch (IOException ioe) {
|
|
throw constructFromIOE(ioe);
|
|
}
|
|
if (input == mRootInput) {
|
|
/* Note: no need to check entity/input nesting in this
|
|
* particular case, since it will be handled by higher level
|
|
* parsing code (results in an unexpected EOF)
|
|
*/
|
|
return false;
|
|
}
|
|
WstxInputSource parent = input.getParent();
|
|
if (parent == null) { // sanity check!
|
|
throwNullParent(input);
|
|
}
|
|
/* 13-Feb-2006, TSa: Ok, do we violate a proper nesting constraints
|
|
* with this input block closure?
|
|
*/
|
|
if (mCurrDepth != input.getScopeId()) {
|
|
handleIncompleteEntityProblem(input);
|
|
}
|
|
|
|
mInput = input = parent;
|
|
input.restoreContext(this);
|
|
mInputTopDepth = input.getScopeId();
|
|
/* 21-Feb-2006, TSa: Since linefeed normalization needs to be
|
|
* suppressed for internal entity expansion, we may need to
|
|
* change the state...
|
|
*/
|
|
if (!mNormalizeLFs) {
|
|
mNormalizeLFs = !input.fromInternalEntity();
|
|
}
|
|
// Maybe there are leftovers from that input in buffer now?
|
|
} while (mInputPtr >= mInputEnd);
|
|
|
|
return true;
|
|
}
|
|
|
|
protected final boolean loadMore(String errorMsg)
|
|
throws XMLStreamException
|
|
{
|
|
if (!loadMore()) {
|
|
throwUnexpectedEOF(errorMsg);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
protected boolean loadMoreFromCurrent()
|
|
throws XMLStreamException
|
|
{
|
|
// Need to update offsets properly
|
|
mCurrInputProcessed += mInputEnd;
|
|
mCurrInputRowStart -= mInputEnd;
|
|
try {
|
|
int count = mInput.readInto(this);
|
|
return (count > 0);
|
|
} catch (IOException ie) {
|
|
throw constructFromIOE(ie);
|
|
}
|
|
}
|
|
|
|
protected final boolean loadMoreFromCurrent(String errorMsg)
|
|
throws XMLStreamException
|
|
{
|
|
if (!loadMoreFromCurrent()) {
|
|
throwUnexpectedEOB(errorMsg);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Method called to make sure current main-level input buffer has at
|
|
* least specified number of characters available consequtively,
|
|
* without having to call {@link #loadMore}. It can only be called
|
|
* when input comes from main-level buffer; further, call can shift
|
|
* content in input buffer, so caller has to flush any data still
|
|
* pending. In short, caller has to know exactly what it's doing. :-)
|
|
*<p>
|
|
* Note: method does not check for any other input sources than the
|
|
* current one -- if current source can not fulfill the request, a
|
|
* failure is indicated.
|
|
*
|
|
* @return true if there's now enough data; false if not (EOF)
|
|
*/
|
|
protected boolean ensureInput(int minAmount)
|
|
throws XMLStreamException
|
|
{
|
|
int currAmount = mInputEnd - mInputPtr;
|
|
if (currAmount >= minAmount) {
|
|
return true;
|
|
}
|
|
try {
|
|
return mInput.readMore(this, minAmount);
|
|
} catch (IOException ie) {
|
|
throw constructFromIOE(ie);
|
|
}
|
|
}
|
|
|
|
protected void closeAllInput(boolean force)
|
|
throws XMLStreamException
|
|
{
|
|
WstxInputSource input = mInput;
|
|
while (true) {
|
|
try {
|
|
if (force) {
|
|
input.closeCompletely();
|
|
} else {
|
|
input.close();
|
|
}
|
|
} catch (IOException ie) {
|
|
throw constructFromIOE(ie);
|
|
}
|
|
if (input == mRootInput) {
|
|
break;
|
|
}
|
|
WstxInputSource parent = input.getParent();
|
|
if (parent == null) { // sanity check!
|
|
throwNullParent(input);
|
|
}
|
|
mInput = input = parent;
|
|
}
|
|
}
|
|
|
|
protected void throwNullParent(WstxInputSource curr)
|
|
{
|
|
throw new IllegalStateException(ErrorConsts.ERR_INTERNAL);
|
|
//throw new IllegalStateException("Internal error: null parent for input source '"+curr+"'; should never occur (should have stopped at root input '"+mRootInput+"').");
|
|
}
|
|
|
|
/*
|
|
///////////////////////////////////////////////////////////
|
|
// Entity resolution
|
|
///////////////////////////////////////////////////////////
|
|
*/
|
|
|
|
/**
|
|
* Method that tries to resolve a character entity, or (if caller so
|
|
* specifies), a pre-defined internal entity (lt, gt, amp, apos, quot).
|
|
* It will succeed iff:
|
|
* <ol>
|
|
* <li>Entity in question is a simple character entity (either one of
|
|
* 5 pre-defined ones, or using decimal/hex notation), AND
|
|
* <li>
|
|
* <li>Entity fits completely inside current input buffer.
|
|
* <li>
|
|
* </ol>
|
|
* If so, character value of entity is returned. Character 0 is returned
|
|
* otherwise; if so, caller needs to do full resolution.
|
|
*<p>
|
|
* Note: On entry we are guaranteed there are at least 3 more characters
|
|
* in this buffer; otherwise we shouldn't be called.
|
|
*
|
|
* @param checkStd If true, will check pre-defined internal entities
|
|
* (gt, lt, amp, apos, quot); if false, will only check actual
|
|
* character entities.
|
|
*
|
|
* @return (Valid) character value, if entity is a character reference,
|
|
* and could be resolved from current input buffer (does not span
|
|
* buffer boundary); null char (code 0) if not (either non-char
|
|
* entity, or spans input buffer boundary).
|
|
*/
|
|
protected int resolveSimpleEntity(boolean checkStd)
|
|
throws XMLStreamException
|
|
{
|
|
char[] buf = mInputBuffer;
|
|
int ptr = mInputPtr;
|
|
char c = buf[ptr++];
|
|
|
|
// Numeric reference?
|
|
if (c == '#') {
|
|
c = buf[ptr++];
|
|
int value = 0;
|
|
int inputLen = mInputEnd;
|
|
if (c == 'x') { // hex
|
|
while (ptr < inputLen) {
|
|
c = buf[ptr++];
|
|
if (c == ';') {
|
|
break;
|
|
}
|
|
value = value << 4;
|
|
if (c <= '9' && c >= '0') {
|
|
value += (c - '0');
|
|
} else if (c >= 'a' && c <= 'f') {
|
|
value += (10 + (c - 'a'));
|
|
} else if (c >= 'A' && c <= 'F') {
|
|
value += (10 + (c - 'A'));
|
|
} else {
|
|
mInputPtr = ptr; // so error points to correct char
|
|
throwUnexpectedChar(c, "; expected a hex digit (0-9a-fA-F).");
|
|
}
|
|
/* Need to check for overflow; easiest to do right as
|
|
* it happens...
|
|
*/
|
|
if (value > MAX_UNICODE_CHAR) {
|
|
reportUnicodeOverflow();
|
|
}
|
|
}
|
|
} else { // numeric (decimal)
|
|
while (c != ';') {
|
|
if (c <= '9' && c >= '0') {
|
|
value = (value * 10) + (c - '0');
|
|
// Overflow?
|
|
if (value > MAX_UNICODE_CHAR) {
|
|
reportUnicodeOverflow();
|
|
}
|
|
} else {
|
|
mInputPtr = ptr; // so error points to correct char
|
|
throwUnexpectedChar(c, "; expected a decimal number.");
|
|
}
|
|
if (ptr >= inputLen) {
|
|
break;
|
|
}
|
|
c = buf[ptr++];
|
|
}
|
|
}
|
|
/* We get here either if we got it all, OR if we ran out of
|
|
* input in current buffer.
|
|
*/
|
|
if (c == ';') { // got the full thing
|
|
mInputPtr = ptr;
|
|
validateChar(value);
|
|
return value;
|
|
}
|
|
|
|
/* If we ran out of input, need to just fall back, gets
|
|
* resolved via 'full' resolution mechanism.
|
|
*/
|
|
} else if (checkStd) {
|
|
/* Caller may not want to resolve these quite yet...
|
|
* (when it wants separate events for non-char entities)
|
|
*/
|
|
if (c == 'a') { // amp or apos?
|
|
c = buf[ptr++];
|
|
|
|
if (c == 'm') { // amp?
|
|
if (buf[ptr++] == 'p') {
|
|
if (ptr < mInputEnd && buf[ptr++] == ';') {
|
|
mInputPtr = ptr;
|
|
return '&';
|
|
}
|
|
}
|
|
} else if (c == 'p') { // apos?
|
|
if (buf[ptr++] == 'o') {
|
|
int len = mInputEnd;
|
|
if (ptr < len && buf[ptr++] == 's') {
|
|
if (ptr < len && buf[ptr++] == ';') {
|
|
mInputPtr = ptr;
|
|
return '\'';
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} else if (c == 'g') { // gt?
|
|
if (buf[ptr++] == 't' && buf[ptr++] == ';') {
|
|
mInputPtr = ptr;
|
|
return '>';
|
|
}
|
|
} else if (c == 'l') { // lt?
|
|
if (buf[ptr++] == 't' && buf[ptr++] == ';') {
|
|
mInputPtr = ptr;
|
|
return '<';
|
|
}
|
|
} else if (c == 'q') { // quot?
|
|
if (buf[ptr++] == 'u' && buf[ptr++] == 'o') {
|
|
int len = mInputEnd;
|
|
if (ptr < len && buf[ptr++] == 't') {
|
|
if (ptr < len && buf[ptr++] == ';') {
|
|
mInputPtr = ptr;
|
|
return '"';
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Method called to resolve character entities, and only character
|
|
* entities (except that pre-defined char entities -- amp, apos, lt,
|
|
* gt, quote -- MAY be "char entities" in this sense, depending on
|
|
* arguments).
|
|
* Otherwise it is to return the null char; if so,
|
|
* the input pointer will point to the same point as when method
|
|
* entered (char after ampersand), plus the ampersand itself is
|
|
* guaranteed to be in the input buffer (so caller can just push it
|
|
* back if necessary).
|
|
*<p>
|
|
* Most often this method is called when reader is not to expand
|
|
* non-char entities automatically, but to return them as separate
|
|
* events.
|
|
*<p>
|
|
* Main complication here is that we need to do 5-char lookahead. This
|
|
* is problematic if chars are on input buffer boundary. This is ok
|
|
* for the root level input buffer, but not for some nested buffers.
|
|
* However, according to XML specs, such split entities are actually
|
|
* illegal... so we can throw an exception in those cases.
|
|
*
|
|
* @param checkStd If true, will check pre-defined internal entities
|
|
* (gt, lt, amp, apos, quot) as character entities; if false, will only
|
|
* check actual 'real' character entities.
|
|
*
|
|
* @return (Valid) character value, if entity is a character reference,
|
|
* and could be resolved from current input buffer (does not span
|
|
* buffer boundary); null char (code 0) if not (either non-char
|
|
* entity, or spans input buffer boundary).
|
|
*/
|
|
protected int resolveCharOnlyEntity(boolean checkStd)
|
|
throws XMLStreamException
|
|
{
|
|
//int avail = inputInBuffer();
|
|
int avail = mInputEnd - mInputPtr;
|
|
if (avail < 6) {
|
|
// split entity, or buffer boundary
|
|
/* Don't want to lose leading '&' (in case we can not expand
|
|
* the entity), so let's push it back first
|
|
*/
|
|
--mInputPtr;
|
|
/* Shortest valid reference would be 3 chars ('&a;'); which
|
|
* would only be legal from an expanded entity...
|
|
*/
|
|
if (!ensureInput(6)) {
|
|
avail = inputInBuffer();
|
|
if (avail < 3) {
|
|
throwUnexpectedEOF(SUFFIX_IN_ENTITY_REF);
|
|
}
|
|
} else {
|
|
avail = 6;
|
|
}
|
|
// ... and now we can move pointer back as well:
|
|
++mInputPtr;
|
|
}
|
|
|
|
/* Ok, now we have one more character to check, and that's enough
|
|
* to determine type decisively.
|
|
*/
|
|
char c = mInputBuffer[mInputPtr];
|
|
|
|
// A char reference?
|
|
if (c == '#') { // yup
|
|
++mInputPtr;
|
|
return resolveCharEnt(null);
|
|
}
|
|
|
|
// nope... except may be a pre-def?
|
|
if (checkStd) {
|
|
if (c == 'a') {
|
|
char d = mInputBuffer[mInputPtr+1];
|
|
if (d == 'm') {
|
|
if (avail >= 4
|
|
&& mInputBuffer[mInputPtr+2] == 'p'
|
|
&& mInputBuffer[mInputPtr+3] == ';') {
|
|
mInputPtr += 4;
|
|
return '&';
|
|
}
|
|
} else if (d == 'p') {
|
|
if (avail >= 5
|
|
&& mInputBuffer[mInputPtr+2] == 'o'
|
|
&& mInputBuffer[mInputPtr+3] == 's'
|
|
&& mInputBuffer[mInputPtr+4] == ';') {
|
|
mInputPtr += 5;
|
|
return '\'';
|
|
}
|
|
}
|
|
} else if (c == 'l') {
|
|
if (avail >= 3
|
|
&& mInputBuffer[mInputPtr+1] == 't'
|
|
&& mInputBuffer[mInputPtr+2] == ';') {
|
|
mInputPtr += 3;
|
|
return '<';
|
|
}
|
|
} else if (c == 'g') {
|
|
if (avail >= 3
|
|
&& mInputBuffer[mInputPtr+1] == 't'
|
|
&& mInputBuffer[mInputPtr+2] == ';') {
|
|
mInputPtr += 3;
|
|
return '>';
|
|
}
|
|
} else if (c == 'q') {
|
|
if (avail >= 5
|
|
&& mInputBuffer[mInputPtr+1] == 'u'
|
|
&& mInputBuffer[mInputPtr+2] == 'o'
|
|
&& mInputBuffer[mInputPtr+3] == 't'
|
|
&& mInputBuffer[mInputPtr+4] == ';') {
|
|
mInputPtr += 5;
|
|
return '"';
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Reverse of {@link #resolveCharOnlyEntity}; will only resolve entity
|
|
* if it is NOT a character entity (or pre-defined 'generic' entity;
|
|
* amp, apos, lt, gt or quot). Only used in cases where entities
|
|
* are to be separately returned unexpanded (in non-entity-replacing
|
|
* mode); which means it's never called from dtd handler.
|
|
*/
|
|
protected EntityDecl resolveNonCharEntity()
|
|
throws XMLStreamException
|
|
{
|
|
//int avail = inputInBuffer();
|
|
int avail = mInputEnd - mInputPtr;
|
|
if (avail < 6) {
|
|
// split entity, or buffer boundary
|
|
/* Don't want to lose leading '&' (in case we can not expand
|
|
* the entity), so let's push it back first
|
|
*/
|
|
--mInputPtr;
|
|
|
|
/* Shortest valid reference would be 3 chars ('&a;'); which
|
|
* would only be legal from an expanded entity...
|
|
*/
|
|
if (!ensureInput(6)) {
|
|
avail = inputInBuffer();
|
|
if (avail < 3) {
|
|
throwUnexpectedEOF(SUFFIX_IN_ENTITY_REF);
|
|
}
|
|
} else {
|
|
avail = 6;
|
|
}
|
|
// ... and now we can move pointer back as well:
|
|
++mInputPtr;
|
|
}
|
|
|
|
// We don't care about char entities:
|
|
char c = mInputBuffer[mInputPtr];
|
|
if (c == '#') {
|
|
return null;
|
|
}
|
|
|
|
/* 19-Aug-2004, TSa: Need special handling for pre-defined
|
|
* entities; they are not counted as 'real' general parsed
|
|
* entities, but more as character entities...
|
|
*/
|
|
|
|
// have chars at least up to mInputPtr+4 by now
|
|
if (c == 'a') {
|
|
char d = mInputBuffer[mInputPtr+1];
|
|
if (d == 'm') {
|
|
if (avail >= 4
|
|
&& mInputBuffer[mInputPtr+2] == 'p'
|
|
&& mInputBuffer[mInputPtr+3] == ';') {
|
|
// If not automatically expanding:
|
|
//return sEntityAmp;
|
|
// mInputPtr += 4;
|
|
return null;
|
|
}
|
|
} else if (d == 'p') {
|
|
if (avail >= 5
|
|
&& mInputBuffer[mInputPtr+2] == 'o'
|
|
&& mInputBuffer[mInputPtr+3] == 's'
|
|
&& mInputBuffer[mInputPtr+4] == ';') {
|
|
return null;
|
|
}
|
|
}
|
|
} else if (c == 'l') {
|
|
if (avail >= 3
|
|
&& mInputBuffer[mInputPtr+1] == 't'
|
|
&& mInputBuffer[mInputPtr+2] == ';') {
|
|
return null;
|
|
}
|
|
} else if (c == 'g') {
|
|
if (avail >= 3
|
|
&& mInputBuffer[mInputPtr+1] == 't'
|
|
&& mInputBuffer[mInputPtr+2] == ';') {
|
|
return null;
|
|
}
|
|
} else if (c == 'q') {
|
|
if (avail >= 5
|
|
&& mInputBuffer[mInputPtr+1] == 'u'
|
|
&& mInputBuffer[mInputPtr+2] == 'o'
|
|
&& mInputBuffer[mInputPtr+3] == 't'
|
|
&& mInputBuffer[mInputPtr+4] == ';') {
|
|
return null;
|
|
}
|
|
}
|
|
|
|
// Otherwise, let's just parse in generic way:
|
|
++mInputPtr; // since we already read the first letter
|
|
String id = parseEntityName(c);
|
|
mCurrName = id;
|
|
|
|
return findEntity(id, null);
|
|
}
|
|
|
|
/**
|
|
* Method that does full resolution of an entity reference, be it
|
|
* character entity, internal entity or external entity, including
|
|
* updating of input buffers, and depending on whether result is
|
|
* a character entity (or one of 5 pre-defined entities), returns
|
|
* char in question, or null character (code 0) to indicate it had
|
|
* to change input source.
|
|
*
|
|
* @param allowExt If true, is allowed to expand external entities
|
|
* (expanding text); if false, is not (expanding attribute value).
|
|
*
|
|
* @return Either single-character replacement (which is NOT to be
|
|
* reparsed), or null char (0) to indicate expansion is done via
|
|
* input source.
|
|
*/
|
|
protected int fullyResolveEntity(boolean allowExt)
|
|
throws XMLStreamException
|
|
{
|
|
char c = getNextCharFromCurrent(SUFFIX_IN_ENTITY_REF);
|
|
// Do we have a (numeric) character entity reference?
|
|
if (c == '#') { // numeric
|
|
final StringBuffer originalSurface = new StringBuffer("#");
|
|
int ch = resolveCharEnt(originalSurface);
|
|
if (mCfgTreatCharRefsAsEntities) {
|
|
final char[] originalChars = new char[originalSurface.length()];
|
|
originalSurface.getChars(0, originalSurface.length(), originalChars, 0);
|
|
mCurrEntity = getIntEntity(ch, originalChars);
|
|
return 0;
|
|
}
|
|
return ch;
|
|
}
|
|
|
|
String id = parseEntityName(c);
|
|
|
|
// Perhaps we have a pre-defined char reference?
|
|
c = id.charAt(0);
|
|
/*
|
|
* 16-May-2004, TSa: Should custom entities (or ones defined in int/ext subset) override
|
|
* pre-defined settings for these?
|
|
*/
|
|
char d = CHAR_NULL;
|
|
if (c == 'a') { // amp or apos?
|
|
if (id.equals("amp")) {
|
|
d = '&';
|
|
} else if (id.equals("apos")) {
|
|
d = '\'';
|
|
}
|
|
} else if (c == 'g') { // gt?
|
|
if (id.length() == 2 && id.charAt(1) == 't') {
|
|
d = '>';
|
|
}
|
|
} else if (c == 'l') { // lt?
|
|
if (id.length() == 2 && id.charAt(1) == 't') {
|
|
d = '<';
|
|
}
|
|
} else if (c == 'q') { // quot?
|
|
if (id.equals("quot")) {
|
|
d = '"';
|
|
}
|
|
}
|
|
|
|
if (d != CHAR_NULL) {
|
|
if (mCfgTreatCharRefsAsEntities) {
|
|
final char[] originalChars = new char[id.length()];
|
|
id.getChars(0, id.length(), originalChars, 0);
|
|
mCurrEntity = getIntEntity(d, originalChars);
|
|
return 0;
|
|
}
|
|
return d;
|
|
}
|
|
|
|
final EntityDecl e = expandEntity(id, allowExt, null);
|
|
if (mCfgTreatCharRefsAsEntities) {
|
|
mCurrEntity = e;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Returns an entity (possibly from cache) for the argument character using the encoded
|
|
* representation in mInputBuffer[entityStartPos ... mInputPtr-1].
|
|
*/
|
|
protected EntityDecl getIntEntity(int ch, final char[] originalChars)
|
|
{
|
|
String cacheKey = new String(originalChars);
|
|
|
|
IntEntity entity = (IntEntity) mCachedEntities.get(cacheKey);
|
|
if (entity == null) {
|
|
String repl;
|
|
if (ch <= 0xFFFF) {
|
|
repl = Character.toString((char) ch);
|
|
} else {
|
|
StringBuffer sb = new StringBuffer(2);
|
|
ch -= 0x10000;
|
|
sb.append((char) ((ch >> 10) + 0xD800));
|
|
sb.append((char) ((ch & 0x3FF) + 0xDC00));
|
|
repl = sb.toString();
|
|
}
|
|
entity = IntEntity.create(new String(originalChars), repl);
|
|
mCachedEntities.put(cacheKey, entity);
|
|
}
|
|
return entity;
|
|
}
|
|
|
|
|
|
/**
|
|
* Helper method that will try to expand a parsed entity (parameter or
|
|
* generic entity).
|
|
*<p>
|
|
* note: called by sub-classes (dtd parser), needs to be protected.
|
|
*
|
|
* @param id Name of the entity being expanded
|
|
* @param allowExt Whether external entities can be expanded or not; if
|
|
* not, and the entity to expand would be external one, an exception
|
|
* will be thrown
|
|
*/
|
|
protected EntityDecl expandEntity(String id, boolean allowExt,
|
|
Object extraArg)
|
|
throws XMLStreamException
|
|
{
|
|
mCurrName = id;
|
|
|
|
EntityDecl ed = findEntity(id, extraArg);
|
|
|
|
if (ed == null) {
|
|
/* 30-Sep-2005, TSa: As per [WSTX-5], let's only throw exception
|
|
* if we have to resolve it (otherwise it's just best-effort,
|
|
* and null is ok)
|
|
*/
|
|
/* 02-Oct-2005, TSa: Plus, [WSTX-4] adds "undeclared entity
|
|
* resolver"
|
|
*/
|
|
if (mCfgReplaceEntities) {
|
|
mCurrEntity = expandUnresolvedEntity(id);
|
|
}
|
|
return null;
|
|
}
|
|
|
|
if (!mCfgTreatCharRefsAsEntities || this instanceof MinimalDTDReader) {
|
|
expandEntity(ed, allowExt);
|
|
}
|
|
|
|
return ed;
|
|
}
|
|
|
|
/**
|
|
*
|
|
*<p>
|
|
* note: defined as private for documentation, ie. it's just called
|
|
* from within this class (not sub-classes), from one specific method
|
|
* (see above)
|
|
*
|
|
* @param ed Entity to be expanded
|
|
* @param allowExt Whether external entities are allowed or not.
|
|
*/
|
|
private void expandEntity(EntityDecl ed, boolean allowExt)
|
|
throws XMLStreamException
|
|
{
|
|
String id = ed.getName();
|
|
|
|
/* Very first thing; we can immediately check if expanding
|
|
* this entity would result in infinite recursion:
|
|
*/
|
|
if (mInput.isOrIsExpandedFrom(id)) {
|
|
throwRecursionError(id);
|
|
}
|
|
|
|
/* Should not refer unparsed entities from attribute values
|
|
* or text content (except via notation mechanism, but that's
|
|
* not parsed here)
|
|
*/
|
|
if (!ed.isParsed()) {
|
|
throwParseError("Illegal reference to unparsed external entity \"{0}\"", id, null);
|
|
}
|
|
|
|
// 28-Jun-2004, TSa: Do we support external entity expansion?
|
|
boolean isExt = ed.isExternal();
|
|
if (isExt) {
|
|
if (!allowExt) { // never ok in attribute value...
|
|
throwParseError("Encountered a reference to external parsed entity \"{0}\" when expanding attribute value: not legal as per XML 1.0/1.1 #3.1", id, null);
|
|
}
|
|
if (!mConfig.willSupportExternalEntities()) {
|
|
throwParseError("Encountered a reference to external entity \"{0}\", but stream reader has feature \"{1}\" disabled",
|
|
id, XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES);
|
|
}
|
|
}
|
|
|
|
// First, let's give current context chance to save its stuff
|
|
WstxInputSource oldInput = mInput;
|
|
oldInput.saveContext(this);
|
|
WstxInputSource newInput = null;
|
|
try {
|
|
newInput = ed.expand(oldInput, mEntityResolver, mConfig, mDocXmlVersion);
|
|
} catch (FileNotFoundException fex) {
|
|
/* Let's catch and rethrow this just so we get more meaningful
|
|
* description (with input source position etc)
|
|
*/
|
|
throwParseError("(was {0}) {1}", fex.getClass().getName(), fex.getMessage());
|
|
} catch (IOException ioe) {
|
|
throw constructFromIOE(ioe);
|
|
}
|
|
/* And then we'll need to make sure new input comes from the new
|
|
* input source
|
|
*/
|
|
initInputSource(newInput, isExt, id);
|
|
}
|
|
|
|
/**
|
|
*<p>
|
|
* note: only called from the local expandEntity() method
|
|
*/
|
|
private EntityDecl expandUnresolvedEntity(String id)
|
|
throws XMLStreamException
|
|
{
|
|
XMLResolver resolver = mConfig.getUndeclaredEntityResolver();
|
|
if (resolver != null) {
|
|
/* Ok, we can check for recursion here; but let's only do that
|
|
* if there is any chance that it might get resolved by
|
|
* the special resolver (it must have been resolved this way
|
|
* earlier, too...)
|
|
*/
|
|
if (mInput.isOrIsExpandedFrom(id)) {
|
|
throwRecursionError(id);
|
|
}
|
|
|
|
WstxInputSource oldInput = mInput;
|
|
oldInput.saveContext(this);
|
|
// null, null -> no public or system ids
|
|
int xmlVersion = mDocXmlVersion;
|
|
// 05-Feb-2006, TSa: If xmlVersion not explicitly known, defaults to 1.0
|
|
if (xmlVersion == XmlConsts.XML_V_UNKNOWN) {
|
|
xmlVersion = XmlConsts.XML_V_10;
|
|
}
|
|
WstxInputSource newInput;
|
|
try {
|
|
newInput = DefaultInputResolver.resolveEntityUsing
|
|
(oldInput, id, null, null, resolver, mConfig, xmlVersion);
|
|
if (mCfgTreatCharRefsAsEntities) {
|
|
return new IntEntity(WstxInputLocation.getEmptyLocation(), newInput.getEntityId(),
|
|
newInput.getSource(), new char[]{}, WstxInputLocation.getEmptyLocation());
|
|
}
|
|
} catch (IOException ioe) {
|
|
throw constructFromIOE(ioe);
|
|
}
|
|
if (newInput != null) {
|
|
// true -> is external
|
|
initInputSource(newInput, true, id);
|
|
return null;
|
|
}
|
|
}
|
|
handleUndeclaredEntity(id);
|
|
return null;
|
|
}
|
|
|
|
/*
|
|
///////////////////////////////////////////////////////////
|
|
// Abstract methods for sub-classes to implement
|
|
///////////////////////////////////////////////////////////
|
|
*/
|
|
|
|
/**
|
|
* Abstract method for sub-classes to implement, for finding
|
|
* a declared general or parsed entity.
|
|
*
|
|
* @param id Identifier of the entity to find
|
|
* @param arg Optional argument passed from caller; needed by DTD
|
|
* reader.
|
|
*/
|
|
protected abstract EntityDecl findEntity(String id, Object arg)
|
|
throws XMLStreamException;
|
|
|
|
/**
|
|
* This method gets called if a declaration for an entity was not
|
|
* found in entity expanding mode (enabled by default for xml reader,
|
|
* always enabled for dtd reader).
|
|
*/
|
|
protected abstract void handleUndeclaredEntity(String id)
|
|
throws XMLStreamException;
|
|
|
|
protected abstract void handleIncompleteEntityProblem(WstxInputSource closing)
|
|
throws XMLStreamException;
|
|
|
|
/*
|
|
///////////////////////////////////////////////////////////
|
|
// Basic tokenization
|
|
///////////////////////////////////////////////////////////
|
|
*/
|
|
|
|
/**
|
|
* Method that will parse name token (roughly equivalent to XML specs;
|
|
* although bit lenier for more efficient handling); either uri prefix,
|
|
* or local name.
|
|
*<p>
|
|
* Much of complexity in this method has to do with the intention to
|
|
* try to avoid any character copies. In this optimal case algorithm
|
|
* would be fairly simple. However, this only works if all data is
|
|
* already in input buffer... if not, copy has to be made halfway
|
|
* through parsing, and that complicates things.
|
|
*<p>
|
|
* One thing to note is that String returned has been canonicalized
|
|
* and (if necessary) added to symbol table. It can thus be compared
|
|
* against other such (usually id) Strings, with simple equality operator.
|
|
*
|
|
* @param c First character of the name; not yet checked for validity
|
|
*
|
|
* @return Canonicalized name String (which may have length 0, if
|
|
* EOF or non-name-start char encountered)
|
|
*/
|
|
protected String parseLocalName(char c)
|
|
throws XMLStreamException
|
|
{
|
|
/* Has to start with letter, or '_' (etc); we won't allow ':' as that
|
|
* is taken as namespace separator; no use trying to optimize
|
|
* heavily as it's 98% likely it is a valid char...
|
|
*/
|
|
if (!isNameStartChar(c)) {
|
|
if (c == ':') {
|
|
throwUnexpectedChar(c, " (missing namespace prefix?)");
|
|
}
|
|
throwUnexpectedChar(c, " (expected a name start character)");
|
|
}
|
|
|
|
int ptr = mInputPtr;
|
|
int hash = (int) c;
|
|
final int inputLen = mInputEnd;
|
|
int startPtr = ptr-1; // already read previous char
|
|
final char[] inputBuf = mInputBuffer;
|
|
|
|
/* After which there may be zero or more name chars
|
|
* we have to consider
|
|
*/
|
|
while (true) {
|
|
if (ptr >= inputLen) {
|
|
/* Ok, identifier may continue past buffer end, need
|
|
* to continue with part 2 (separate method, as this is
|
|
* not as common as having it all in buffer)
|
|
*/
|
|
mInputPtr = ptr;
|
|
return parseLocalName2(startPtr, hash);
|
|
}
|
|
// Ok, we have the char... is it a name char?
|
|
c = inputBuf[ptr];
|
|
if (c < CHAR_LOWEST_LEGAL_LOCALNAME_CHAR) {
|
|
break;
|
|
}
|
|
if (!isNameChar(c)) {
|
|
break;
|
|
}
|
|
hash = (hash * 31) + (int) c;
|
|
++ptr;
|
|
}
|
|
mInputPtr = ptr;
|
|
return mSymbols.findSymbol(mInputBuffer, startPtr, ptr - startPtr, hash);
|
|
}
|
|
|
|
/**
|
|
* Second part of name token parsing; called when name can continue
|
|
* past input buffer end (so only part was read before calling this
|
|
* method to read the rest).
|
|
*<p>
|
|
* Note that this isn't heavily optimized, on assumption it's not
|
|
* called very often.
|
|
*/
|
|
protected String parseLocalName2(int start, int hash)
|
|
throws XMLStreamException
|
|
{
|
|
int ptr = mInputEnd - start;
|
|
// Let's assume fairly short names
|
|
char[] outBuf = getNameBuffer(ptr+8);
|
|
|
|
if (ptr > 0) {
|
|
System.arraycopy(mInputBuffer, start, outBuf, 0, ptr);
|
|
}
|
|
|
|
int outLen = outBuf.length;
|
|
while (true) {
|
|
// note: names can not cross input block (entity) boundaries...
|
|
if (mInputPtr >= mInputEnd) {
|
|
if (!loadMoreFromCurrent()) {
|
|
break;
|
|
}
|
|
}
|
|
char c = mInputBuffer[mInputPtr];
|
|
if (c < CHAR_LOWEST_LEGAL_LOCALNAME_CHAR) {
|
|
break;
|
|
}
|
|
if (!isNameChar(c)) {
|
|
break;
|
|
}
|
|
++mInputPtr;
|
|
if (ptr >= outLen) {
|
|
mNameBuffer = outBuf = expandBy50Pct(outBuf);
|
|
outLen = outBuf.length;
|
|
}
|
|
outBuf[ptr++] = c;
|
|
hash = (hash * 31) + (int) c;
|
|
}
|
|
// Still need to canonicalize the name:
|
|
return mSymbols.findSymbol(outBuf, 0, ptr, hash);
|
|
}
|
|
|
|
/**
|
|
* Method that will parse 'full' name token; what full means depends on
|
|
* whether reader is namespace aware or not. If it is, full name means
|
|
* local name with no namespace prefix (PI target, entity/notation name);
|
|
* if not, name can contain arbitrary number of colons. Note that
|
|
* element and attribute names are NOT parsed here, so actual namespace
|
|
* prefix separation can be handled properly there.
|
|
*<p>
|
|
* Similar to {@link #parseLocalName}, much of complexity stems from
|
|
* trying to avoid copying name characters from input buffer.
|
|
*<p>
|
|
* Note that returned String will be canonicalized, similar to
|
|
* {@link #parseLocalName}, but without separating prefix/local name.
|
|
*
|
|
* @return Canonicalized name String (which may have length 0, if
|
|
* EOF or non-name-start char encountered)
|
|
*/
|
|
protected String parseFullName()
|
|
throws XMLStreamException
|
|
{
|
|
if (mInputPtr >= mInputEnd) {
|
|
loadMoreFromCurrent();
|
|
}
|
|
return parseFullName(mInputBuffer[mInputPtr++]);
|
|
}
|
|
|
|
protected String parseFullName(char c)
|
|
throws XMLStreamException
|
|
{
|
|
// First char has special handling:
|
|
if (!isNameStartChar(c)) {
|
|
if (c == ':') { // no name.... generally an error:
|
|
if (mCfgNsEnabled) {
|
|
throwNsColonException(parseFNameForError());
|
|
}
|
|
// Ok, that's fine actually
|
|
} else {
|
|
if (c <= CHAR_SPACE) {
|
|
throwUnexpectedChar(c, " (missing name?)");
|
|
}
|
|
throwUnexpectedChar(c, " (expected a name start character)");
|
|
}
|
|
}
|
|
|
|
int ptr = mInputPtr;
|
|
int hash = (int) c;
|
|
int inputLen = mInputEnd;
|
|
int startPtr = ptr-1; // to account for the first char
|
|
|
|
/* After which there may be zero or more name chars
|
|
* we have to consider
|
|
*/
|
|
while (true) {
|
|
if (ptr >= inputLen) {
|
|
/* Ok, identifier may continue past buffer end, need
|
|
* to continue with part 2 (separate method, as this is
|
|
* not as common as having it all in buffer)
|
|
*/
|
|
mInputPtr = ptr;
|
|
return parseFullName2(startPtr, hash);
|
|
}
|
|
c = mInputBuffer[ptr];
|
|
if (c == ':') { // colon only allowed in non-NS mode
|
|
if (mCfgNsEnabled) {
|
|
mInputPtr = ptr;
|
|
throwNsColonException(new String(mInputBuffer, startPtr, ptr - startPtr) + parseFNameForError());
|
|
}
|
|
} else {
|
|
if (c < CHAR_LOWEST_LEGAL_LOCALNAME_CHAR) {
|
|
break;
|
|
}
|
|
if (!isNameChar(c)) {
|
|
break;
|
|
}
|
|
}
|
|
hash = (hash * 31) + (int) c;
|
|
++ptr;
|
|
}
|
|
mInputPtr = ptr;
|
|
return mSymbols.findSymbol(mInputBuffer, startPtr, ptr - startPtr, hash);
|
|
}
|
|
|
|
protected String parseFullName2(int start, int hash)
|
|
throws XMLStreamException
|
|
{
|
|
int ptr = mInputEnd - start;
|
|
// Let's assume fairly short names
|
|
char[] outBuf = getNameBuffer(ptr+8);
|
|
|
|
if (ptr > 0) {
|
|
System.arraycopy(mInputBuffer, start, outBuf, 0, ptr);
|
|
}
|
|
|
|
int outLen = outBuf.length;
|
|
while (true) {
|
|
/* 06-Sep-2004, TSa: Name tokens are not allowed to continue
|
|
* past entity expansion ranges... that is, all characters
|
|
* have to come from the same input source. Thus, let's only
|
|
* load things from same input level
|
|
*/
|
|
if (mInputPtr >= mInputEnd) {
|
|
if (!loadMoreFromCurrent()) {
|
|
break;
|
|
}
|
|
}
|
|
char c = mInputBuffer[mInputPtr];
|
|
if (c == ':') { // colon only allowed in non-NS mode
|
|
if (mCfgNsEnabled) {
|
|
throwNsColonException(new String(outBuf, 0, ptr) + c + parseFNameForError());
|
|
}
|
|
} else if (c < CHAR_LOWEST_LEGAL_LOCALNAME_CHAR) {
|
|
break;
|
|
} else if (!isNameChar(c)) {
|
|
break;
|
|
}
|
|
++mInputPtr;
|
|
|
|
if (ptr >= outLen) {
|
|
mNameBuffer = outBuf = expandBy50Pct(outBuf);
|
|
outLen = outBuf.length;
|
|
}
|
|
outBuf[ptr++] = c;
|
|
hash = (hash * 31) + (int) c;
|
|
}
|
|
|
|
// Still need to canonicalize the name:
|
|
return mSymbols.findSymbol(outBuf, 0, ptr, hash);
|
|
}
|
|
|
|
/**
|
|
* Method called to read in full name, including unlimited number of
|
|
* namespace separators (':'), for the purpose of displaying name in
|
|
* an error message. Won't do any further validations, and parsing
|
|
* is not optimized: main need is just to get more meaningful error
|
|
* messages.
|
|
*/
|
|
protected String parseFNameForError()
|
|
throws XMLStreamException
|
|
{
|
|
StringBuffer sb = new StringBuffer(100);
|
|
while (true) {
|
|
char c;
|
|
|
|
if (mInputPtr < mInputEnd) {
|
|
c = mInputBuffer[mInputPtr++];
|
|
} else { // can't error here, so let's accept EOF for now:
|
|
int i = getNext();
|
|
if (i < 0) {
|
|
break;
|
|
}
|
|
c = (char) i;
|
|
}
|
|
if (c != ':' && !isNameChar(c)) {
|
|
--mInputPtr;
|
|
break;
|
|
}
|
|
sb.append(c);
|
|
}
|
|
return sb.toString();
|
|
}
|
|
|
|
protected final String parseEntityName(char c)
|
|
throws XMLStreamException
|
|
{
|
|
String id = parseFullName(c);
|
|
// Needs to be followed by a semi-colon, too.. from same input source:
|
|
if (mInputPtr >= mInputEnd) {
|
|
if (!loadMoreFromCurrent()) {
|
|
throwParseError("Missing semicolon after reference for entity \"{0}\"", id, null);
|
|
}
|
|
}
|
|
c = mInputBuffer[mInputPtr++];
|
|
if (c != ';') {
|
|
throwUnexpectedChar(c, "; expected a semi-colon after the reference for entity '"+id+"'");
|
|
}
|
|
return id;
|
|
}
|
|
|
|
/**
|
|
* Note: does not check for number of colons, amongst other things.
|
|
* Main idea is to skip through what superficially seems like a valid
|
|
* id, nothing more. This is only done when really skipping through
|
|
* something we do not care about at all: not even whether names/ids
|
|
* would be valid (for example, when ignoring internal DTD subset).
|
|
*
|
|
* @return Length of skipped name.
|
|
*/
|
|
protected int skipFullName(char c)
|
|
throws XMLStreamException
|
|
{
|
|
if (!isNameStartChar(c)) {
|
|
--mInputPtr;
|
|
return 0;
|
|
}
|
|
|
|
/* After which there may be zero or more name chars
|
|
* we have to consider
|
|
*/
|
|
int count = 1;
|
|
while (true) {
|
|
c = (mInputPtr < mInputEnd) ?
|
|
mInputBuffer[mInputPtr++] : getNextChar(SUFFIX_EOF_EXP_NAME);
|
|
if (c != ':' && !isNameChar(c)) {
|
|
break;
|
|
}
|
|
++count;
|
|
}
|
|
return count;
|
|
}
|
|
|
|
/**
|
|
* Simple parsing method that parses system ids, which are generally
|
|
* used in entities (from DOCTYPE declaration to internal/external
|
|
* subsets).
|
|
*<p>
|
|
* NOTE: returned String is not canonicalized, on assumption that
|
|
* external ids may be longish, and are not shared all that often, as
|
|
* they are generally just used for resolving paths, if anything.
|
|
*<br />
|
|
* Also note that this method is not heavily optimized, as it's not
|
|
* likely to be a bottleneck for parsing.
|
|
*/
|
|
protected final String parseSystemId(char quoteChar, boolean convertLFs,
|
|
String errorMsg)
|
|
throws XMLStreamException
|
|
{
|
|
char[] buf = getNameBuffer(-1);
|
|
int ptr = 0;
|
|
|
|
while (true) {
|
|
char c = (mInputPtr < mInputEnd) ?
|
|
mInputBuffer[mInputPtr++] : getNextChar(errorMsg);
|
|
if (c == quoteChar) {
|
|
break;
|
|
}
|
|
/* ??? 14-Jun-2004, TSa: Should we normalize linefeeds or not?
|
|
* It seems like we should, for all input... so that's the way it
|
|
* works.
|
|
*/
|
|
if (c == '\n') {
|
|
markLF();
|
|
} else if (c == '\r') {
|
|
if (peekNext() == '\n') {
|
|
++mInputPtr;
|
|
if (!convertLFs) {
|
|
/* The only tricky thing; need to preserve 2-char LF; need to
|
|
* output one char from here, then can fall back to default:
|
|
*/
|
|
if (ptr >= buf.length) {
|
|
buf = expandBy50Pct(buf);
|
|
}
|
|
buf[ptr++] = '\r';
|
|
}
|
|
c = '\n';
|
|
} else if (convertLFs) {
|
|
c = '\n';
|
|
}
|
|
}
|
|
|
|
// Other than that, let's just append it:
|
|
if (ptr >= buf.length) {
|
|
buf = expandBy50Pct(buf);
|
|
}
|
|
buf[ptr++] = c;
|
|
}
|
|
|
|
return (ptr == 0) ? "" : new String(buf, 0, ptr);
|
|
}
|
|
|
|
/**
|
|
* Simple parsing method that parses system ids, which are generally
|
|
* used in entities (from DOCTYPE declaration to internal/external
|
|
* subsets).
|
|
*<p>
|
|
* As per xml specs, the contents are actually normalized.
|
|
*<p>
|
|
* NOTE: returned String is not canonicalized, on assumption that
|
|
* external ids may be longish, and are not shared all that often, as
|
|
* they are generally just used for resolving paths, if anything.
|
|
*<br />
|
|
* Also note that this method is not heavily optimized, as it's not
|
|
* likely to be a bottleneck for parsing.
|
|
*/
|
|
protected final String parsePublicId(char quoteChar, String errorMsg)
|
|
throws XMLStreamException
|
|
{
|
|
char[] buf = getNameBuffer(-1);
|
|
int ptr = 0;
|
|
boolean spaceToAdd = false;
|
|
|
|
while (true) {
|
|
char c = (mInputPtr < mInputEnd) ?
|
|
mInputBuffer[mInputPtr++] : getNextChar(errorMsg);
|
|
if (c == quoteChar) {
|
|
break;
|
|
}
|
|
if (c == '\n') {
|
|
markLF();
|
|
spaceToAdd = true;
|
|
continue;
|
|
} else if (c == '\r') {
|
|
if (peekNext() == '\n') {
|
|
++mInputPtr;
|
|
}
|
|
spaceToAdd = true;
|
|
continue;
|
|
} else if (c == CHAR_SPACE) {
|
|
spaceToAdd = true;
|
|
continue;
|
|
} else {
|
|
// Verify it's a legal pubid char (see XML spec, #13, from 2.3)
|
|
if ((c >= VALID_PUBID_CHAR_COUNT)
|
|
|| sPubidValidity[c] != PUBID_CHAR_VALID_B) {
|
|
throwUnexpectedChar(c, " in public identifier");
|
|
}
|
|
}
|
|
|
|
// Other than that, let's just append it:
|
|
if (ptr >= buf.length) {
|
|
buf = expandBy50Pct(buf);
|
|
}
|
|
/* Space-normalization means scrapping leading and trailing
|
|
* white space, and coalescing remaining ws into single spaces.
|
|
*/
|
|
if (spaceToAdd) { // pending white space to add?
|
|
if (c == CHAR_SPACE) { // still a space; let's skip
|
|
continue;
|
|
}
|
|
/* ok: if we have non-space, we'll either forget about
|
|
* space(s) (if nothing has been output, ie. leading space),
|
|
* or output a single space (in-between non-white space)
|
|
*/
|
|
spaceToAdd = false;
|
|
if (ptr > 0) {
|
|
buf[ptr++] = CHAR_SPACE;
|
|
if (ptr >= buf.length) {
|
|
buf = expandBy50Pct(buf);
|
|
}
|
|
}
|
|
}
|
|
buf[ptr++] = c;
|
|
}
|
|
|
|
return (ptr == 0) ? "" : new String(buf, 0, ptr);
|
|
}
|
|
|
|
protected final void parseUntil(TextBuffer tb, char endChar, boolean convertLFs,
|
|
String errorMsg)
|
|
throws XMLStreamException
|
|
{
|
|
// Let's first ensure we have some data in there...
|
|
if (mInputPtr >= mInputEnd) {
|
|
loadMore(errorMsg);
|
|
}
|
|
while (true) {
|
|
// Let's loop consequtive 'easy' spans:
|
|
char[] inputBuf = mInputBuffer;
|
|
int inputLen = mInputEnd;
|
|
int ptr = mInputPtr;
|
|
int startPtr = ptr;
|
|
while (ptr < inputLen) {
|
|
char c = inputBuf[ptr++];
|
|
if (c == endChar) {
|
|
int thisLen = ptr - startPtr - 1;
|
|
if (thisLen > 0) {
|
|
tb.append(inputBuf, startPtr, thisLen);
|
|
}
|
|
mInputPtr = ptr;
|
|
return;
|
|
}
|
|
if (c == '\n') {
|
|
mInputPtr = ptr; // markLF() requires this
|
|
markLF();
|
|
} else if (c == '\r') {
|
|
if (!convertLFs && ptr < inputLen) {
|
|
if (inputBuf[ptr] == '\n') {
|
|
++ptr;
|
|
}
|
|
mInputPtr = ptr;
|
|
markLF();
|
|
} else {
|
|
int thisLen = ptr - startPtr - 1;
|
|
if (thisLen > 0) {
|
|
tb.append(inputBuf, startPtr, thisLen);
|
|
}
|
|
mInputPtr = ptr;
|
|
c = getNextChar(errorMsg);
|
|
if (c != '\n') {
|
|
--mInputPtr; // pusback
|
|
tb.append(convertLFs ? '\n' : '\r');
|
|
} else {
|
|
if (convertLFs) {
|
|
tb.append('\n');
|
|
} else {
|
|
tb.append('\r');
|
|
tb.append('\n');
|
|
}
|
|
}
|
|
startPtr = ptr = mInputPtr;
|
|
markLF();
|
|
}
|
|
}
|
|
}
|
|
int thisLen = ptr - startPtr;
|
|
if (thisLen > 0) {
|
|
tb.append(inputBuf, startPtr, thisLen);
|
|
}
|
|
loadMore(errorMsg);
|
|
startPtr = ptr = mInputPtr;
|
|
inputBuf = mInputBuffer;
|
|
inputLen = mInputEnd;
|
|
}
|
|
}
|
|
|
|
/*
|
|
///////////////////////////////////////////////////////////
|
|
// Internal methods
|
|
///////////////////////////////////////////////////////////
|
|
*/
|
|
|
|
private int resolveCharEnt(StringBuffer originalCharacters)
|
|
throws XMLStreamException
|
|
{
|
|
int value = 0;
|
|
char c = getNextChar(SUFFIX_IN_ENTITY_REF);
|
|
|
|
if (originalCharacters != null) {
|
|
originalCharacters.append(c);
|
|
}
|
|
|
|
if (c == 'x') { // hex
|
|
while (true) {
|
|
c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++]
|
|
: getNextCharFromCurrent(SUFFIX_IN_ENTITY_REF);
|
|
if (c == ';') {
|
|
break;
|
|
}
|
|
|
|
if (originalCharacters != null) {
|
|
originalCharacters.append(c);
|
|
}
|
|
value = value << 4;
|
|
if (c <= '9' && c >= '0') {
|
|
value += (c - '0');
|
|
} else if (c >= 'a' && c <= 'f') {
|
|
value += 10 + (c - 'a');
|
|
} else if (c >= 'A' && c <= 'F') {
|
|
value += 10 + (c - 'A');
|
|
} else {
|
|
throwUnexpectedChar(c, "; expected a hex digit (0-9a-fA-F).");
|
|
}
|
|
// Overflow?
|
|
if (value > MAX_UNICODE_CHAR) {
|
|
reportUnicodeOverflow();
|
|
}
|
|
}
|
|
} else { // numeric (decimal)
|
|
while (c != ';') {
|
|
if (c <= '9' && c >= '0') {
|
|
value = (value * 10) + (c - '0');
|
|
// Overflow?
|
|
if (value > MAX_UNICODE_CHAR) {
|
|
reportUnicodeOverflow();
|
|
}
|
|
} else {
|
|
throwUnexpectedChar(c, "; expected a decimal number.");
|
|
}
|
|
c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++]
|
|
: getNextCharFromCurrent(SUFFIX_IN_ENTITY_REF);
|
|
|
|
if (originalCharacters != null && c != ';') {
|
|
originalCharacters.append(c);
|
|
}
|
|
}
|
|
}
|
|
validateChar(value);
|
|
return value;
|
|
}
|
|
|
|
/**
|
|
* Method that will verify that expanded Unicode codepoint is a valid
|
|
* XML content character.
|
|
*/
|
|
private final void validateChar(int value)
|
|
throws XMLStreamException
|
|
{
|
|
/* 24-Jan-2006, TSa: Ok, "high" Unicode chars are problematic,
|
|
* need to be reported by a surrogate pair..
|
|
*/
|
|
if (value >= 0xD800) {
|
|
if (value < 0xE000) { // no surrogates via entity expansion
|
|
reportIllegalChar(value);
|
|
}
|
|
if (value > 0xFFFF) {
|
|
// Within valid range at all?
|
|
if (value > MAX_UNICODE_CHAR) {
|
|
reportUnicodeOverflow();
|
|
}
|
|
} else if (value >= 0xFFFE) { // 0xFFFE and 0xFFFF are illegal too
|
|
reportIllegalChar(value);
|
|
}
|
|
// Ok, fine as is
|
|
} else if (value < 32) {
|
|
if (value == 0) {
|
|
throwParseError("Invalid character reference: null character not allowed in XML content.");
|
|
}
|
|
// XML 1.1 allows most other chars; 1.0 does not: However Exchange sends such chars with XML 1.0
|
|
//if (!mXml11 &&
|
|
// (value != 0x9 && value != 0xA && value != 0xD)) {
|
|
// reportIllegalChar(value);
|
|
//}
|
|
}
|
|
}
|
|
|
|
protected final char[] getNameBuffer(int minSize)
|
|
{
|
|
char[] buf = mNameBuffer;
|
|
|
|
if (buf == null) {
|
|
mNameBuffer = buf = new char[(minSize > 48) ? (minSize+16) : 64];
|
|
} else if (minSize >= buf.length) { // let's allow one char extra...
|
|
int len = buf.length;
|
|
len += (len >> 1); // grow by 50%
|
|
mNameBuffer = buf = new char[(minSize >= len) ? (minSize+16) : len];
|
|
}
|
|
return buf;
|
|
}
|
|
|
|
protected final char[] expandBy50Pct(char[] buf)
|
|
{
|
|
int len = buf.length;
|
|
char[] newBuf = new char[len + (len >> 1)];
|
|
System.arraycopy(buf, 0, newBuf, 0, len);
|
|
return newBuf;
|
|
}
|
|
|
|
/**
|
|
* Method called to throw an exception indicating that a name that
|
|
* should not be namespace-qualified (PI target, entity/notation name)
|
|
* is one, and reader is namespace aware.
|
|
*/
|
|
private void throwNsColonException(String name)
|
|
throws XMLStreamException
|
|
{
|
|
throwParseError("Illegal name \"{0}\" (PI target, entity/notation name): can not contain a colon (XML Namespaces 1.0#6)", name, null);
|
|
}
|
|
|
|
private void throwRecursionError(String entityName)
|
|
throws XMLStreamException
|
|
{
|
|
throwParseError("Illegal entity expansion: entity \"{0}\" expands itself recursively.", entityName, null);
|
|
}
|
|
|
|
private void reportUnicodeOverflow()
|
|
throws XMLStreamException
|
|
{
|
|
throwParseError("Illegal character entity: value higher than max allowed (0x{0})", Integer.toHexString(MAX_UNICODE_CHAR), null);
|
|
}
|
|
|
|
private void reportIllegalChar(int value)
|
|
throws XMLStreamException
|
|
{
|
|
throwParseError("Illegal character entity: expansion character (code 0x{0}", Integer.toHexString(value), null);
|
|
}
|
|
}
|