|
|||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||||
java.lang.Object
org.cyberneko.html.filters.DefaultFilter
org.cyberneko.html.filters.Purifier
This filter purifies the HTML input to ensure XML well-formedness. The purification process includes:
Illegal characters in XML names are converted to the character sequence "_u####_" where "####" is the value of the Unicode character represented in hexadecimal. Illegal characters appearing in document content, by contrast, are converted to the character sequence "\\u####".
In comments, the character '-' is replaced by the character sequence "- " to prevent "--" from ever appearing in the comment content. For CDATA sections, the character ']' is replaced by the character sequence "] " to prevent "]]" from appearing.
The URI used for synthesized namespace bindings is "http://cyberneko.org/html/ns/synthesized/number" where number is generated to ensure uniqueness.
| Field Summary | |
protected static java.lang.String |
AUGMENTATIONS
Include infoset augmentations. |
protected boolean |
fAugmentations
Augmentations. |
protected boolean |
fInCDATASection
True if inside a CDATA section. |
protected org.apache.xerces.xni.NamespaceContext |
fNamespaceContext
Namespace information. |
protected boolean |
fNamespaces
Namespaces. |
protected java.lang.String |
fPublicId
Public identifier of doctype declaration. |
protected boolean |
fSeenDoctype
True if the doctype declaration was seen. |
protected boolean |
fSeenRootElement
True if root element was seen. |
protected int |
fSynthesizedNamespaceCount
Synthesized namespace binding count. |
protected java.lang.String |
fSystemId
System identifier of doctype declaration. |
protected static java.lang.String |
NAMESPACES
Namespaces. |
protected static HTMLEventInfo |
SYNTHESIZED_ITEM
Synthesized event info item. |
static java.lang.String |
SYNTHESIZED_NAMESPACE_PREFX
Synthesized namespace binding prefix. |
| Fields inherited from class org.cyberneko.html.filters.DefaultFilter |
fDocumentHandler, fDocumentSource |
| Constructor Summary | |
Purifier()
|
|
| Method Summary | |
void |
characters(org.apache.xerces.xni.XMLString text,
org.apache.xerces.xni.Augmentations augs)
Characters. |
void |
comment(org.apache.xerces.xni.XMLString text,
org.apache.xerces.xni.Augmentations augs)
Comment. |
void |
doctypeDecl(java.lang.String root,
java.lang.String pubid,
java.lang.String sysid,
org.apache.xerces.xni.Augmentations augs)
Doctype declaration. |
void |
emptyElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.XMLAttributes attrs,
org.apache.xerces.xni.Augmentations augs)
Empty element. |
void |
endCDATA(org.apache.xerces.xni.Augmentations augs)
End CDATA section. |
void |
endElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.Augmentations augs)
End element. |
protected void |
handleStartDocument()
Handle start document. |
protected void |
handleStartElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.XMLAttributes attrs)
Handle start element. |
void |
processingInstruction(java.lang.String target,
org.apache.xerces.xni.XMLString data,
org.apache.xerces.xni.Augmentations augs)
Processing instruction. |
protected java.lang.String |
purifyName(java.lang.String name,
boolean localpart)
Purify name. |
protected org.apache.xerces.xni.QName |
purifyQName(org.apache.xerces.xni.QName qname)
Purify qualified name. |
protected org.apache.xerces.xni.XMLString |
purifyText(org.apache.xerces.xni.XMLString text)
Purify content. |
void |
reset(org.apache.xerces.xni.parser.XMLComponentManager manager)
Resets the component. |
void |
startCDATA(org.apache.xerces.xni.Augmentations augs)
Start CDATA section. |
void |
startDocument(org.apache.xerces.xni.XMLLocator locator,
java.lang.String encoding,
org.apache.xerces.xni.Augmentations augs)
Start document. |
void |
startDocument(org.apache.xerces.xni.XMLLocator locator,
java.lang.String encoding,
org.apache.xerces.xni.NamespaceContext nscontext,
org.apache.xerces.xni.Augmentations augs)
Start document. |
void |
startElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.XMLAttributes attrs,
org.apache.xerces.xni.Augmentations augs)
Start element. |
protected void |
synthesizeBinding(org.apache.xerces.xni.XMLAttributes attrs,
java.lang.String ns)
Synthesize namespace binding. |
protected org.apache.xerces.xni.Augmentations |
synthesizedAugs()
Returns an augmentations object with a synthesized item added. |
protected static java.lang.String |
toHexString(int c,
int padlen)
Returns a padded hexadecimal string for the given value. |
void |
xmlDecl(java.lang.String version,
java.lang.String encoding,
java.lang.String standalone,
org.apache.xerces.xni.Augmentations augs)
XML declaration. |
| Methods inherited from class org.cyberneko.html.filters.DefaultFilter |
endDocument, endGeneralEntity, endPrefixMapping, getDocumentHandler, getDocumentSource, getFeatureDefault, getPropertyDefault, getRecognizedFeatures, getRecognizedProperties, ignorableWhitespace, merge, setDocumentHandler, setDocumentSource, setFeature, setProperty, startGeneralEntity, startPrefixMapping, textDecl |
| Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
public static final java.lang.String SYNTHESIZED_NAMESPACE_PREFX
protected static final java.lang.String NAMESPACES
protected static final java.lang.String AUGMENTATIONS
protected static final HTMLEventInfo SYNTHESIZED_ITEM
protected boolean fNamespaces
protected boolean fAugmentations
protected boolean fSeenDoctype
protected boolean fSeenRootElement
protected boolean fInCDATASection
protected java.lang.String fPublicId
protected java.lang.String fSystemId
protected org.apache.xerces.xni.NamespaceContext fNamespaceContext
protected int fSynthesizedNamespaceCount
| Constructor Detail |
public Purifier()
| Method Detail |
public void reset(org.apache.xerces.xni.parser.XMLComponentManager manager)
throws org.apache.xerces.xni.parser.XMLConfigurationException
DefaultFilter
Specified by: reset in interface org.apache.xerces.xni.parser.XMLComponent
Overrides: reset in class DefaultFilter
Parameters: manager - The component manager.
Throws: org.apache.xerces.xni.parser.XMLConfigurationException
public void startDocument(org.apache.xerces.xni.XMLLocator locator,
java.lang.String encoding,
org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
Overrides: startDocument in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void startDocument(org.apache.xerces.xni.XMLLocator locator,
java.lang.String encoding,
org.apache.xerces.xni.NamespaceContext nscontext,
org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
Specified by: startDocument in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: startDocument in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void xmlDecl(java.lang.String version,
java.lang.String encoding,
java.lang.String standalone,
org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
Specified by: xmlDecl in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: xmlDecl in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void comment(org.apache.xerces.xni.XMLString text,
org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
Specified by: comment in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: comment in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void processingInstruction(java.lang.String target,
org.apache.xerces.xni.XMLString data,
org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
Specified by: processingInstruction in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: processingInstruction in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void doctypeDecl(java.lang.String root,
java.lang.String pubid,
java.lang.String sysid,
org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
Specified by: doctypeDecl in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: doctypeDecl in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void startElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.XMLAttributes attrs,
org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
Specified by: startElement in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: startElement in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void emptyElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.XMLAttributes attrs,
org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
Specified by: emptyElement in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: emptyElement in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void startCDATA(org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
Specified by: startCDATA in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: startCDATA in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void endCDATA(org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
Specified by: endCDATA in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: endCDATA in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void characters(org.apache.xerces.xni.XMLString text,
org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
Specified by: characters in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: characters in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
public void endElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.Augmentations augs)
throws org.apache.xerces.xni.XNIException
Specified by: endElement in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides: endElement in class DefaultFilter
Throws: org.apache.xerces.xni.XNIException
protected void handleStartDocument()
protected void handleStartElement(org.apache.xerces.xni.QName element,
org.apache.xerces.xni.XMLAttributes attrs)
protected void synthesizeBinding(org.apache.xerces.xni.XMLAttributes attrs,
java.lang.String ns)
protected final org.apache.xerces.xni.Augmentations synthesizedAugs()
protected org.apache.xerces.xni.QName purifyQName(org.apache.xerces.xni.QName qname)
protected java.lang.String purifyName(java.lang.String name,
boolean localpart)
protected org.apache.xerces.xni.XMLString purifyText(org.apache.xerces.xni.XMLString text)
protected static java.lang.String toHexString(int c,
int padlen)
|
|||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||||