Improve parseContent() and parseContentDepth()

The idea is that xml-roundtrip should *never* be expected from a
XmlPullParser. So what we need is a method that parses the content of an
element without relying on getText() returning text if on START_TAG or
END_TAG. This is already done by PubSubs ItemProvider.

Also add PacketParserUtils.parseElement() which will return the current
element as String and use this method in PubSub's ItemProvider.
This commit is contained in:
Florian Schmaus 2014-06-18 09:06:48 +02:00
parent 26b5bc0212
commit 56222d6ee2
8 changed files with 201 additions and 117 deletions

View File

@ -44,6 +44,7 @@ import org.jivesoftware.smack.provider.ProviderManager;
import org.jivesoftware.smack.sasl.SASLMechanism.SASLFailure;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
import org.xmlpull.v1.XmlPullParserFactory;
/**
* Utility class that helps to parse packets. Any parsing packets method that must be shared
@ -54,6 +55,24 @@ import org.xmlpull.v1.XmlPullParserException;
public class PacketParserUtils {
private static final Logger LOGGER = Logger.getLogger(PacketParserUtils.class.getName());
/**
* Creates a new XmlPullParser suitable for parsing XMPP. This means in particular that
* FEATURE_PROCESS_NAMESPACES is enabled.
* <p>
* Note that not all XmlPullParser implementations will return a String on
* <code>getText()</code> if the parser is on START_TAG or END_TAG. So you must not rely on this
* behavior when using the parser.
* </p>
*
* @return A suitable XmlPullParser for XMPP parsing
* @throws XmlPullParserException
*/
public static XmlPullParser newXmppParser() throws XmlPullParserException {
XmlPullParser parser = XmlPullParserFactory.newInstance().newPullParser();
parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
return parser;
}
/**
* Parses a message packet.
*
@ -96,7 +115,7 @@ public class PacketParserUtils {
xmlLang = defaultLanguage;
}
String subject = parseContent(parser);
String subject = parseElementText(parser);
if (message.getSubject(xmlLang) == null) {
message.addSubject(xmlLang, subject);
@ -108,8 +127,8 @@ public class PacketParserUtils {
xmlLang = defaultLanguage;
}
String body = parseContent(parser);
String body = parseElementText(parser);
if (message.getBody(xmlLang) == null) {
message.addBody(xmlLang, body);
}
@ -140,29 +159,156 @@ public class PacketParserUtils {
}
/**
* Returns the content of a tag as string regardless of any tags included.
* Returns the textual content of an element as String.
* <p>
* The parser must be positioned on a START_TAG of an element which MUST NOT contain Mixed
* Content (as defined in XML 3.2.2), or else an XmlPullParserException will be thrown.
* </p>
* This method is used for the parts where the XMPP specification requires elements that contain
* only text or are the empty element.
*
* @param parser
* @return the textual content of the element as String
* @throws XmlPullParserException
* @throws IOException
*/
public static String parseElementText(XmlPullParser parser) throws XmlPullParserException, IOException {
assert (parser.getEventType() == XmlPullParser.START_TAG);
String res;
if (parser.isEmptyElementTag()) {
res = "";
}
else {
// Advance to the text of the Element
int event = parser.next();
if (event != XmlPullParser.TEXT) {
throw new XmlPullParserException(
"Non-empty element tag not followed by text, while Mixed Content (XML 3.2.2) is disallowed");
}
res = parser.getText();
event = parser.next();
if (event != XmlPullParser.END_TAG) {
throw new XmlPullParserException(
"Non-empty element tag contains child-elements, while Mixed Content (XML 3.2.2) is disallowed");
}
}
return res;
}
/**
* Returns the current element as string.
* <p>
* The parser must be positioned on START_TAG.
* </p>
* Note that only the outermost namespace attributes ("xmlns") will be returned, not nested ones.
*
* @param parser the XML pull parser
* @return the element as string
* @throws XmlPullParserException
* @throws IOException
*/
public static String parseElement(XmlPullParser parser) throws XmlPullParserException, IOException {
assert(parser.getEventType() == XmlPullParser.START_TAG);
return parseContentDepth(parser, parser.getDepth());
}
/**
* Returns the content of a element as string.
* <p>
* The parser must be positioned on the START_TAG of the element which content is going to get
* returned. If the current element is the empty element, then the empty string is returned. If
* it is a element which contains just text, then just the text is returned. If it contains
* nested elements (and text), then everything from the current opening tag to the corresponding
* closing tag of the same depth is returned as String.
* </p>
* Note that only the outermost namespace attributes ("xmlns") will be returned, not nested ones.
*
* @param parser the XML pull parser
* @return the content of a tag as string
* @throws XmlPullParserException if parser encounters invalid XML
* @throws IOException if an IO error occurs
*/
private static String parseContent(XmlPullParser parser)
public static String parseContent(XmlPullParser parser)
throws XmlPullParserException, IOException {
int parserDepth = parser.getDepth();
return parseContentDepth(parser, parserDepth);
assert(parser.getEventType() == XmlPullParser.START_TAG);
if (parser.isEmptyElementTag()) {
return "";
}
// Advance the parser, since we want to parse the content of the current element
parser.next();
return parseContentDepth(parser, parser.getDepth());
}
/**
* Returns the content from the current position of the parser up to the closing tag of the
* given depth. Note that only the outermost namespace attributes ("xmlns") will be returned,
* not nested ones.
* <p>
* This method is able to parse the content with MX- and KXmlParser. In order to achieve
* this some trade-off has to be make, because KXmlParser does not support xml-roundtrip (ie.
* return a String on getText() on START_TAG and END_TAG). We are therefore required to work
* around this limitation, which results in only partial support for XML namespaces ("xmlns"):
* Only the outermost namespace of elements will be included in the resulting String.
* </p>
*
* @param parser
* @param depth
* @return the content of the current depth
* @throws XmlPullParserException
* @throws IOException
*/
public static String parseContentDepth(XmlPullParser parser, int depth) throws XmlPullParserException, IOException {
StringBuilder content = new StringBuilder();
while (!(parser.next() == XmlPullParser.END_TAG && parser.getDepth() == depth)) {
String text = parser.getText();
if (text == null) {
throw new IllegalStateException("Parser should never return 'null' on getText() here");
XmlStringBuilder xml = new XmlStringBuilder();
int event = parser.getEventType();
boolean isEmptyElement = false;
// XmlPullParser reports namespaces in nested elements even if *only* the outer ones defines
// it. This 'flag' ensures that when a namespace is set for an element, it won't be set again
// in a nested element. It's an ugly workaround that has the potential to break things.
String namespaceElement = null;;
while (true) {
if (event == XmlPullParser.START_TAG) {
xml.halfOpenElement(parser.getName());
if (namespaceElement == null) {
String namespace = parser.getNamespace();
if (StringUtils.isNotEmpty(namespace)) {
xml.attribute("xmlns", namespace);
namespaceElement = parser.getName();
}
}
for (int i = 0; i < parser.getAttributeCount(); i++) {
xml.attribute(parser.getAttributeName(i), parser.getAttributeValue(i));
}
if (parser.isEmptyElementTag()) {
xml.closeEmptyElement();
isEmptyElement = true;
}
else {
xml.rightAngelBracket();
}
}
content.append(text);
else if (event == XmlPullParser.END_TAG) {
if (isEmptyElement) {
// Do nothing as the element was already closed, just reset the flag
isEmptyElement = false;
}
else {
xml.closeElement(parser.getName());
}
if (namespaceElement != null && namespaceElement.equals(parser.getName())) {
// We are on the closing tag, which defined the namespace as starting tag, reset the 'flag'
namespaceElement = null;
}
if (parser.getDepth() <= depth) {
// Abort parsing, we are done
break;
}
}
else if (event == XmlPullParser.TEXT) {
xml.append(parser.getText());
}
event = parser.next();
}
return content.toString();
return xml.toString();
}
/**

View File

@ -19,7 +19,6 @@ package org.jivesoftware.smack;
import static org.junit.Assert.*;
import java.io.StringReader;
import java.util.Collection;
import java.util.Collections;
import java.util.concurrent.CopyOnWriteArrayList;
@ -31,11 +30,11 @@ import org.jivesoftware.smack.packet.RosterPacket;
import org.jivesoftware.smack.packet.IQ.Type;
import org.jivesoftware.smack.packet.RosterPacket.Item;
import org.jivesoftware.smack.packet.RosterPacket.ItemType;
import org.jivesoftware.smack.test.util.TestUtils;
import org.jivesoftware.smack.util.PacketParserUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.xmlpull.v1.XmlPullParserFactory;
import org.xmlpull.v1.XmlPullParser;
/**
@ -315,8 +314,6 @@ public class RosterTest {
final String contactJID = "nurse@example.com";
final Roster roster = connection.getRoster();
assertNotNull("Can't get the roster from the provided connection!", roster);
final XmlPullParser parser = XmlPullParserFactory.newInstance().newPullParser();
parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
final StringBuilder sb = new StringBuilder();
sb.append("<iq id=\"rostertest1\" type=\"set\" ")
.append("to=\"").append(connection.getUser()).append("\">")
@ -324,8 +321,7 @@ public class RosterTest {
.append("<item jid=\"").append(contactJID).append("\"/>")
.append("</query>")
.append("</iq>");
parser.setInput(new StringReader(sb.toString()));
parser.next();
final XmlPullParser parser = TestUtils.getIQParser(sb.toString());
final IQ rosterPush = PacketParserUtils.parseIQ(parser, connection);
initRoster(connection, roster);
rosterListener.reset();
@ -449,8 +445,6 @@ public class RosterTest {
final String contactJID = "nurse@example.com";
final Roster roster = connection.getRoster();
assertNotNull("Can't get the roster from the provided connection!", roster);
final XmlPullParser parser = XmlPullParserFactory.newInstance().newPullParser();
parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
final StringBuilder sb = new StringBuilder();
sb.append("<iq id=\"rostertest2\" type=\"set\" ")
.append("to=\"").append(connection.getUser()).append("\">")
@ -460,8 +454,7 @@ public class RosterTest {
.append("</item>")
.append("</query>")
.append("</iq>");
parser.setInput(new StringReader(sb.toString()));
parser.next();
final XmlPullParser parser = TestUtils.getIQParser(sb.toString());
final IQ rosterPush = PacketParserUtils.parseIQ(parser, connection);
initRoster(connection, roster);
rosterListener.reset();

View File

@ -35,7 +35,7 @@ public class ParsingExceptionTest {
private final static String EXTENSION2 =
"<extension2 xmlns='namespace'>" +
"<bar node='testNode'>" +
"<i id='testid1' >" +
"<i id='testid1'>" +
"</i>" +
"</bar>" +
"</extension2>";
@ -52,12 +52,14 @@ public class ParsingExceptionTest {
@Test
public void consumeUnparsedInput() throws Exception {
final String MESSAGE_EXCEPTION_ELEMENT =
"<" + ThrowException.ELEMENT + " xmlns='" + ThrowException.NAMESPACE + "'>" +
"<nothingInHere>" +
"</nothingInHere>" +
"</" + ThrowException.ELEMENT + ">";
XmlPullParser parser = TestUtils.getMessageParser(
"<message from='user@server.example' to='francisco@denmark.lit' id='foo'>" +
"<" + ThrowException.ELEMENT + " xmlns='" + ThrowException.NAMESPACE + "'>" +
"<nothingInHere>" +
"</nothingInHere>" +
"</" + ThrowException.ELEMENT + ">" +
MESSAGE_EXCEPTION_ELEMENT +
EXTENSION2 +
"</message>");
int parserDepth = parser.getDepth();
@ -68,8 +70,7 @@ public class ParsingExceptionTest {
content = PacketParserUtils.parseContentDepth(parser, parserDepth);
}
assertNotNull(content);
assertEquals(content, "<nothingInHere></nothingInHere>" + "</" + ThrowException.ELEMENT + ">" + EXTENSION2);
assertEquals(MESSAGE_EXCEPTION_ELEMENT + EXTENSION2 + "</message>", content);
}
static class ThrowException implements PacketExtensionProvider {

View File

@ -17,9 +17,10 @@
package org.jivesoftware.smack.test.util;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import org.xmlpull.v1.XmlPullParserFactory;
import org.jivesoftware.smack.util.PacketParserUtils;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
@ -39,12 +40,18 @@ final public class TestUtils {
return getParser(stanza, "presence");
}
public static XmlPullParser getParser(String stanza, String startTag) {
public static XmlPullParser getParser(String string, String startTag) {
return getParser(new StringReader(string), startTag);
}
public static XmlPullParser getParser(Reader reader, String startTag) {
XmlPullParser parser;
try {
parser = XmlPullParserFactory.newInstance().newPullParser();
parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
parser.setInput(new StringReader(stanza));
parser = PacketParserUtils.newXmppParser();
parser.setInput(reader);
if (startTag == null) {
return parser;
}
boolean found = false;
while (!found) {
@ -53,8 +60,7 @@ final public class TestUtils {
}
if (!found)
throw new IllegalArgumentException("Cannot parse start tag [" + startTag + "] from stanza [" + stanza
+ "]");
throw new IllegalArgumentException("Can not find start tag '" + startTag + "'");
} catch (XmlPullParserException e) {
throw new RuntimeException(e);
} catch (IOException e) {

View File

@ -27,7 +27,6 @@ import static org.junit.Assert.fail;
import java.util.Locale;
import java.util.Properties;
import org.custommonkey.xmlunit.DetailedDiff;
import org.custommonkey.xmlunit.Diff;
import org.custommonkey.xmlunit.examples.RecursiveElementNameAndTextQualifier;
import org.jivesoftware.smack.packet.Message;
@ -654,7 +653,13 @@ public class PacketParserUtilsTest {
}
@Test
/**
* RFC6121 5.2.3 explicitly disallows mixed content in <body/> elements. Make sure that we throw
* an exception if we encounter such an element.
*
* @throws Exception
*/
@Test(expected=XmlPullParserException.class)
public void invalidMessageBodyContainingTagTest() throws Exception {
String control = XMLBuilder.create("message")
.a("from", "romeo@montague.lit/orchard")
@ -668,23 +673,10 @@ public class PacketParserUtilsTest {
.a("style", "font-weight: bold;")
.t("Bad Message Body")
.asString(outputProperties);
try {
Message message = (Message) PacketParserUtils.parseMessage(TestUtils.getMessageParser(control));
String body = "<span style=\"font-weight: bold;\">"
+ "Bad Message Body</span>";
assertEquals(body, message.getBody());
assertXMLNotEqual(control, message.toXML().toString());
DetailedDiff diffs = new DetailedDiff(new Diff(control, message.toXML().toString()));
// body has no namespace URI, span is escaped
assertEquals(6, diffs.getAllDifferences().size());
} catch(XmlPullParserException e) {
fail("No parser exception should be thrown" + e.getMessage());
}
Message message = (Message) PacketParserUtils.parseMessage(TestUtils.getMessageParser(control));
fail("Should throw exception. Instead got message: " + message.toXML().toString());
}
@Test
@ -794,7 +786,7 @@ public class PacketParserUtilsTest {
public void parseContentDepthTest() throws Exception {
final String stanza = "<iq type='result' to='foo@bar.com' from='baz.com' id='42'/>";
XmlPullParser parser = TestUtils.getParser(stanza, "iq");
String content = PacketParserUtils.parseContentDepth(parser, 1);
String content = PacketParserUtils.parseContent(parser);
assertEquals("", content);
}

View File

@ -30,11 +30,11 @@ import java.util.logging.Logger;
import org.jivesoftware.smack.packet.IQ;
import org.jivesoftware.smack.provider.IQProvider;
import org.jivesoftware.smack.util.Base32Encoder;
import org.jivesoftware.smack.util.PacketParserUtils;
import org.jivesoftware.smack.util.StringEncoder;
import org.jivesoftware.smackx.caps.EntityCapsManager;
import org.jivesoftware.smackx.disco.packet.DiscoverInfo;
import org.jivesoftware.smackx.disco.provider.DiscoverInfoProvider;
import org.xmlpull.v1.XmlPullParserFactory;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
@ -159,8 +159,7 @@ public class SimpleDirectoryPersistentCache implements EntityCapsPersistentCache
Reader reader = new StringReader(fileContent);
XmlPullParser parser;
try {
parser = XmlPullParserFactory.newInstance().newPullParser();
parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
parser = PacketParserUtils.newXmppParser();
parser.setInput(reader);
} catch (XmlPullParserException xppe) {
LOGGER.log(Level.SEVERE, "Exception initializing parser", xppe);

View File

@ -20,7 +20,6 @@ import org.jivesoftware.smack.packet.PacketExtension;
import org.jivesoftware.smack.provider.PacketExtensionProvider;
import org.jivesoftware.smack.provider.ProviderManager;
import org.jivesoftware.smack.util.PacketParserUtils;
import org.jivesoftware.smack.util.StringUtils;
import org.jivesoftware.smackx.pubsub.Item;
import org.jivesoftware.smackx.pubsub.PayloadItem;
import org.jivesoftware.smackx.pubsub.SimplePayload;
@ -41,7 +40,6 @@ public class ItemProvider implements PacketExtensionProvider
{
String id = parser.getAttributeValue(null, "id");
String node = parser.getAttributeValue(null, "node");
String elem = parser.getName();
int tag = parser.next();
@ -56,57 +54,8 @@ public class ItemProvider implements PacketExtensionProvider
if (ProviderManager.getExtensionProvider(payloadElemName, payloadNS) == null)
{
boolean done = false;
boolean isEmptyElement = false;
StringBuilder payloadText = new StringBuilder();
while (!done)
{
if (tag == XmlPullParser.END_TAG && parser.getName().equals(elem))
{
done = true;
continue;
}
else if (parser.getEventType() == XmlPullParser.START_TAG)
{
payloadText.append("<").append(parser.getName());
if (parser.getName().equals(payloadElemName) && (payloadNS.length() > 0))
payloadText.append(" xmlns=\"").append(payloadNS).append("\"");
int n = parser.getAttributeCount();
for (int i = 0; i < n; i++)
payloadText.append(" ").append(parser.getAttributeName(i)).append("=\"")
.append(parser.getAttributeValue(i)).append("\"");
if (parser.isEmptyElementTag())
{
payloadText.append("/>");
isEmptyElement = true;
}
else
{
payloadText.append(">");
}
}
else if (parser.getEventType() == XmlPullParser.END_TAG)
{
if (isEmptyElement)
{
isEmptyElement = false;
}
else
{
payloadText.append("</").append(parser.getName()).append(">");
}
}
else if (parser.getEventType() == XmlPullParser.TEXT)
{
payloadText.append(StringUtils.escapeForXML(parser.getText()));
}
tag = parser.next();
}
return new PayloadItem<SimplePayload>(id, node, new SimplePayload(payloadElemName, payloadNS, payloadText.toString()));
String payloadText = PacketParserUtils.parseElement(parser);
return new PayloadItem<SimplePayload>(id, node, new SimplePayload(payloadElemName, payloadNS, payloadText));
}
else
{

View File

@ -33,7 +33,6 @@ import org.jivesoftware.smack.SmackException;
import org.jivesoftware.smack.SmackException.NoResponseException;
import org.jivesoftware.smack.SmackException.SecurityRequiredException;
import org.jivesoftware.smack.XMPPException.StreamErrorException;
import org.xmlpull.v1.XmlPullParserFactory;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
@ -131,8 +130,7 @@ class PacketReader {
*/
private void resetParser() throws SmackException {
try {
parser = XmlPullParserFactory.newInstance().newPullParser();
parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
parser = PacketParserUtils.newXmppParser();
parser.setInput(connection.getReader());
}
catch (XmlPullParserException e) {