Date: Wed, 18 Jun 2014 09:06:48 +0200
Subject: [PATCH] Improve parseContent() and parseContentDepth()
The idea is that xml-roundtrip should *never* be expected from a
XmlPullParser. So what we need is a method that parses the content of an
element without relying on getText() returning text if on START_TAG or
END_TAG. This is already done by PubSubs ItemProvider.
Also add PacketParserUtils.parseElement() which will return the current
element as String and use this method in PubSub's ItemProvider.
---
.../smack/util/PacketParserUtils.java | 174 ++++++++++++++++--
.../org/jivesoftware/smack/RosterTest.java | 13 +-
.../smack/parsing/ParsingExceptionTest.java | 15 +-
.../smack/test/util/TestUtils.java | 20 +-
.../smack/util/PacketParserUtilsTest.java | 32 ++--
.../cache/SimpleDirectoryPersistentCache.java | 5 +-
.../smackx/pubsub/provider/ItemProvider.java | 55 +-----
.../jivesoftware/smack/tcp/PacketReader.java | 4 +-
8 files changed, 201 insertions(+), 117 deletions(-)
diff --git a/smack-core/src/main/java/org/jivesoftware/smack/util/PacketParserUtils.java b/smack-core/src/main/java/org/jivesoftware/smack/util/PacketParserUtils.java
index aec0a2dbe..28be0b191 100644
--- a/smack-core/src/main/java/org/jivesoftware/smack/util/PacketParserUtils.java
+++ b/smack-core/src/main/java/org/jivesoftware/smack/util/PacketParserUtils.java
@@ -44,6 +44,7 @@ import org.jivesoftware.smack.provider.ProviderManager;
import org.jivesoftware.smack.sasl.SASLMechanism.SASLFailure;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
+import org.xmlpull.v1.XmlPullParserFactory;
/**
* Utility class that helps to parse packets. Any parsing packets method that must be shared
@@ -54,6 +55,24 @@ import org.xmlpull.v1.XmlPullParserException;
public class PacketParserUtils {
private static final Logger LOGGER = Logger.getLogger(PacketParserUtils.class.getName());
+ /**
+ * Creates a new XmlPullParser suitable for parsing XMPP. This means in particular that
+ * FEATURE_PROCESS_NAMESPACES is enabled.
+ *
+ * Note that not all XmlPullParser implementations will return a String on
+ * getText()
if the parser is on START_TAG or END_TAG. So you must not rely on this
+ * behavior when using the parser.
+ *
+ *
+ * @return A suitable XmlPullParser for XMPP parsing
+ * @throws XmlPullParserException
+ */
+ public static XmlPullParser newXmppParser() throws XmlPullParserException {
+ XmlPullParser parser = XmlPullParserFactory.newInstance().newPullParser();
+ parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
+ return parser;
+ }
+
/**
* Parses a message packet.
*
@@ -96,7 +115,7 @@ public class PacketParserUtils {
xmlLang = defaultLanguage;
}
- String subject = parseContent(parser);
+ String subject = parseElementText(parser);
if (message.getSubject(xmlLang) == null) {
message.addSubject(xmlLang, subject);
@@ -108,8 +127,8 @@ public class PacketParserUtils {
xmlLang = defaultLanguage;
}
- String body = parseContent(parser);
-
+ String body = parseElementText(parser);
+
if (message.getBody(xmlLang) == null) {
message.addBody(xmlLang, body);
}
@@ -140,29 +159,156 @@ public class PacketParserUtils {
}
/**
- * Returns the content of a tag as string regardless of any tags included.
+ * Returns the textual content of an element as String.
+ *
+ * The parser must be positioned on a START_TAG of an element which MUST NOT contain Mixed
+ * Content (as defined in XML 3.2.2), or else an XmlPullParserException will be thrown.
+ *
+ * This method is used for the parts where the XMPP specification requires elements that contain
+ * only text or are the empty element.
+ *
+ * @param parser
+ * @return the textual content of the element as String
+ * @throws XmlPullParserException
+ * @throws IOException
+ */
+ public static String parseElementText(XmlPullParser parser) throws XmlPullParserException, IOException {
+ assert (parser.getEventType() == XmlPullParser.START_TAG);
+ String res;
+ if (parser.isEmptyElementTag()) {
+ res = "";
+ }
+ else {
+ // Advance to the text of the Element
+ int event = parser.next();
+ if (event != XmlPullParser.TEXT) {
+ throw new XmlPullParserException(
+ "Non-empty element tag not followed by text, while Mixed Content (XML 3.2.2) is disallowed");
+ }
+ res = parser.getText();
+ event = parser.next();
+ if (event != XmlPullParser.END_TAG) {
+ throw new XmlPullParserException(
+ "Non-empty element tag contains child-elements, while Mixed Content (XML 3.2.2) is disallowed");
+ }
+ }
+ return res;
+ }
+
+ /**
+ * Returns the current element as string.
+ *
+ * The parser must be positioned on START_TAG.
+ *
+ * Note that only the outermost namespace attributes ("xmlns") will be returned, not nested ones.
+ *
+ * @param parser the XML pull parser
+ * @return the element as string
+ * @throws XmlPullParserException
+ * @throws IOException
+ */
+ public static String parseElement(XmlPullParser parser) throws XmlPullParserException, IOException {
+ assert(parser.getEventType() == XmlPullParser.START_TAG);
+ return parseContentDepth(parser, parser.getDepth());
+ }
+
+ /**
+ * Returns the content of a element as string.
+ *
+ * The parser must be positioned on the START_TAG of the element which content is going to get
+ * returned. If the current element is the empty element, then the empty string is returned. If
+ * it is a element which contains just text, then just the text is returned. If it contains
+ * nested elements (and text), then everything from the current opening tag to the corresponding
+ * closing tag of the same depth is returned as String.
+ *
+ * Note that only the outermost namespace attributes ("xmlns") will be returned, not nested ones.
*
* @param parser the XML pull parser
* @return the content of a tag as string
* @throws XmlPullParserException if parser encounters invalid XML
* @throws IOException if an IO error occurs
*/
- private static String parseContent(XmlPullParser parser)
+ public static String parseContent(XmlPullParser parser)
throws XmlPullParserException, IOException {
- int parserDepth = parser.getDepth();
- return parseContentDepth(parser, parserDepth);
+ assert(parser.getEventType() == XmlPullParser.START_TAG);
+ if (parser.isEmptyElementTag()) {
+ return "";
+ }
+ // Advance the parser, since we want to parse the content of the current element
+ parser.next();
+ return parseContentDepth(parser, parser.getDepth());
}
+ /**
+ * Returns the content from the current position of the parser up to the closing tag of the
+ * given depth. Note that only the outermost namespace attributes ("xmlns") will be returned,
+ * not nested ones.
+ *
+ * This method is able to parse the content with MX- and KXmlParser. In order to achieve
+ * this some trade-off has to be make, because KXmlParser does not support xml-roundtrip (ie.
+ * return a String on getText() on START_TAG and END_TAG). We are therefore required to work
+ * around this limitation, which results in only partial support for XML namespaces ("xmlns"):
+ * Only the outermost namespace of elements will be included in the resulting String.
+ *
+ *
+ * @param parser
+ * @param depth
+ * @return the content of the current depth
+ * @throws XmlPullParserException
+ * @throws IOException
+ */
public static String parseContentDepth(XmlPullParser parser, int depth) throws XmlPullParserException, IOException {
- StringBuilder content = new StringBuilder();
- while (!(parser.next() == XmlPullParser.END_TAG && parser.getDepth() == depth)) {
- String text = parser.getText();
- if (text == null) {
- throw new IllegalStateException("Parser should never return 'null' on getText() here");
+ XmlStringBuilder xml = new XmlStringBuilder();
+ int event = parser.getEventType();
+ boolean isEmptyElement = false;
+ // XmlPullParser reports namespaces in nested elements even if *only* the outer ones defines
+ // it. This 'flag' ensures that when a namespace is set for an element, it won't be set again
+ // in a nested element. It's an ugly workaround that has the potential to break things.
+ String namespaceElement = null;;
+ while (true) {
+ if (event == XmlPullParser.START_TAG) {
+ xml.halfOpenElement(parser.getName());
+ if (namespaceElement == null) {
+ String namespace = parser.getNamespace();
+ if (StringUtils.isNotEmpty(namespace)) {
+ xml.attribute("xmlns", namespace);
+ namespaceElement = parser.getName();
+ }
+ }
+ for (int i = 0; i < parser.getAttributeCount(); i++) {
+ xml.attribute(parser.getAttributeName(i), parser.getAttributeValue(i));
+ }
+ if (parser.isEmptyElementTag()) {
+ xml.closeEmptyElement();
+ isEmptyElement = true;
+ }
+ else {
+ xml.rightAngelBracket();
+ }
}
- content.append(text);
+ else if (event == XmlPullParser.END_TAG) {
+ if (isEmptyElement) {
+ // Do nothing as the element was already closed, just reset the flag
+ isEmptyElement = false;
+ }
+ else {
+ xml.closeElement(parser.getName());
+ }
+ if (namespaceElement != null && namespaceElement.equals(parser.getName())) {
+ // We are on the closing tag, which defined the namespace as starting tag, reset the 'flag'
+ namespaceElement = null;
+ }
+ if (parser.getDepth() <= depth) {
+ // Abort parsing, we are done
+ break;
+ }
+ }
+ else if (event == XmlPullParser.TEXT) {
+ xml.append(parser.getText());
+ }
+ event = parser.next();
}
- return content.toString();
+ return xml.toString();
}
/**
diff --git a/smack-core/src/test/java/org/jivesoftware/smack/RosterTest.java b/smack-core/src/test/java/org/jivesoftware/smack/RosterTest.java
index 27e00253e..41e354745 100644
--- a/smack-core/src/test/java/org/jivesoftware/smack/RosterTest.java
+++ b/smack-core/src/test/java/org/jivesoftware/smack/RosterTest.java
@@ -19,7 +19,6 @@ package org.jivesoftware.smack;
import static org.junit.Assert.*;
-import java.io.StringReader;
import java.util.Collection;
import java.util.Collections;
import java.util.concurrent.CopyOnWriteArrayList;
@@ -31,11 +30,11 @@ import org.jivesoftware.smack.packet.RosterPacket;
import org.jivesoftware.smack.packet.IQ.Type;
import org.jivesoftware.smack.packet.RosterPacket.Item;
import org.jivesoftware.smack.packet.RosterPacket.ItemType;
+import org.jivesoftware.smack.test.util.TestUtils;
import org.jivesoftware.smack.util.PacketParserUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
-import org.xmlpull.v1.XmlPullParserFactory;
import org.xmlpull.v1.XmlPullParser;
/**
@@ -315,8 +314,6 @@ public class RosterTest {
final String contactJID = "nurse@example.com";
final Roster roster = connection.getRoster();
assertNotNull("Can't get the roster from the provided connection!", roster);
- final XmlPullParser parser = XmlPullParserFactory.newInstance().newPullParser();
- parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
final StringBuilder sb = new StringBuilder();
sb.append("")
@@ -324,8 +321,7 @@ public class RosterTest {
.append("- ")
.append("")
.append("
");
- parser.setInput(new StringReader(sb.toString()));
- parser.next();
+ final XmlPullParser parser = TestUtils.getIQParser(sb.toString());
final IQ rosterPush = PacketParserUtils.parseIQ(parser, connection);
initRoster(connection, roster);
rosterListener.reset();
@@ -449,8 +445,6 @@ public class RosterTest {
final String contactJID = "nurse@example.com";
final Roster roster = connection.getRoster();
assertNotNull("Can't get the roster from the provided connection!", roster);
- final XmlPullParser parser = XmlPullParserFactory.newInstance().newPullParser();
- parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
final StringBuilder sb = new StringBuilder();
sb.append("")
@@ -460,8 +454,7 @@ public class RosterTest {
.append("")
.append("")
.append("");
- parser.setInput(new StringReader(sb.toString()));
- parser.next();
+ final XmlPullParser parser = TestUtils.getIQParser(sb.toString());
final IQ rosterPush = PacketParserUtils.parseIQ(parser, connection);
initRoster(connection, roster);
rosterListener.reset();
diff --git a/smack-core/src/test/java/org/jivesoftware/smack/parsing/ParsingExceptionTest.java b/smack-core/src/test/java/org/jivesoftware/smack/parsing/ParsingExceptionTest.java
index ee058887e..ebf801c23 100644
--- a/smack-core/src/test/java/org/jivesoftware/smack/parsing/ParsingExceptionTest.java
+++ b/smack-core/src/test/java/org/jivesoftware/smack/parsing/ParsingExceptionTest.java
@@ -35,7 +35,7 @@ public class ParsingExceptionTest {
private final static String EXTENSION2 =
"" +
"" +
- "" +
+ "" +
"" +
"" +
"";
@@ -52,12 +52,14 @@ public class ParsingExceptionTest {
@Test
public void consumeUnparsedInput() throws Exception {
+ final String MESSAGE_EXCEPTION_ELEMENT =
+ "<" + ThrowException.ELEMENT + " xmlns='" + ThrowException.NAMESPACE + "'>" +
+ "" +
+ "" +
+ "" + ThrowException.ELEMENT + ">";
XmlPullParser parser = TestUtils.getMessageParser(
"" +
- "<" + ThrowException.ELEMENT + " xmlns='" + ThrowException.NAMESPACE + "'>" +
- "" +
- "" +
- "" + ThrowException.ELEMENT + ">" +
+ MESSAGE_EXCEPTION_ELEMENT +
EXTENSION2 +
"");
int parserDepth = parser.getDepth();
@@ -68,8 +70,7 @@ public class ParsingExceptionTest {
content = PacketParserUtils.parseContentDepth(parser, parserDepth);
}
assertNotNull(content);
- assertEquals(content, "" + "" + ThrowException.ELEMENT + ">" + EXTENSION2);
-
+ assertEquals(MESSAGE_EXCEPTION_ELEMENT + EXTENSION2 + "", content);
}
static class ThrowException implements PacketExtensionProvider {
diff --git a/smack-core/src/test/java/org/jivesoftware/smack/test/util/TestUtils.java b/smack-core/src/test/java/org/jivesoftware/smack/test/util/TestUtils.java
index ebfca81af..d5d4d7c5d 100644
--- a/smack-core/src/test/java/org/jivesoftware/smack/test/util/TestUtils.java
+++ b/smack-core/src/test/java/org/jivesoftware/smack/test/util/TestUtils.java
@@ -17,9 +17,10 @@
package org.jivesoftware.smack.test.util;
import java.io.IOException;
+import java.io.Reader;
import java.io.StringReader;
-import org.xmlpull.v1.XmlPullParserFactory;
+import org.jivesoftware.smack.util.PacketParserUtils;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
@@ -39,12 +40,18 @@ final public class TestUtils {
return getParser(stanza, "presence");
}
- public static XmlPullParser getParser(String stanza, String startTag) {
+ public static XmlPullParser getParser(String string, String startTag) {
+ return getParser(new StringReader(string), startTag);
+ }
+
+ public static XmlPullParser getParser(Reader reader, String startTag) {
XmlPullParser parser;
try {
- parser = XmlPullParserFactory.newInstance().newPullParser();
- parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
- parser.setInput(new StringReader(stanza));
+ parser = PacketParserUtils.newXmppParser();
+ parser.setInput(reader);
+ if (startTag == null) {
+ return parser;
+ }
boolean found = false;
while (!found) {
@@ -53,8 +60,7 @@ final public class TestUtils {
}
if (!found)
- throw new IllegalArgumentException("Cannot parse start tag [" + startTag + "] from stanza [" + stanza
- + "]");
+ throw new IllegalArgumentException("Can not find start tag '" + startTag + "'");
} catch (XmlPullParserException e) {
throw new RuntimeException(e);
} catch (IOException e) {
diff --git a/smack-core/src/test/java/org/jivesoftware/smack/util/PacketParserUtilsTest.java b/smack-core/src/test/java/org/jivesoftware/smack/util/PacketParserUtilsTest.java
index 80436300f..39e484ee2 100644
--- a/smack-core/src/test/java/org/jivesoftware/smack/util/PacketParserUtilsTest.java
+++ b/smack-core/src/test/java/org/jivesoftware/smack/util/PacketParserUtilsTest.java
@@ -27,7 +27,6 @@ import static org.junit.Assert.fail;
import java.util.Locale;
import java.util.Properties;
-import org.custommonkey.xmlunit.DetailedDiff;
import org.custommonkey.xmlunit.Diff;
import org.custommonkey.xmlunit.examples.RecursiveElementNameAndTextQualifier;
import org.jivesoftware.smack.packet.Message;
@@ -654,7 +653,13 @@ public class PacketParserUtilsTest {
}
- @Test
+ /**
+ * RFC6121 5.2.3 explicitly disallows mixed content in elements. Make sure that we throw
+ * an exception if we encounter such an element.
+ *
+ * @throws Exception
+ */
+ @Test(expected=XmlPullParserException.class)
public void invalidMessageBodyContainingTagTest() throws Exception {
String control = XMLBuilder.create("message")
.a("from", "romeo@montague.lit/orchard")
@@ -668,23 +673,10 @@ public class PacketParserUtilsTest {
.a("style", "font-weight: bold;")
.t("Bad Message Body")
.asString(outputProperties);
-
- try {
- Message message = (Message) PacketParserUtils.parseMessage(TestUtils.getMessageParser(control));
- String body = "