mirror of
https://codeberg.org/Mercury-IM/Smack
synced 2024-11-22 22:32:06 +01:00
Unify StringUtils.escapeForXML()
Rework StringUtils.escapeForXML() so that it can also be used in place of
StringUtils.xmlAttribEncodeBinary(). escapeForXML() now uses a
switch/case statement, which should leave the (JIT) compiler more room
for optimizations.
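Remove the "do not escape unicode character references" special case, because
this behavior, introduced with
8264ebdfb5
, is incorrect: a literal '&' in the input must always be escaped, otherwise
text such as "&#235;" would be decoded as a character reference by the receiver.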
This commit is contained in:
parent
c592b4f046
commit
dbab9b8995
1 changed files with 49 additions and 84 deletions
|
@ -30,11 +30,11 @@ import java.util.logging.Logger;
|
||||||
public class StringUtils {
|
public class StringUtils {
|
||||||
private static final Logger LOGGER = Logger.getLogger(StringUtils.class.getName());
|
private static final Logger LOGGER = Logger.getLogger(StringUtils.class.getName());
|
||||||
|
|
||||||
private static final char[] QUOTE_ENCODE = "&quot;".toCharArray();
|
public static final String QUOTE_ENCODE = "&quot;";
|
||||||
private static final char[] APOS_ENCODE = "&apos;".toCharArray();
|
public static final String APOS_ENCODE = "&apos;";
|
||||||
private static final char[] AMP_ENCODE = "&amp;".toCharArray();
|
public static final String AMP_ENCODE = "&amp;";
|
||||||
private static final char[] LT_ENCODE = "&lt;".toCharArray();
|
public static final String LT_ENCODE = "&lt;";
|
||||||
private static final char[] GT_ENCODE = "&gt;".toCharArray();
|
public static final String GT_ENCODE = "&gt;";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the name portion of a XMPP address. For example, for the
|
* Returns the name portion of a XMPP address. For example, for the
|
||||||
|
@ -283,34 +283,6 @@ public class StringUtils {
|
||||||
return buf.toString();
|
return buf.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Encodes a string for use in an XML attribute by escaping characters with
|
|
||||||
* a special meaning. In particular, white spaces are encoded as character
|
|
||||||
* references, such that they are not replaced by ' ' on parsing.
|
|
||||||
*/
|
|
||||||
private static String xmlAttribEncodeBinary(String value) {
|
|
||||||
StringBuilder s = new StringBuilder();
|
|
||||||
char buf[] = value.toCharArray();
|
|
||||||
for (char c : buf) {
|
|
||||||
switch (c) {
|
|
||||||
case '<': s.append("&lt;"); break;
|
|
||||||
case '>': s.append("&gt;"); break;
|
|
||||||
case '&': s.append("&amp;"); break;
|
|
||||||
case '"': s.append("&quot;"); break;
|
|
||||||
case '\'': s.append("&apos;"); break;
|
|
||||||
default:
|
|
||||||
if (c <= 0x1f || (0x7f <= c && c <= 0x9f)) { // includes \t, \n, \r
|
|
||||||
s.append("&#x");
|
|
||||||
s.append(String.format("%X", (int)c));
|
|
||||||
s.append(';');
|
|
||||||
} else {
|
|
||||||
s.append(c);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return s.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a string representing a XML attribute. The value parameter is escaped as necessary. In particular,
|
* Returns a string representing a XML attribute. The value parameter is escaped as necessary. In particular,
|
||||||
* white spaces are encoded as character references, such that they are not replaced by ' ' on parsing.
|
* white spaces are encoded as character references, such that they are not replaced by ' ' on parsing.
|
||||||
|
@ -318,7 +290,7 @@ public class StringUtils {
|
||||||
* @param value value of the XML attribute
|
* @param value value of the XML attribute
|
||||||
*/
|
*/
|
||||||
public static String xmlAttrib(String name, String value) {
|
public static String xmlAttrib(String name, String value) {
|
||||||
return name + "=\"" + xmlAttribEncodeBinary(value) + "\"";
|
return name + "=\"" + escapeForXML(value, true) + "\"";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -326,69 +298,62 @@ public class StringUtils {
|
||||||
* Escapes all necessary characters in the String so that it can be used
|
* Escapes all necessary characters in the String so that it can be used
|
||||||
* in an XML doc.
|
* in an XML doc.
|
||||||
*
|
*
|
||||||
* <strong>Warning:</strong> This method does not escape unicode character references
|
|
||||||
* (i.e. references of the form &#235;)
|
|
||||||
*
|
|
||||||
* @param string the string to escape.
|
* @param string the string to escape.
|
||||||
* @return the string with appropriate characters escaped.
|
* @return the string with appropriate characters escaped.
|
||||||
*/
|
*/
|
||||||
public static String escapeForXML(String string) {
|
public static String escapeForXML(String string) {
|
||||||
|
return escapeForXML(string, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String escapeForXML(final String string, final boolean escapeWhitespace) {
|
||||||
if (string == null) {
|
if (string == null) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
final char[] input = string.toCharArray();
|
||||||
|
final int len = input.length;
|
||||||
|
final StringBuilder out = new StringBuilder((int)(len*1.3));
|
||||||
|
CharSequence toAppend;
|
||||||
char ch;
|
char ch;
|
||||||
int i=0;
|
int last = 0;
|
||||||
int last=0;
|
int i = 0;
|
||||||
char[] input = string.toCharArray();
|
while (i < len) {
|
||||||
int len = input.length;
|
toAppend = null;
|
||||||
StringBuilder out = new StringBuilder((int)(len*1.3));
|
|
||||||
for (; i < len; i++) {
|
|
||||||
ch = input[i];
|
ch = input[i];
|
||||||
if (ch > '>') {
|
switch(ch) {
|
||||||
|
case '<':
|
||||||
|
toAppend = LT_ENCODE;
|
||||||
|
break;
|
||||||
|
case '>':
|
||||||
|
toAppend = GT_ENCODE;
|
||||||
|
break;
|
||||||
|
case '&':
|
||||||
|
toAppend = AMP_ENCODE;
|
||||||
|
break;
|
||||||
|
case '"':
|
||||||
|
toAppend = QUOTE_ENCODE;
|
||||||
|
break;
|
||||||
|
case '\'':
|
||||||
|
toAppend = APOS_ENCODE;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// includes \t, \n, \r
|
||||||
|
if (escapeWhitespace && (ch <= 0x1f || (0x7f <= ch && ch <= 0x9f))) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append("&#x");
|
||||||
|
sb.append(String.format("%X", (int) ch));
|
||||||
|
sb.append(';');
|
||||||
|
toAppend = sb;
|
||||||
}
|
}
|
||||||
else if (ch == '<') {
|
break;
|
||||||
|
}
|
||||||
|
if (toAppend != null) {
|
||||||
if (i > last) {
|
if (i > last) {
|
||||||
out.append(input, last, i - last);
|
out.append(input, last, i - last);
|
||||||
}
|
}
|
||||||
last = i + 1;
|
out.append(toAppend);
|
||||||
out.append(LT_ENCODE);
|
last = ++i;
|
||||||
}
|
} else {
|
||||||
else if (ch == '>') {
|
i++;
|
||||||
if (i > last) {
|
|
||||||
out.append(input, last, i - last);
|
|
||||||
}
|
|
||||||
last = i + 1;
|
|
||||||
out.append(GT_ENCODE);
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (ch == '&') {
|
|
||||||
if (i > last) {
|
|
||||||
out.append(input, last, i - last);
|
|
||||||
}
|
|
||||||
// Do nothing if the string is of the form &#235; (unicode value)
|
|
||||||
if (!(len > i + 5
|
|
||||||
&& input[i + 1] == '#'
|
|
||||||
&& Character.isDigit(input[i + 2])
|
|
||||||
&& Character.isDigit(input[i + 3])
|
|
||||||
&& Character.isDigit(input[i + 4])
|
|
||||||
&& input[i + 5] == ';')) {
|
|
||||||
last = i + 1;
|
|
||||||
out.append(AMP_ENCODE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (ch == '"') {
|
|
||||||
if (i > last) {
|
|
||||||
out.append(input, last, i - last);
|
|
||||||
}
|
|
||||||
last = i + 1;
|
|
||||||
out.append(QUOTE_ENCODE);
|
|
||||||
}
|
|
||||||
else if (ch == '\'') {
|
|
||||||
if (i > last) {
|
|
||||||
out.append(input, last, i - last);
|
|
||||||
}
|
|
||||||
last = i + 1;
|
|
||||||
out.append(APOS_ENCODE);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (last == 0) {
|
if (last == 0) {
|
||||||
|
|
Loading…
Reference in a new issue