mirror of
https://codeberg.org/Mercury-IM/Smack
synced 2024-09-27 10:19:33 +02:00
dbab9b8995
Rework StringUtils.escapeForXML() so that it can be used also for
StringUtils.xmlAttribEncodeBinary(). escapeForXML() now uses a
switch/case statement, which should leave the (JIT) compiler more room
for optimizations.
Removing the "do not escape unicode character references", because
this behavior, introduced with
8264ebdfb5
, is incorrect.
553 lines
20 KiB
Java
553 lines
20 KiB
Java
/**
 *
 * Copyright 2003-2007 Jive Software.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
package org.jivesoftware.smack.util;
|
|
|
|
import java.io.UnsupportedEncodingException;
|
|
import java.security.MessageDigest;
|
|
import java.security.NoSuchAlgorithmException;
|
|
import java.util.Random;
|
|
import java.util.logging.Level;
|
|
import java.util.logging.Logger;
|
|
|
|
/**
|
|
* A collection of utility methods for String objects.
|
|
*/
|
|
public class StringUtils {
|
|
private static final Logger LOGGER = Logger.getLogger(StringUtils.class.getName());
|
|
|
|
public static final String QUOTE_ENCODE = """;
|
|
public static final String APOS_ENCODE = "'";
|
|
public static final String AMP_ENCODE = "&";
|
|
public static final String LT_ENCODE = "<";
|
|
public static final String GT_ENCODE = ">";
|
|
|
|
/**
|
|
* Returns the name portion of a XMPP address. For example, for the
|
|
* address "matt@jivesoftware.com/Smack", "matt" would be returned. If no
|
|
* username is present in the address, the empty string will be returned.
|
|
*
|
|
* @param XMPPAddress the XMPP address.
|
|
* @return the name portion of the XMPP address.
|
|
*/
|
|
public static String parseName(String XMPPAddress) {
|
|
if (XMPPAddress == null) {
|
|
return null;
|
|
}
|
|
int atIndex = XMPPAddress.lastIndexOf("@");
|
|
if (atIndex <= 0) {
|
|
return "";
|
|
}
|
|
else {
|
|
return XMPPAddress.substring(0, atIndex);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns the server portion of a XMPP address. For example, for the
|
|
* address "matt@jivesoftware.com/Smack", "jivesoftware.com" would be returned.
|
|
* If no server is present in the address, the empty string will be returned.
|
|
*
|
|
* @param XMPPAddress the XMPP address.
|
|
* @return the server portion of the XMPP address.
|
|
*/
|
|
public static String parseServer(String XMPPAddress) {
|
|
if (XMPPAddress == null) {
|
|
return null;
|
|
}
|
|
int atIndex = XMPPAddress.lastIndexOf("@");
|
|
// If the String ends with '@', return the empty string.
|
|
if (atIndex + 1 > XMPPAddress.length()) {
|
|
return "";
|
|
}
|
|
int slashIndex = XMPPAddress.indexOf("/");
|
|
if (slashIndex > 0 && slashIndex > atIndex) {
|
|
return XMPPAddress.substring(atIndex + 1, slashIndex);
|
|
}
|
|
else {
|
|
return XMPPAddress.substring(atIndex + 1);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns the resource portion of a XMPP address. For example, for the
|
|
* address "matt@jivesoftware.com/Smack", "Smack" would be returned. If no
|
|
* resource is present in the address, the empty string will be returned.
|
|
*
|
|
* @param XMPPAddress the XMPP address.
|
|
* @return the resource portion of the XMPP address.
|
|
*/
|
|
public static String parseResource(String XMPPAddress) {
|
|
if (XMPPAddress == null) {
|
|
return null;
|
|
}
|
|
int slashIndex = XMPPAddress.indexOf("/");
|
|
if (slashIndex + 1 > XMPPAddress.length() || slashIndex < 0) {
|
|
return "";
|
|
}
|
|
else {
|
|
return XMPPAddress.substring(slashIndex + 1);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns the XMPP address with any resource information removed. For example,
|
|
* for the address "matt@jivesoftware.com/Smack", "matt@jivesoftware.com" would
|
|
* be returned.
|
|
*
|
|
* @param XMPPAddress the XMPP address.
|
|
* @return the bare XMPP address without resource information.
|
|
*/
|
|
public static String parseBareAddress(String XMPPAddress) {
|
|
if (XMPPAddress == null) {
|
|
return null;
|
|
}
|
|
int slashIndex = XMPPAddress.indexOf("/");
|
|
if (slashIndex < 0) {
|
|
return XMPPAddress;
|
|
}
|
|
else if (slashIndex == 0) {
|
|
return "";
|
|
}
|
|
else {
|
|
return XMPPAddress.substring(0, slashIndex);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns true if jid is a full JID (i.e. a JID with resource part).
|
|
*
|
|
* @param jid
|
|
* @return true if full JID, false otherwise
|
|
*/
|
|
public static boolean isFullJID(String jid) {
|
|
if (parseName(jid).length() <= 0 || parseServer(jid).length() <= 0
|
|
|| parseResource(jid).length() <= 0) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Escapes the node portion of a JID according to "JID Escaping" (JEP-0106).
|
|
* Escaping replaces characters prohibited by node-prep with escape sequences,
|
|
* as follows:<p>
|
|
*
|
|
* <table border="1">
|
|
* <tr><td><b>Unescaped Character</b></td><td><b>Encoded Sequence</b></td></tr>
|
|
* <tr><td><space></td><td>\20</td></tr>
|
|
* <tr><td>"</td><td>\22</td></tr>
|
|
* <tr><td>&</td><td>\26</td></tr>
|
|
* <tr><td>'</td><td>\27</td></tr>
|
|
* <tr><td>/</td><td>\2f</td></tr>
|
|
* <tr><td>:</td><td>\3a</td></tr>
|
|
* <tr><td><</td><td>\3c</td></tr>
|
|
* <tr><td>></td><td>\3e</td></tr>
|
|
* <tr><td>@</td><td>\40</td></tr>
|
|
* <tr><td>\</td><td>\5c</td></tr>
|
|
* </table><p>
|
|
*
|
|
* This process is useful when the node comes from an external source that doesn't
|
|
* conform to nodeprep. For example, a username in LDAP may be "Joe Smith". Because
|
|
* the <space> character isn't a valid part of a node, the username should
|
|
* be escaped to "Joe\20Smith" before being made into a JID (e.g. "joe\20smith@example.com"
|
|
* after case-folding, etc. has been applied).<p>
|
|
*
|
|
* All node escaping and un-escaping must be performed manually at the appropriate
|
|
* time; the JID class will not escape or un-escape automatically.
|
|
*
|
|
* @param node the node.
|
|
* @return the escaped version of the node.
|
|
*/
|
|
public static String escapeNode(String node) {
|
|
if (node == null) {
|
|
return null;
|
|
}
|
|
StringBuilder buf = new StringBuilder(node.length() + 8);
|
|
for (int i=0, n=node.length(); i<n; i++) {
|
|
char c = node.charAt(i);
|
|
switch (c) {
|
|
case '"': buf.append("\\22"); break;
|
|
case '&': buf.append("\\26"); break;
|
|
case '\'': buf.append("\\27"); break;
|
|
case '/': buf.append("\\2f"); break;
|
|
case ':': buf.append("\\3a"); break;
|
|
case '<': buf.append("\\3c"); break;
|
|
case '>': buf.append("\\3e"); break;
|
|
case '@': buf.append("\\40"); break;
|
|
case '\\': buf.append("\\5c"); break;
|
|
default: {
|
|
if (Character.isWhitespace(c)) {
|
|
buf.append("\\20");
|
|
}
|
|
else {
|
|
buf.append(c);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return buf.toString();
|
|
}
|
|
|
|
/**
|
|
* Un-escapes the node portion of a JID according to "JID Escaping" (JEP-0106).<p>
|
|
* Escaping replaces characters prohibited by node-prep with escape sequences,
|
|
* as follows:<p>
|
|
*
|
|
* <table border="1">
|
|
* <tr><td><b>Unescaped Character</b></td><td><b>Encoded Sequence</b></td></tr>
|
|
* <tr><td><space></td><td>\20</td></tr>
|
|
* <tr><td>"</td><td>\22</td></tr>
|
|
* <tr><td>&</td><td>\26</td></tr>
|
|
* <tr><td>'</td><td>\27</td></tr>
|
|
* <tr><td>/</td><td>\2f</td></tr>
|
|
* <tr><td>:</td><td>\3a</td></tr>
|
|
* <tr><td><</td><td>\3c</td></tr>
|
|
* <tr><td>></td><td>\3e</td></tr>
|
|
* <tr><td>@</td><td>\40</td></tr>
|
|
* <tr><td>\</td><td>\5c</td></tr>
|
|
* </table><p>
|
|
*
|
|
* This process is useful when the node comes from an external source that doesn't
|
|
* conform to nodeprep. For example, a username in LDAP may be "Joe Smith". Because
|
|
* the <space> character isn't a valid part of a node, the username should
|
|
* be escaped to "Joe\20Smith" before being made into a JID (e.g. "joe\20smith@example.com"
|
|
* after case-folding, etc. has been applied).<p>
|
|
*
|
|
* All node escaping and un-escaping must be performed manually at the appropriate
|
|
* time; the JID class will not escape or un-escape automatically.
|
|
*
|
|
* @param node the escaped version of the node.
|
|
* @return the un-escaped version of the node.
|
|
*/
|
|
public static String unescapeNode(String node) {
|
|
if (node == null) {
|
|
return null;
|
|
}
|
|
char [] nodeChars = node.toCharArray();
|
|
StringBuilder buf = new StringBuilder(nodeChars.length);
|
|
for (int i=0, n=nodeChars.length; i<n; i++) {
|
|
compare: {
|
|
char c = node.charAt(i);
|
|
if (c == '\\' && i+2<n) {
|
|
char c2 = nodeChars[i+1];
|
|
char c3 = nodeChars[i+2];
|
|
if (c2 == '2') {
|
|
switch (c3) {
|
|
case '0': buf.append(' '); i+=2; break compare;
|
|
case '2': buf.append('"'); i+=2; break compare;
|
|
case '6': buf.append('&'); i+=2; break compare;
|
|
case '7': buf.append('\''); i+=2; break compare;
|
|
case 'f': buf.append('/'); i+=2; break compare;
|
|
}
|
|
}
|
|
else if (c2 == '3') {
|
|
switch (c3) {
|
|
case 'a': buf.append(':'); i+=2; break compare;
|
|
case 'c': buf.append('<'); i+=2; break compare;
|
|
case 'e': buf.append('>'); i+=2; break compare;
|
|
}
|
|
}
|
|
else if (c2 == '4') {
|
|
if (c3 == '0') {
|
|
buf.append("@");
|
|
i+=2;
|
|
break compare;
|
|
}
|
|
}
|
|
else if (c2 == '5') {
|
|
if (c3 == 'c') {
|
|
buf.append("\\");
|
|
i+=2;
|
|
break compare;
|
|
}
|
|
}
|
|
}
|
|
buf.append(c);
|
|
}
|
|
}
|
|
return buf.toString();
|
|
}
|
|
|
|
/**
|
|
* Returns a string representing a XML attribute. The value parameter is escaped as necessary. In particular,
|
|
* white spaces are encoded as character references, such that they are not replaced by ' ' on parsing.
|
|
* @param name name of the XML attribute
|
|
* @param value value of the XML attribute
|
|
*/
|
|
public static String xmlAttrib(String name, String value) {
|
|
return name + "=\"" + escapeForXML(value, true) + "\"";
|
|
}
|
|
|
|
|
|
/**
|
|
* Escapes all necessary characters in the String so that it can be used
|
|
* in an XML doc.
|
|
*
|
|
* @param string the string to escape.
|
|
* @return the string with appropriate characters escaped.
|
|
*/
|
|
public static String escapeForXML(String string) {
|
|
return escapeForXML(string, false);
|
|
}
|
|
|
|
public static String escapeForXML(final String string, final boolean escapeWhitespace) {
|
|
if (string == null) {
|
|
return null;
|
|
}
|
|
final char[] input = string.toCharArray();
|
|
final int len = input.length;
|
|
final StringBuilder out = new StringBuilder((int)(len*1.3));
|
|
CharSequence toAppend;
|
|
char ch;
|
|
int last = 0;
|
|
int i = 0;
|
|
while (i < len) {
|
|
toAppend = null;
|
|
ch = input[i];
|
|
switch(ch) {
|
|
case '<':
|
|
toAppend = LT_ENCODE;
|
|
break;
|
|
case '>':
|
|
toAppend = GT_ENCODE;
|
|
break;
|
|
case '&':
|
|
toAppend = AMP_ENCODE;
|
|
break;
|
|
case '"':
|
|
toAppend = QUOTE_ENCODE;
|
|
break;
|
|
case '\'':
|
|
toAppend = APOS_ENCODE;
|
|
break;
|
|
default:
|
|
// includes \t, \n, \r
|
|
if (escapeWhitespace && (ch <= 0x1f || (0x7f <= ch && ch <= 0x9f))) {
|
|
StringBuilder sb = new StringBuilder();
|
|
sb.append("&#x");
|
|
sb.append(String.format("%X", (int) ch));
|
|
sb.append(';');
|
|
toAppend = sb;
|
|
}
|
|
break;
|
|
}
|
|
if (toAppend != null) {
|
|
if (i > last) {
|
|
out.append(input, last, i - last);
|
|
}
|
|
out.append(toAppend);
|
|
last = ++i;
|
|
} else {
|
|
i++;
|
|
}
|
|
}
|
|
if (last == 0) {
|
|
return string;
|
|
}
|
|
if (i > last) {
|
|
out.append(input, last, i - last);
|
|
}
|
|
return out.toString();
|
|
}
|
|
|
|
/**
|
|
* Used by the hash method.
|
|
*/
|
|
private static MessageDigest digest = null;
|
|
|
|
/**
|
|
* Hashes a String using the SHA-1 algorithm and returns the result as a
|
|
* String of hexadecimal numbers. This method is synchronized to avoid
|
|
* excessive MessageDigest object creation. If calling this method becomes
|
|
* a bottleneck in your code, you may wish to maintain a pool of
|
|
* MessageDigest objects instead of using this method.
|
|
* <p>
|
|
* A hash is a one-way function -- that is, given an
|
|
* input, an output is easily computed. However, given the output, the
|
|
* input is almost impossible to compute. This is useful for passwords
|
|
* since we can store the hash and a hacker will then have a very hard time
|
|
* determining the original password.
|
|
*
|
|
* @param data the String to compute the hash of.
|
|
* @return a hashed version of the passed-in String
|
|
*/
|
|
public synchronized static String hash(String data) {
|
|
if (digest == null) {
|
|
try {
|
|
digest = MessageDigest.getInstance("SHA-1");
|
|
}
|
|
catch (NoSuchAlgorithmException nsae) {
|
|
LOGGER.log(Level.SEVERE, "Failed to load the SHA-1 MessageDigest. Smack will be unable to function normally.", nsae);
|
|
}
|
|
}
|
|
// Now, compute hash.
|
|
try {
|
|
digest.update(data.getBytes("UTF-8"));
|
|
}
|
|
catch (UnsupportedEncodingException e) {
|
|
LOGGER.log(Level.SEVERE, "Error computing hash", e);
|
|
}
|
|
return encodeHex(digest.digest());
|
|
}
|
|
|
|
/**
|
|
* Encodes an array of bytes as String representation of hexadecimal.
|
|
*
|
|
* @param bytes an array of bytes to convert to a hex string.
|
|
* @return generated hex string.
|
|
*/
|
|
public static String encodeHex(byte[] bytes) {
|
|
StringBuilder hex = new StringBuilder(bytes.length * 2);
|
|
|
|
for (byte aByte : bytes) {
|
|
if (((int) aByte & 0xff) < 0x10) {
|
|
hex.append("0");
|
|
}
|
|
hex.append(Integer.toString((int) aByte & 0xff, 16));
|
|
}
|
|
|
|
return hex.toString();
|
|
}
|
|
|
|
/**
|
|
* Encodes a String as a base64 String.
|
|
*
|
|
* @param data a String to encode.
|
|
* @return a base64 encoded String.
|
|
*/
|
|
public static String encodeBase64(String data) {
|
|
byte [] bytes = null;
|
|
try {
|
|
bytes = data.getBytes("ISO-8859-1");
|
|
}
|
|
catch (UnsupportedEncodingException uee) {
|
|
throw new IllegalStateException(uee);
|
|
}
|
|
return encodeBase64(bytes);
|
|
}
|
|
|
|
/**
|
|
* Encodes a byte array into a base64 String.
|
|
*
|
|
* @param data a byte array to encode.
|
|
* @return a base64 encode String.
|
|
*/
|
|
public static String encodeBase64(byte[] data) {
|
|
return encodeBase64(data, false);
|
|
}
|
|
|
|
/**
|
|
* Encodes a byte array into a bse64 String.
|
|
*
|
|
* @param data The byte arry to encode.
|
|
* @param lineBreaks True if the encoding should contain line breaks and false if it should not.
|
|
* @return A base64 encoded String.
|
|
*/
|
|
public static String encodeBase64(byte[] data, boolean lineBreaks) {
|
|
return encodeBase64(data, 0, data.length, lineBreaks);
|
|
}
|
|
|
|
/**
|
|
* Encodes a byte array into a bse64 String.
|
|
*
|
|
* @param data The byte arry to encode.
|
|
* @param offset the offset of the bytearray to begin encoding at.
|
|
* @param len the length of bytes to encode.
|
|
* @param lineBreaks True if the encoding should contain line breaks and false if it should not.
|
|
* @return A base64 encoded String.
|
|
*/
|
|
public static String encodeBase64(byte[] data, int offset, int len, boolean lineBreaks) {
|
|
return Base64.encodeBytes(data, offset, len, (lineBreaks ? Base64.NO_OPTIONS : Base64.DONT_BREAK_LINES));
|
|
}
|
|
|
|
/**
|
|
* Decodes a base64 String.
|
|
* Unlike Base64.decode() this method does not try to detect and decompress a gzip-compressed input.
|
|
*
|
|
* @param data a base64 encoded String to decode.
|
|
* @return the decoded String.
|
|
*/
|
|
public static byte[] decodeBase64(String data) {
|
|
byte[] bytes;
|
|
try {
|
|
bytes = data.getBytes("UTF-8");
|
|
} catch (java.io.UnsupportedEncodingException uee) {
|
|
bytes = data.getBytes();
|
|
}
|
|
|
|
bytes = Base64.decode(bytes, 0, bytes.length, Base64.NO_OPTIONS);
|
|
return bytes;
|
|
}
|
|
|
|
/**
|
|
* Pseudo-random number generator object for use with randomString().
|
|
* The Random class is not considered to be cryptographically secure, so
|
|
* only use these random Strings for low to medium security applications.
|
|
*/
|
|
private static Random randGen = new Random();
|
|
|
|
/**
|
|
* Array of numbers and letters of mixed case. Numbers appear in the list
|
|
* twice so that there is a more equal chance that a number will be picked.
|
|
* We can use the array to get a random number or letter by picking a random
|
|
* array index.
|
|
*/
|
|
private static char[] numbersAndLetters = ("0123456789abcdefghijklmnopqrstuvwxyz" +
|
|
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ").toCharArray();
|
|
|
|
/**
|
|
* Returns a random String of numbers and letters (lower and upper case)
|
|
* of the specified length. The method uses the Random class that is
|
|
* built-in to Java which is suitable for low to medium grade security uses.
|
|
* This means that the output is only pseudo random, i.e., each number is
|
|
* mathematically generated so is not truly random.<p>
|
|
*
|
|
* The specified length must be at least one. If not, the method will return
|
|
* null.
|
|
*
|
|
* @param length the desired length of the random String to return.
|
|
* @return a random String of numbers and letters of the specified length.
|
|
*/
|
|
public static String randomString(int length) {
|
|
if (length < 1) {
|
|
return null;
|
|
}
|
|
// Create a char buffer to put random letters and numbers in.
|
|
char [] randBuffer = new char[length];
|
|
for (int i=0; i<randBuffer.length; i++) {
|
|
randBuffer[i] = numbersAndLetters[randGen.nextInt(71)];
|
|
}
|
|
return new String(randBuffer);
|
|
}
|
|
|
|
/**
|
|
* Returns true if string is not null and is not empty, false otherwise
|
|
* Examples:
|
|
* isNotEmpty(null) - false
|
|
* isNotEmpty("") - false
|
|
* isNotEmpty(" ") - true
|
|
* isNotEmpty("empty") - true
|
|
*
|
|
* @param string checked String
|
|
* @return true if string is not null and is not empty, false otherwise
|
|
*/
|
|
public static boolean isNotEmpty(CharSequence string) {
|
|
return string != null && string.length() != 0;
|
|
}
|
|
}
|