package lib; import java.net.URLEncoder; import java.io.UnsupportedEncodingException; import java.text.CharacterIterator; import java.text.StringCharacterIterator; import java.util.regex.Pattern; import java.util.regex.Matcher; /** * Convenience methods for escaping special characters related to HTML, XML, * and regular expressions. * *
To keep you safe by default, WEB4J goes to some effort to escape * characters in your data when appropriate, such that you usually * don't need to think too much about escaping special characters. Thus, you * shouldn't need to directly use the services of this class very often. * *
For Model Objects containing free form user input, * it is highly recommended that you use {@link SafeText}, not String. * Free form user input is open to malicious use, such as * Cross Site Scripting * attacks. * Using SafeText will protect you from such attacks, by always escaping * special characters automatically in its toString() method. * *
The following WEB4J classes will automatically escape special characters * for you, when needed : *
This method exists as a defence against Cross Site Scripting (XSS) hacks. * The idea is to neutralize control characters commonly used by scripts, such that * they will not be executed by the browser. This is done by replacing the control * characters with their escaped equivalents. * See {@link hirondelle.web4j.security.SafeText} as well. * *
The following characters are replaced with corresponding * HTML character entities : *
Character | Replacement |
---|---|
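< | &lt; |
> | &gt; |
& | &amp; |
" | &quot; |
\t | &#009; |
! | &#033; |
# | &#035; |
$ | &#036; |
% | &#037; |
' | &#039; |
( | &#040; |
) | &#041; |
* | &#042; |
+ | &#043; |
, | &#044; |
- | &#045; |
. | &#046; |
/ | &#047; |
: | &#058; |
; | &#059; |
= | &#061; |
? | &#063; |
@ | &#064; |
[ | &#091; |
\ | &#092; |
] | &#093; |
^ | &#094; |
_ | &#095; |
` | &#096; |
{ | &#123; |
| | &#124; |
} | &#125; |
~ | &#126; |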
Note that JSTL's {@code <c:out>} ... [rest of this note is missing]. Replaces all '&' characters with '&amp;'.
*
* An ampersand character may appear in the query string of a URL.
* The ampersand character is indeed valid in a URL.
* However, URLs usually appear as an HREF attribute, and
* such attributes have the additional constraint that ampersands
* must be escaped.
*
* The JSTL ... [rest of this note is missing]. Used to ensure that HTTP query strings are in proper form, by escaping
* special characters such as spaces.
*
* It is important to note that if a query string appears in an HREF
* attribute, then there are two issues - ensuring the query string is valid HTTP
* (it is URL-encoded), and ensuring it is valid HTML (ensuring the
* ampersand is escaped).
*/
/**
 * URL-encodes a fragment of a URL (for example a query-string value) using UTF-8.
 *
 * @param aURLFragment the text to encode; handed unchanged to
 *        {@link URLEncoder#encode(String, String)}
 * @return the encoded form of {@code aURLFragment}
 * @throws RuntimeException wrapping the {@link UnsupportedEncodingException} raised
 *         when the "UTF-8" encoding name is not accepted
 */
public static String forURL(String aURLFragment){
  try {
    // The charset is always named explicitly rather than relying on a platform default.
    return URLEncoder.encode(aURLFragment, "UTF-8");
  }
  catch (UnsupportedEncodingException cause){
    // Rethrow unchecked so callers are not forced to handle it; the cause is preserved.
    throw new RuntimeException("UTF-8 not supported", cause);
  }
}
/**
* Escape characters for text appearing as XML data, between tags.
*
* The following characters are replaced with corresponding character entities :
* Note that JSTL's {@code <c:out>} ... [rest of this note is missing]. The escaped characters include :
* Synonym for Matcher.quoteReplacement(String).
*
* The following methods use replacement strings which treat
* '$' and '\' as special characters:
* If replacement text can contain arbitrary characters, then you
* will usually need to escape that text, to ensure special characters
* are interpreted literally.
*/
/**
 * Makes arbitrary text safe to use as a regex replacement string.
 *
 * Delegates to {@link Matcher#quoteReplacement(String)}, so that '$' and '\'
 * in the input are taken literally instead of being treated as special.
 *
 * @param aInput the raw replacement text
 * @return the quoted replacement text
 */
public static String forReplacementString(String aInput){
  final String literalReplacement = Matcher.quoteReplacement(aInput);
  return literalReplacement;
}
/**
 * Disable all {@code <SCRIPT>} tags in aText.
 *
 * Every match of the SCRIPT pattern is replaced with {@code &lt;SCRIPT>} and every
 * match of the SCRIPT_END pattern with {@code &lt;/SCRIPT>}. The leading '<' is
 * emitted as the entity {@code &lt;}, so the result is no longer a tag. All other
 * text is left untouched.
 *
 * Case sensitivity is decided by the flags the two patterns were compiled with.
 *
 * @param aText text that may contain script tags
 * @return aText with the matched opening and closing script tags escaped
 */
public static String forScriptTagsOnly(String aText){
  // The replacement literals contain neither '$' nor '\', so replaceAll uses them verbatim.
  final String openingTagsDisabled = SCRIPT.matcher(aText).replaceAll("&lt;SCRIPT>");
  return SCRIPT_END.matcher(openingTagsDisabled).replaceAll("&lt;/SCRIPT>");
}
// PRIVATE //
/** Private so that no instance of this class can be created from outside it. */
private EscapeChars(){
//empty - prevent construction
}
/**
 * Matches an opening {@code <SCRIPT>} tag, ignoring case.
 *
 * An empty regex would match at every position of the input, so the tag text
 * must be spelled out here.
 *
 * NOTE(review): only the bare tag is matched; a tag carrying attributes or inner
 * whitespace is not. Confirm whether that is intended.
 */
private static final Pattern SCRIPT = Pattern.compile(
"<SCRIPT>", Pattern.CASE_INSENSITIVE
);
/**
 * Appends a decimal numeric character reference to a builder.
 *
 * The number is left-padded with zeros to at least three digits and wrapped in
 * the "&#" prefix and ";" suffix: 9 gives {@code &#009;}, 33 gives {@code &#033;},
 * 126 gives {@code &#126;}.
 *
 * @param aIdx the numeric value of the character; must not be null
 * @param aBuilder the builder that receives the character reference
 */
private static void addCharEntity(Integer aIdx, StringBuilder aBuilder){
  final int code = aIdx;
  final String padding;
  if (code <= 9) {
    padding = "00";
  }
  else if (code <= 99) {
    padding = "0";
  }
  else {
    padding = "";
  }
  // Without the "&#" prefix the output ("033;") is plain text, not a character reference.
  aBuilder.append("&#").append(padding).append(code).append(';');
}
}
*
*
*
* Character Encoding
* < <
* > >
* & &
* " "
* ' '
*
*/
/**
 * Escapes a fragment of text so that it is matched literally when embedded in a
 * regular expression.
 *
 * Each of the following characters is prefixed with a backslash; every other
 * character is copied through unchanged :
 *   . \ ? * + & : { } [ ] ( ) ^ $ |
 *
 * The alternation bar is included because, left bare, it would split the
 * fragment into alternatives instead of matching a literal bar. The input is
 * walked by index, so a U+FFFF character in the input is copied like any other
 * character rather than being mistaken for an end-of-text marker.
 *
 * @param aRegexFragment the literal text to escape; must not be null
 * @return the escaped text
 */
public static String forRegex(String aRegexFragment){
  final String metaCharacters = ".\\?*+&:{}[]()^$|";
  final int length = aRegexFragment.length();
  final StringBuilder result = new StringBuilder(length + 8);
  for (int idx = 0; idx < length; ++idx) {
    final char character = aRegexFragment.charAt(idx);
    if (metaCharacters.indexOf(character) >= 0) {
      result.append('\\');
    }
    result.append(character);
  }
  return result.toString();
}
/**
* Escape '$' and '\' characters in replacement strings.
*
*
*
*
*