001/*
002 * Licensed under the Apache License, Version 2.0 (the "License");
003 * you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at
005 *
006 *     http://www.apache.org/licenses/LICENSE-2.0
007 *
008 * Unless required by applicable law or agreed to in writing, software
009 * distributed under the License is distributed on an "AS IS" BASIS,
010 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011 * See the License for the specific language governing permissions and
012 * limitations under the License.
013 */
014package org.gbif.ws.util;
015
016import java.util.regex.Matcher;
017import java.util.regex.Pattern;
018
019import org.apache.commons.lang3.StringUtils;
020import org.slf4j.Logger;
021import org.slf4j.LoggerFactory;
022
023/**
024 * Class with utility methods for XSS filtering.
025 */
026public class XSSUtil {
027  private static final Logger LOG = LoggerFactory.getLogger(XSSUtil.class);
028
029  private XSSUtil() {
030    // empty private constructor
031  }
032
033  private static final Pattern NULL_CHAR = Pattern.compile("\0");
034  private static final Pattern[] PATTERNS =
035      new Pattern[] {
036        // Avoid anything in a <script> type of expression
037        Pattern.compile("<script>(.*?)</script>", Pattern.CASE_INSENSITIVE),
038        // Avoid anything in a src='...' type of expression
039        Pattern.compile(
040            "src[\r\n]*=[\r\n]*\\\'(.*?)\\\'",
041            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
042        Pattern.compile(
043            "src[\r\n]*=[\r\n]*\\\"(.*?)\\\"",
044            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
045        // Remove any lonesome </script> tag
046        Pattern.compile("</script>", Pattern.CASE_INSENSITIVE),
047        // Avoid anything in a <iframe> type of expression
048        Pattern.compile("<iframe>(.*?)</iframe>", Pattern.CASE_INSENSITIVE),
049        // Remove any lonesome <script ...> tag
050        Pattern.compile(
051            "<script(.*?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
052        // Remove any lonesome <img ...> tag
053        Pattern.compile(
054            "<img(.*?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
055        // Avoid eval(...) expressions
056        Pattern.compile(
057            "eval\\((.*?)\\)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
058        // Avoid expression(...) expressions
059        Pattern.compile(
060            "expression\\((.*?)\\)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL),
061        // Avoid javascript:... expressions
062        Pattern.compile("javascript:", Pattern.CASE_INSENSITIVE),
063        // Avoid vbscript:... expressions
064        Pattern.compile("vbscript:", Pattern.CASE_INSENSITIVE),
065        // Avoid onload= expressions
066        Pattern.compile(
067            "on(load|error|mouseover|submit|reset|focus|click)(.*?)=",
068            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL)
069      };
070
071  /**
072   * Method tests whether a string contains malicious XSS script or not.
073   *
074   * @param value decoded string to test
075   *
076   * @return true if string matches at least one XSS pattern, or false otherwise
077   */
078  public static boolean containsXSS(String value) {
079    if (value != null) {
080
081      // Avoid null characters
082      String cleanValue = NULL_CHAR.matcher(value).replaceAll("");
083
084      // Remove all sections that match a pattern
085      for (Pattern scriptPattern : PATTERNS) {
086        Matcher matcher = scriptPattern.matcher(cleanValue);
087        if (matcher.find()) {
088          LOG.warn("Potentially malicious XSS script found: {}", cleanValue);
089          return true;
090        }
091      }
092    }
093    return false;
094  }
095
096  /**
097   * Strip all instances of all XXS patterns that match.
098   *
099   * @param value
100   * @return
101   */
102  public static String stripXSS(String value) {
103
104    if (value == null) {
105      return null;
106    }
107
108    // Avoid null characters
109    String cleanValue = NULL_CHAR.matcher(value).replaceAll("");
110    if (StringUtils.isBlank(cleanValue)) {
111      return cleanValue;
112    }
113
114    // Remove all sections that match a pattern
115    for (Pattern scriptPattern : PATTERNS) {
116      Matcher matcher = scriptPattern.matcher(cleanValue);
117      cleanValue = matcher.replaceAll("");
118    }
119    return cleanValue;
120  }
121}