diff --git a/system/libraries/Input.php b/system/libraries/Input.php index c6c84fca..2bef3ff4 100644 --- a/system/libraries/Input.php +++ b/system/libraries/Input.php @@ -334,33 +334,37 @@ class Input_Core { // * Removed parentheses where possible // * Split up alternation alternatives // * Made some quantifiers possessive - - // Fix &entity\n; - $data = str_replace(array('&','<','>'), array('&amp;','&lt;','&gt;'), $data); - $data = preg_replace('/(&#*\w+)[\x00-\x20]+;/u', '$1;', $data); - $data = preg_replace('/(&#x*[0-9A-F]+);*/iu', '$1;', $data); - $data = html_entity_decode($data, ENT_COMPAT, 'UTF-8'); - - // Remove any attribute starting with "on" or xmlns - $data = preg_replace('#(?:on[a-z]+|xmlns)\s*=\s*[\'"\x00-\x20]?[^\'>"]*[\'"\x00-\x20]?\s?#iu', '', $data); - - // Remove javascript: and vbscript: protocols - $data = preg_replace('#([a-z]*)[\x00-\x20]*=[\x00-\x20]*([`\'"]*)[\x00-\x20]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2nojavascript...', $data); - $data = preg_replace('#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*v[\x00-\x20]*b[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2novbscript...', $data); - $data = preg_replace('#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*-moz-binding[\x00-\x20]*:#u', '$1=$2nomozbinding...', $data); - - // Only works in IE: - $data = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?expression[\x00-\x20]*\([^>]*+>#i', '$1>', $data); - $data = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?behaviour[\x00-\x20]*\([^>]*+>#i', '$1>', $data); - $data = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:*[^>]*+>#iu', '$1>', $data); - - // Remove namespaced elements (we do not need them) - $data = preg_replace('#]*+>#i', '', $data); + // + // Gallery Modifications: + // * Wrap the loop around all the changes to detect nested exploits do { - // Remove really unwanted tags $old_data = $data; + + // Fix &entity\n; + $data = str_replace(array('&','<','>'), array('&amp;','&lt;','&gt;'), $data); + $data = preg_replace('/(&#*\w+)[\x00-\x20]+;/u', '$1;', $data); + $data = preg_replace('/(&#x*[0-9A-F]+);*/iu', '$1;', $data); + $data = html_entity_decode($data, ENT_COMPAT, 'UTF-8'); + + // Remove any attribute starting with "on" or xmlns + $data = preg_replace('#(?:on[a-z]+|xmlns)\s*=\s*[\'"\x00-\x20]?[^\'>"]*[\'"\x00-\x20]?\s?#iu', '', $data); + + // Remove javascript: and vbscript: protocols + $data = preg_replace('#([a-z]*)[\x00-\x20]*=[\x00-\x20]*([`\'"]*)[\x00-\x20]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2nojavascript...', $data); + $data = preg_replace('#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*v[\x00-\x20]*b[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2novbscript...', $data); + $data = preg_replace('#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*-moz-binding[\x00-\x20]*:#u', '$1=$2nomozbinding...', $data); + + // Only works in IE: + $data = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?expression[\x00-\x20]*\([^>]*+>#i', '$1>', $data); + $data = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?behaviour[\x00-\x20]*\([^>]*+>#i', '$1>', $data); + $data = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:*[^>]*+>#iu', '$1>', $data); + + // Remove namespaced elements (we do not need them) + $data = preg_replace('#]*+>#i', '', $data); + + // Remove really unwanted tags $data = preg_replace('#]*+>#i', '', $data); } while ($old_data !== $data);