diff --git a/htdocs/core/lib/functions.lib.php b/htdocs/core/lib/functions.lib.php index 663158e1ef3..c622ebf1aba 100644 --- a/htdocs/core/lib/functions.lib.php +++ b/htdocs/core/lib/functions.lib.php @@ -778,6 +778,16 @@ function checkVal($out = '', $check = 'alphanohtml', $filter = null, $options = do { $oldstringtoclean = $out; + if (!empty($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML)) { + $dom = new DOMDocument; + try { + $dom->loadHTML($out, LIBXML_ERR_NONE|LIBXML_HTML_NOIMPLIED|LIBXML_HTML_NODEFDTD|LIBXML_NONET|LIBXML_NOWARNING|LIBXML_NOXMLDECL); + } catch(Exception $e) { + return 'InvalidHTMLString'; + } + $out = $dom->saveHTML(); + } + // Ckeditor use the numeric entitic for apostrophe so we force it to text entity (all other special chars are correctly // encoded using text entities). This is a fix for CKeditor. $out = preg_replace('/'/i', ''', $out); @@ -794,7 +804,8 @@ function checkVal($out = '', $check = 'alphanohtml', $filter = null, $options = // We should also exclude non expected attributes if (!empty($conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES)) { - $out = dol_string_onlythesehtmlattributes($out); + // Warning, the function may add a LF so we are forced to trim to compare with old $out without having always a difference and an infinit loop. + $out = trim(dol_string_onlythesehtmlattributes($out)); } } while ($oldstringtoclean != $out); break; @@ -6311,7 +6322,7 @@ function dol_string_onlythesehtmltags($stringtoclean, $cleanalsosomestyles = 1, $stringtoclean = preg_replace('/:|+58|:/i', '', $stringtoclean); // refused string ':' encoded (no reason to have a : encoded like this) to disable 'javascript:...' $stringtoclean = preg_replace('/javascript\s*:/i', '', $stringtoclean); - $temp = strip_tags($stringtoclean, $allowed_tags_string); + $temp = strip_tags($stringtoclean, $allowed_tags_string); // Warning: This remove also undesired > changing string obfuscated with > that pass injection detection into harmfull string if ($cleanalsosomestyles) { // Clean for remaining html tags $temp = preg_replace('/position\s*:\s*(absolute|fixed)\s*!\s*important/i', '', $temp); // Note: If hacker try to introduce css comment into string to bypass this regex, the string must also be encoded by the dol_htmlentitiesbr during output so it become harmless @@ -6361,8 +6372,8 @@ function dol_string_onlythesehtmlattributes($stringtoclean, $allowed_attributes } $return = $dom->saveHTML(); - //$return = '
aaaabbssdd
'."\naaa
aabb
"; + $return = preg_replace('/^/', '', $return); $return = preg_replace('/<\/body><\/html>$/', '', $return); return $return; diff --git a/test/phpunit/SecurityTest.php b/test/phpunit/SecurityTest.php index 16cc8438c91..8e99a03021e 100644 --- a/test/phpunit/SecurityTest.php +++ b/test/phpunit/SecurityTest.php @@ -363,6 +363,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase $_POST["param13"]='n n > < " XSS'; $_POST["param13b"]='n n > < " XSS'; $_POST["param14"]="Text with ' encoded with the numeric html entity converted into text entity ' (like when submited by CKEditor)"; + $_POST["param15"]="