diff --git a/htdocs/core/lib/functions.lib.php b/htdocs/core/lib/functions.lib.php index 663158e1ef3..c622ebf1aba 100644 --- a/htdocs/core/lib/functions.lib.php +++ b/htdocs/core/lib/functions.lib.php @@ -778,6 +778,16 @@ function checkVal($out = '', $check = 'alphanohtml', $filter = null, $options = do { $oldstringtoclean = $out; + if (!empty($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML)) { + $dom = new DOMDocument; + try { + $dom->loadHTML($out, LIBXML_ERR_NONE|LIBXML_HTML_NOIMPLIED|LIBXML_HTML_NODEFDTD|LIBXML_NONET|LIBXML_NOWARNING|LIBXML_NOXMLDECL); + } catch(Exception $e) { + return 'InvalidHTMLString'; + } + $out = $dom->saveHTML(); + } + // Ckeditor use the numeric entitic for apostrophe so we force it to text entity (all other special chars are correctly // encoded using text entities). This is a fix for CKeditor. $out = preg_replace('/'/i', ''', $out); @@ -794,7 +804,8 @@ function checkVal($out = '', $check = 'alphanohtml', $filter = null, $options = // We should also exclude non expected attributes if (!empty($conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES)) { - $out = dol_string_onlythesehtmlattributes($out); + // Warning, the function may add a LF so we are forced to trim to compare with old $out without having always a difference and an infinit loop. + $out = trim(dol_string_onlythesehtmlattributes($out)); } } while ($oldstringtoclean != $out); break; @@ -6311,7 +6322,7 @@ function dol_string_onlythesehtmltags($stringtoclean, $cleanalsosomestyles = 1, $stringtoclean = preg_replace('/:|�+58|:/i', '', $stringtoclean); // refused string ':' encoded (no reason to have a : encoded like this) to disable 'javascript:...' $stringtoclean = preg_replace('/javascript\s*:/i', '', $stringtoclean); - $temp = strip_tags($stringtoclean, $allowed_tags_string); + $temp = strip_tags($stringtoclean, $allowed_tags_string); // Warning: This remove also undesired changing string obfuscated with that pass injection detection into harmfull string if ($cleanalsosomestyles) { // Clean for remaining html tags $temp = preg_replace('/position\s*:\s*(absolute|fixed)\s*!\s*important/i', '', $temp); // Note: If hacker try to introduce css comment into string to bypass this regex, the string must also be encoded by the dol_htmlentitiesbr during output so it become harmless @@ -6361,8 +6372,8 @@ function dol_string_onlythesehtmlattributes($stringtoclean, $allowed_attributes } $return = $dom->saveHTML(); - //$return = 'aaaa

bb

ssdd

'."\n

aaa

aa

bb

"; + $return = preg_replace('/^/', '', $return); $return = preg_replace('/<\/body><\/html>$/', '', $return); return $return; diff --git a/test/phpunit/SecurityTest.php b/test/phpunit/SecurityTest.php index 16cc8438c91..8e99a03021e 100644 --- a/test/phpunit/SecurityTest.php +++ b/test/phpunit/SecurityTest.php @@ -363,6 +363,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase $_POST["param13"]='n n > < " XSS'; $_POST["param13b"]='n n > < " XSS'; $_POST["param14"]="Text with ' encoded with the numeric html entity converted into text entity ' (like when submited by CKEditor)"; + $_POST["param15"]=" src=>0xbeefed"; //$_POST["param13"]='javascript%26colon%26%23x3B%3Balert(1)'; //$_POST["param14"]='javascripT&javascript#x3a alert(1)'; @@ -480,7 +481,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase // Test with restricthtml we must remove html open/close tag and content but not htmlentities (we can decode html entities for ascii chars like n) $result=GETPOST("param6", 'restricthtml'); - print __METHOD__." result=".$result."\n"; + print __METHOD__." result param6=".$result."\n"; $this->assertEquals('">', $result); $result=GETPOST("param7", 'restricthtml'); @@ -503,6 +504,29 @@ class SecurityTest extends PHPUnit\Framework\TestCase print __METHOD__." result=".$result."\n"; $this->assertEquals("Text with ' encoded with the numeric html entity converted into text entity ' (like when submited by CKEditor)", $result, 'Test 14'); + $result=GETPOST("param15", 'restricthtml'); // src=>0xbeefed + print __METHOD__." result=".$result."\n"; + $this->assertEquals("0xbeefed", $result, 'Test 15a'); // The GETPOST return a harmull string + + // Test with restricthtml + MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES to test disabling of bad atrributes + $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 1; + + $result=GETPOST("param15", 'restricthtml'); + print __METHOD__." result=".$result."\n"; + $this->assertEquals('InvalidHTMLString', $result, 'Test 15b'); + + unset($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML); + + // Test with restricthtml + MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES to test disabling of bad atrributes + $conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 1; + + $result=GETPOST("param15", 'restricthtml'); + print __METHOD__." result=".$result."\n"; + $this->assertEquals('0xbeefed', $result, 'Test 15b'); + + unset($conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES); + + // Special test for GETPOST of backtopage, backtolist or backtourl parameter $_POST["backtopage"]='//www.google.com';