From 0dfa7bdbcc93704333576aa5e634301be74976d0 Mon Sep 17 00:00:00 2001
From: Laurent Destailleur
Date: Tue, 6 Jul 2021 00:47:43 +0200
Subject: [PATCH] Add option MAIN_RESTRICTHTML_ONLY_VALID_HTML
---
htdocs/core/lib/functions.lib.php | 17 ++++++++++++++---
test/phpunit/SecurityTest.php | 26 +++++++++++++++++++++++++-
2 files changed, 39 insertions(+), 4 deletions(-)
diff --git a/htdocs/core/lib/functions.lib.php b/htdocs/core/lib/functions.lib.php
index 663158e1ef3..c622ebf1aba 100644
--- a/htdocs/core/lib/functions.lib.php
+++ b/htdocs/core/lib/functions.lib.php
@@ -778,6 +778,16 @@ function checkVal($out = '', $check = 'alphanohtml', $filter = null, $options =
do {
$oldstringtoclean = $out;
+ if (!empty($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML)) {
+ $dom = new DOMDocument;
+ try {
+ $dom->loadHTML($out, LIBXML_ERR_NONE|LIBXML_HTML_NOIMPLIED|LIBXML_HTML_NODEFDTD|LIBXML_NONET|LIBXML_NOWARNING|LIBXML_NOXMLDECL);
+ } catch(Exception $e) {
+ return 'InvalidHTMLString';
+ }
+ $out = $dom->saveHTML();
+ }
+
// Ckeditor use the numeric entitic for apostrophe so we force it to text entity (all other special chars are correctly
// encoded using text entities). This is a fix for CKeditor.
$out = preg_replace('/'/i', ''', $out);
@@ -794,7 +804,8 @@ function checkVal($out = '', $check = 'alphanohtml', $filter = null, $options =
// We should also exclude non expected attributes
if (!empty($conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES)) {
- $out = dol_string_onlythesehtmlattributes($out);
+ // Warning, the function may add a LF so we are forced to trim to compare with old $out without having always a difference and an infinit loop.
+ $out = trim(dol_string_onlythesehtmlattributes($out));
}
} while ($oldstringtoclean != $out);
break;
@@ -6311,7 +6322,7 @@ function dol_string_onlythesehtmltags($stringtoclean, $cleanalsosomestyles = 1,
$stringtoclean = preg_replace('/:|+58|:/i', '', $stringtoclean); // refused string ':' encoded (no reason to have a : encoded like this) to disable 'javascript:...'
$stringtoclean = preg_replace('/javascript\s*:/i', '', $stringtoclean);
- $temp = strip_tags($stringtoclean, $allowed_tags_string);
+ $temp = strip_tags($stringtoclean, $allowed_tags_string); // Warning: This remove also undesired > changing string obfuscated with > that pass injection detection into harmfull string
if ($cleanalsosomestyles) { // Clean for remaining html tags
$temp = preg_replace('/position\s*:\s*(absolute|fixed)\s*!\s*important/i', '', $temp); // Note: If hacker try to introduce css comment into string to bypass this regex, the string must also be encoded by the dol_htmlentitiesbr during output so it become harmless
@@ -6361,8 +6372,8 @@ function dol_string_onlythesehtmlattributes($stringtoclean, $allowed_attributes
}
$return = $dom->saveHTML();
-
//$return = 'aaaa
bbssdd
'."\naaa
aabb
";
+
$return = preg_replace('/^/', '', $return);
$return = preg_replace('/<\/body><\/html>$/', '', $return);
return $return;
diff --git a/test/phpunit/SecurityTest.php b/test/phpunit/SecurityTest.php
index 16cc8438c91..8e99a03021e 100644
--- a/test/phpunit/SecurityTest.php
+++ b/test/phpunit/SecurityTest.php
@@ -363,6 +363,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase
$_POST["param13"]='n n > < " XSS';
$_POST["param13b"]='n n > < " XSS';
$_POST["param14"]="Text with ' encoded with the numeric html entity converted into text entity ' (like when submited by CKEditor)";
+ $_POST["param15"]="
src=>0xbeefed";
//$_POST["param13"]='javascript%26colon%26%23x3B%3Balert(1)';
//$_POST["param14"]='javascripT&javascript#x3a alert(1)';
@@ -480,7 +481,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase
// Test with restricthtml we must remove html open/close tag and content but not htmlentities (we can decode html entities for ascii chars like n)
$result=GETPOST("param6", 'restricthtml');
- print __METHOD__." result=".$result."\n";
+ print __METHOD__." result param6=".$result."\n";
$this->assertEquals('">', $result);
$result=GETPOST("param7", 'restricthtml');
@@ -503,6 +504,29 @@ class SecurityTest extends PHPUnit\Framework\TestCase
print __METHOD__." result=".$result."\n";
$this->assertEquals("Text with ' encoded with the numeric html entity converted into text entity ' (like when submited by CKEditor)", $result, 'Test 14');
+ $result=GETPOST("param15", 'restricthtml'); //
src=>0xbeefed
+ print __METHOD__." result=".$result."\n";
+ $this->assertEquals("
0xbeefed", $result, 'Test 15a'); // The GETPOST return a harmull string
+
+ // Test with restricthtml + MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES to test disabling of bad atrributes
+ $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 1;
+
+ $result=GETPOST("param15", 'restricthtml');
+ print __METHOD__." result=".$result."\n";
+ $this->assertEquals('InvalidHTMLString', $result, 'Test 15b');
+
+ unset($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML);
+
+ // Test with restricthtml + MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES to test disabling of bad atrributes
+ $conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 1;
+
+ $result=GETPOST("param15", 'restricthtml');
+ print __METHOD__." result=".$result."\n";
+ $this->assertEquals('
0xbeefed', $result, 'Test 15b');
+
+ unset($conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES);
+
+
// Special test for GETPOST of backtopage, backtolist or backtourl parameter
$_POST["backtopage"]='//www.google.com';