From 43f9210ab4bfdea73aee6f973138087e85d1f8ae Mon Sep 17 00:00:00 2001 From: Laurent Destailleur Date: Wed, 29 Nov 2023 20:19:21 +0100 Subject: [PATCH] SEC: Add option MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY --- ChangeLog | 1 + htdocs/admin/system/security.php | 10 +- htdocs/core/lib/functions.lib.php | 41 ++++++- test/phpunit/SecurityTest.php | 184 +++++++++++++++++++++++++++--- 4 files changed, 217 insertions(+), 19 deletions(-) diff --git a/ChangeLog b/ChangeLog index 0e3d34b2c77..d0da9d7a87b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -175,6 +175,7 @@ NEW: When an user unset the batch management of products, transformation of each SEC: #25512 applicative anti bruteforce - security on too many login attempts (#25520) SEC: Add action confirm_... as sensitive to need a CSRF token SEC: Disable not used PHP streams +SEC: Add option MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY For developers or integrators: diff --git a/htdocs/admin/system/security.php b/htdocs/admin/system/security.php index 368f6cf5ae2..64f08acbcc9 100644 --- a/htdocs/admin/system/security.php +++ b/htdocs/admin/system/security.php @@ -632,10 +632,16 @@ print '
'; print 'MAIN_SECURITY_MAXFILESIZE_DOWNLOADED = '.getDolGlobalString('MAIN_SECURITY_MAXFILESIZE_DOWNLOADED', ''.$langs->trans("Undefined").'   ('.$langs->trans("Recommended").': 100000000)')."
"; print '
'; -print 'MAIN_RESTRICTHTML_ONLY_VALID_HTML = '.getDolGlobalString('MAIN_RESTRICTHTML_ONLY_VALID_HTML', ''.$langs->trans("Undefined").'   ('.$langs->trans("Recommended").': 1)')."
"; +print 'MAIN_RESTRICTHTML_ONLY_VALID_HTML = '.(getDolGlobalString('MAIN_RESTRICTHTML_ONLY_VALID_HTML') ? '1' : ''.$langs->trans("Undefined").''); +print '   ('.$langs->trans("Recommended").": 1)
"; print '
'; -print 'MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = '.getDolGlobalString('MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES', ''.$langs->trans("Undefined").'   ('.$langs->trans("Recommended").': 1)')."
"; +print 'MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = '.(getDolGlobalString('MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY') ? '1' : ''.$langs->trans("Undefined").''); +print '   ('.$langs->trans("Recommended").': 1)   -   Module "tidy" must be enabled (currently: '.((extension_loaded('tidy') && class_exists("tidy")) ? 'Enabled' : 'Not available').")
"; +print '
'; + +print 'MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = '.(getDolGlobalString('MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES') ? '1' : ''.$langs->trans("Undefined").''); +print '   ('.$langs->trans("Recommended").": 1)
"; print '
'; print 'MAIN_DISALLOW_URL_INTO_DESCRIPTIONS = '.getDolGlobalString('MAIN_DISALLOW_URL_INTO_DESCRIPTIONS', ''.$langs->trans("Undefined").'   ('.$langs->trans("Recommended").': 1)')."
"; diff --git a/htdocs/core/lib/functions.lib.php b/htdocs/core/lib/functions.lib.php index cc020139501..d4bef79259e 100644 --- a/htdocs/core/lib/functions.lib.php +++ b/htdocs/core/lib/functions.lib.php @@ -1704,6 +1704,8 @@ function dol_escape_htmltag($stringtoescape, $keepb = 0, $keepn = 0, $noescapeta { if ($noescapetags == 'common') { $noescapetags = 'html,body,a,b,em,hr,i,u,ul,li,br,div,img,font,p,span,strong,table,tr,td,th,tbody,h1,h2,h3,h4,h5,h6,h7,h8,h9'; + // Add also html5 tags + $noescapetags .= ',header,footer,nav,section,menu,menuitem'; } if ($cleanalsojavascript) { $stringtoescape = dol_string_onlythesehtmltags($stringtoescape, 0, 0, $cleanalsojavascript, 0, array(), 0); @@ -7318,7 +7320,8 @@ function dol_string_onlythesehtmltags($stringtoclean, $cleanalsosomestyles = 1, if (empty($allowed_tags)) { $allowed_tags = array( "html", "head", "meta", "body", "article", "a", "abbr", "b", "blockquote", "br", "cite", "div", "dl", "dd", "dt", "em", "font", "img", "ins", "hr", "i", "li", - "ol", "p", "q", "s", "section", "span", "strike", "strong", "title", "table", "tr", "th", "td", "u", "ul", "sup", "sub", "blockquote", "pre", "h1", "h2", "h3", "h4", "h5", "h6" + "ol", "p", "q", "s", "span", "strike", "strong", "title", "table", "tr", "th", "td", "u", "ul", "sup", "sub", "blockquote", "pre", "h1", "h2", "h3", "h4", "h5", "h6", + "header", "footer", "nav", "section", "menu", "menuitem" // html5 tags ); } $allowed_tags[] = "comment"; // this tags is added to manage comment that are replaced into ... @@ -7605,6 +7608,42 @@ function dol_htmlwithnojs($stringtoencode, $nouseofiframesandbox = 0, $check = ' } } + if (!empty($out) && getDolGlobalString('MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY') && $check != 'restricthtmlallowunvalid') { + try { + // Try cleaning using tidy + if (extension_loaded('tidy') && class_exists("tidy")) { + //print "aaa".$out."\n"; + + // See options at https://tidy.sourceforge.net/docs/quickref.html + $config = array( + 'clean' => false, + 'quote-marks' => false, // do not replace " that are used for real text content (not a string symbol for html attribute) into " + 'doctype' => 'strict', + 'show-body-only' => true, + "indent-attributes" => false, + "vertical-space" => false, + 'ident' => false, + "wrap" => 0 + // HTML5 tags + //'new-blocklevel-tags' => 'article aside audio bdi canvas details dialog figcaption figure footer header hgroup main menu menuitem nav section source summary template track video', + //'new-blocklevel-tags' => 'footer header section menu menuitem' + //'new-empty-tags' => 'command embed keygen source track wbr', + //'new-inline-tags' => 'audio command datalist embed keygen mark menuitem meter output progress source time video wbr', + ); + + // Tidy + $tidy = new tidy(); + $out = $tidy->repairString($out, $config, 'utf8'); + + //print "xxx".$out;exit; + } + } catch (Exception $e) { + // If error, invalid HTML string with no way to clean it + //print $e->getMessage(); + $out = 'InvalidHTMLStringCantBeCleaned'; + } + } + // Clean some html entities that are useless so text is cleaner $out = preg_replace('/&(tab|newline);/i', ' ', $out); diff --git a/test/phpunit/SecurityTest.php b/test/phpunit/SecurityTest.php index 9ff1b0da4b5..5be5934183d 100644 --- a/test/phpunit/SecurityTest.php +++ b/test/phpunit/SecurityTest.php @@ -176,6 +176,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase $this->assertEquals($tmplangs->defaultlang, 'malicioustextwithquote_MALICIOUSTEXTWITHQUOTE'); } + /** * testSqlAndScriptInjectWithPHPUnit * @@ -379,9 +380,11 @@ class SecurityTest extends PHPUnit\Framework\TestCase // Force default mode $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 0; + $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 0; $conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 0; $_COOKIE["id"]=111; + $_POST["param0"]='A real string with aaa and " and \' and & inside content'; $_GET["param1"]="222"; $_POST["param1"]="333"; $_GET["param2"]='a/b#e(pr)qq-rr\cc'; @@ -413,20 +416,27 @@ class SecurityTest extends PHPUnit\Framework\TestCase $_POST["param19"]='XSS'; //$_POST["param19"]='XSS'; + + $result=GETPOST('id', 'int'); // Must return nothing print __METHOD__." result=".$result."\n"; - $this->assertEquals($result, ''); + $this->assertEquals('', $result); $result=GETPOST("param1", 'int'); print __METHOD__." result=".$result."\n"; - $this->assertEquals($result, 222, 'Test on param1 with no 3rd param'); + $this->assertEquals(222, $result, 'Test on param1 with no 3rd param'); $result=GETPOST("param1", 'int', 2); print __METHOD__." result=".$result."\n"; - $this->assertEquals($result, 333, 'Test on param1 with 3rd param = 2'); + $this->assertEquals(333, $result, 'Test on param1 with 3rd param = 2'); // Test with alpha + $result=GETPOST("param0", 'alpha'); // a simple format, so " completely removed + $resultexpected = 'A real string with aaa and and \' and & inside content'; + print __METHOD__." result=".$result."\n"; + $this->assertEquals($resultexpected, $result, 'Test on param0'); + $result=GETPOST("param2", 'alpha'); print __METHOD__." result=".$result."\n"; $this->assertEquals($result, $_GET["param2"], 'Test on param2'); @@ -472,7 +482,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase // Test with nohtml $result=GETPOST("param6", 'nohtml'); - print __METHOD__." result=".$result."\n"; + print __METHOD__." result6=".$result."\n"; $this->assertEquals('">', $result); // Test with alpha = alphanohtml. We must convert the html entities like n and disable all entities @@ -525,16 +535,23 @@ class SecurityTest extends PHPUnit\Framework\TestCase print __METHOD__." result=".$result."\n"; $this->assertEquals('n n > < XSS', $result, 'Test that html entities are decoded with alpha'); + // Test with alphawithlgt $result=GETPOST("param11", 'alphawithlgt'); print __METHOD__." result=".$result."\n"; $this->assertEquals(trim($_POST["param11"]), $result, 'Test an email string with alphawithlgt'); + // Test with restricthtml: we must remove html open/close tag and content but not htmlentities (we can decode html entities for ascii chars like n) + $result=GETPOST("param0", 'restricthtml'); + $resultexpected = 'A real string with aaa and " and \' and & inside content'; + print __METHOD__." result=".$result."\n"; + $this->assertEquals($resultexpected, $result, 'Test on param0'); + $result=GETPOST("param6", 'restricthtml'); - print __METHOD__." result param6=".$result."\n"; + print __METHOD__." result for param6=".$result." - before=".$_POST["param6"]."\n"; $this->assertEquals('">', $result); $result=GETPOST("param7", 'restricthtml'); @@ -570,19 +587,83 @@ class SecurityTest extends PHPUnit\Framework\TestCase $this->assertEquals('XSS', $result, 'Test 19'); - // Test with restricthtml + MAIN_RESTRICTHTML_ONLY_VALID_HTML to test disabling of bad atrributes - $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 1; + // Test with restricthtml + MAIN_RESTRICTHTML_ONLY_VALID_HTML only to test disabling of bad atrributes + $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 1; + $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 0; + + //$_POST["param0"] = 'A real string with aaa and " inside content'; + $result=GETPOST("param0", 'restricthtml'); + $resultexpected = 'A real string with aaa and " and \' and & inside content'; + print __METHOD__." result for param0=".$result."\n"; + $this->assertEquals($resultexpected, $result, 'Test on param0'); $result=GETPOST("param15", 'restricthtml'); // param15 = src=>0xbeefed that is a dangerous string - print __METHOD__." result=".$result."\n"; - // $this->assertEquals('InvalidHTMLStringCantBeCleaned', $result, 'Test 15b'); // With some PHP and libxml version, we got this result when parsing invalid HTML, but ... + print __METHOD__." result for param15=".$result."\n"; + //$this->assertEquals('InvalidHTMLStringCantBeCleaned', $result, 'Test 15b'); // With some PHP and libxml version, we got this result when parsing invalid HTML, but ... //$this->assertEquals(' src=>0xbeefed', $result, 'Test 15b'); // ... on other PHP and libxml versions, we got a HTML that has been cleaned + $result=GETPOST("param6", 'restricthtml'); // param6 = "> + print __METHOD__." result for param6=".$result." - before=".$_POST["param6"]."\n"; + $this->assertEquals('">', $result); + + $result=GETPOST("param7", 'restricthtml'); // param7 = "c:\this is a path~1\aaan &#x110;" abcdef + print __METHOD__." result param7 = ".$result."\n"; + $this->assertEquals('"c:\this is a path~1\aaan 110;" abcdef', $result); + + + // Test with restricthtml + MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY only to test disabling of bad atrributes + + if (extension_loaded('tidy') && class_exists("tidy")) { + $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 0; + $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 1; + + $result=GETPOST("param0", 'restricthtml'); + $resultexpected = 'A real string with aaa and " and \' and & inside content'; + print __METHOD__." result for param0=".$result."\n"; + $this->assertEquals($resultexpected, $result, 'Test on param0'); + + $result=GETPOST("param15", 'restricthtml'); // param15 = src=>0xbeefed that is a dangerous string + print __METHOD__." result=".$result."\n"; + + $result=GETPOST("param6", 'restricthtml'); + print __METHOD__." result for param6=".$result." - before=".$_POST["param6"]."\n"; + $this->assertEquals('">', $result); + + $result=GETPOST("param7", 'restricthtml'); + print __METHOD__." result param7 = ".$result."\n"; + $this->assertEquals('"c:\this is a path~1\aaan &#x110;" abcdef', $result); + } + + + // Test with restricthtml + MAIN_RESTRICTHTML_ONLY_VALID_HTML + MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY to test disabling of bad atrributes + + if (extension_loaded('tidy') && class_exists("tidy")) { + $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 1; + $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 1; + + $result=GETPOST("param0", 'restricthtml'); + $resultexpected = 'A real string with aaa and " and \' and & inside content'; + print __METHOD__." result for param0=".$result."\n"; + $this->assertEquals($resultexpected, $result, 'Test on param0'); + + $result=GETPOST("param15", 'restricthtml'); // param15 = src=>0xbeefed that is a dangerous string + print __METHOD__." result=".$result."\n"; + + $result=GETPOST("param6", 'restricthtml'); + print __METHOD__." result for param6=".$result." - before=".$_POST["param6"]."\n"; + $this->assertEquals('">', $result); + + $result=GETPOST("param7", 'restricthtml'); + print __METHOD__." result param7 = ".$result."\n"; + $this->assertEquals('"c:\this is a path~1\aaan 110;" abcdef', $result); + } - unset($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML); // Test with restricthtml + MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES to test disabling of bad atrributes + + unset($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML); + unset($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY); $conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 1; $result=GETPOST("param15", 'restricthtml'); @@ -664,7 +745,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase /** * testEncodeDecode * - * @return number + * @return int */ public function testEncodeDecode() { @@ -686,7 +767,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase /** * testDolStringOnlyTheseHtmlTags * - * @return number + * @return int */ public function testDolHTMLEntityDecode() { @@ -704,7 +785,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase /** * testDolStringOnlyTheseHtmlTags * - * @return number + * @return int */ public function testDolStringOnlyTheseHtmlTags() { @@ -734,7 +815,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase /** * testDolStringOnlyTheseHtmlAttributes * - * @return number + * @return int */ public function testDolStringOnlyTheseHtmlAttributes() { @@ -753,7 +834,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase /** * testGetRandomPassword * - * @return number + * @return int */ public function testGetRandomPassword() { @@ -804,7 +885,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase /** * testGetRandomPassword * - * @return number + * @return int */ public function testGetURLContent() { @@ -1058,6 +1139,77 @@ class SecurityTest extends PHPUnit\Framework\TestCase } + /** + * testDolPrintHTML. + * This method include calls to dol_htmlwithnojs() + * + * @return int + */ + public function testDolPrintHTML() + { + global $conf; + + // Set options for cleaning data + $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 1; + // Enabled option MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY if possible + if (extension_loaded('tidy') && class_exists("tidy")) { + $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 1; + } + $conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 1; + + + + // For a string that is already HTML (contains HTML tags) with special tags but badly formated + $stringtotest = "">"; + $stringfixed = "">"; + //$result = dol_htmlentitiesbr($stringtotest); + //$result = dol_string_onlythesehtmltags(dol_htmlentitiesbr($stringtotest), 1, 1, 1, 0); + //$result = dol_htmlwithnojs(dol_string_onlythesehtmltags(dol_htmlentitiesbr($stringtotest), 1, 1, 1, 0)); + //$result = dol_escape_htmltag(dol_htmlwithnojs(dol_string_onlythesehtmltags(dol_htmlentitiesbr($stringtotest), 1, 1, 1, 0)), 1, 1, 'common', 0, 1); + $result = dolPrintHTML($stringtotest); + print __METHOD__." result=".$result."\n"; + $this->assertEquals($stringfixed, $result, 'Error'); // Expected '' because should failed because login 'auto' does not exists + + + // For a string that is already HTML (contains HTML tags) with special tags but badly formated + $stringtotest = "testA\n

hhhh

ddd
aaa
"; + if (getDolGlobalString("MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY")) { + $stringfixed = "testA\n

hhhh

\nddd\n
aaa
\n"; + } else { + $stringfixed = "testA\n

hhhh

ddd
aaa
"; + } + //$result = dol_htmlentitiesbr($stringtotest); + //$result = dol_string_onlythesehtmltags(dol_htmlentitiesbr($stringtotest), 1, 1, 1, 0); + //$result = dol_htmlwithnojs(dol_string_onlythesehtmltags(dol_htmlentitiesbr($stringtotest), 1, 1, 1, 0)); + //$result = dol_escape_htmltag(dol_htmlwithnojs(dol_string_onlythesehtmltags(dol_htmlentitiesbr($stringtotest), 1, 1, 1, 0)), 1, 1, 'common', 0, 1); + $result = dolPrintHTML($stringtotest); + print __METHOD__." result=".$result."\n"; + $this->assertEquals($stringfixed, $result, 'Error'); // Expected '' because should failed because login 'auto' does not exists + + + // For a string that is already HTML (contains HTML tags) but badly formated + $stringtotest = "testB\n

hhh

\ntd alone

iii

"; + if (getDolGlobalString("MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY")) { + $stringfixed = "testB\n

hhh

\n

iii

\n\n\n\n\n
td alone
"; + } else { + $stringfixed = "testB\n

hhh

\ntd alone

iii

"; + } + $result = dolPrintHTML($stringtotest); + print __METHOD__." result=".$result."\n"; + $this->assertEquals($stringfixed, $result, 'Error'); // Expected '' because should failed because login 'auto' does not exists + + + // For a string with no HTML tags + $stringtotest = "testC\ntest"; + $stringfixed = "testC
\ntest"; + $result = dolPrintHTML($stringtotest); + print __METHOD__." result=".$result."\n"; + $this->assertEquals($stringfixed, $result, 'Error'); // Expected '' because should failed because login 'auto' does not exists + + return 0; + } + + /** * testCheckLoginPassEntity *