This commit is contained in:
ldestailleur
2025-07-15 21:51:50 +02:00
parent a145b3b44d
commit c0a0acf129
3 changed files with 54 additions and 11 deletions

View File

@@ -8530,14 +8530,21 @@ function dol_string_onlythesehtmlattributes($stringtoclean, $allowed_attributes
{ {
if (is_null($allowed_attributes)) { if (is_null($allowed_attributes)) {
$allowed_attributes = array( $allowed_attributes = array(
"allow", "allowfullscreen", "alt", "async", "class", "content", "contenteditable", "crossorigin", "data-html", "frameborder", "height", "href", "id", "name", "property", "rel", "src", "style", "target", "title", "type", "width", "allow", "allowfullscreen", "alt", "async", "class", "contenteditable", "crossorigin", "data-html", "frameborder", "height", "href", "id", "name", "property", "rel", "src", "style", "target", "title", "type", "width",
// HTML5 // HTML5
"header", "footer", "nav", "section", "menu", "menuitem" "header", "footer", "nav", "section", "menu", "menuitem"
); );
} }
// Always allow the "content" and "http-equiv" attributes: the meta tag that uses them is required to force the encoding, so that the loadHTML/saveHTML functions keep the HTML content in UTF-8.
if (!in_array("content", $allowed_attributes)) {
$allowed_attributes[] = "content";
}
if (!in_array("http-equiv", $allowed_attributes)) {
$allowed_attributes[] = "http-equiv";
}
if (class_exists('DOMDocument') && !empty($stringtoclean)) { if (class_exists('DOMDocument') && !empty($stringtoclean)) {
$stringtoclean = '<?xml encoding="UTF-8"><html><body>'.$stringtoclean.'</body></html>'; $stringtoclean = '<?xml encoding="UTF-8"><html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head><body>'.$stringtoclean.'</body></html>';
// Warning: loadHTML does not support HTML5 on old libxml versions. // Warning: loadHTML does not support HTML5 on old libxml versions.
$dom = new DOMDocument('', 'UTF-8'); $dom = new DOMDocument('', 'UTF-8');
@@ -8588,12 +8595,15 @@ function dol_string_onlythesehtmlattributes($stringtoclean, $allowed_attributes
} }
} }
$dom->encoding = 'UTF-8';
$return = $dom->saveHTML(); // This may add a LF at end of lines, so we will trim later $return = $dom->saveHTML(); // This may add a LF at end of lines, so we will trim later
//$return = '<html><body>aaaa</p>bb<p>ssdd</p>'."\n<p>aaa</p>aa<p>bb</p>"; //$return = '<html><body>aaaa</p>bb<p>ssdd</p>'."\n<p>aaa</p>aa<p>bb</p>";
$return = preg_replace('/^'.preg_quote('<?xml encoding="UTF-8">', '/').'/', '', $return); $return = preg_replace('/^'.preg_quote('<?xml encoding="UTF-8">', '/').'/', '', $return);
$return = preg_replace('/^'.preg_quote('<html><body>', '/').'/', '', $return); $return = preg_replace('/^'.preg_quote('<html><head><', '/').'[^<>]*'.preg_quote('></head><body>', '/').'/', '', $return);
$return = preg_replace('/'.preg_quote('</body></html>', '/').'$/', '', $return); $return = preg_replace('/'.preg_quote('</body></html>', '/').'$/', '', trim($return));
return trim($return); return trim($return);
} else { } else {
return $stringtoclean; return $stringtoclean;
@@ -8765,17 +8775,24 @@ function dol_htmlwithnojs($stringtoencode, $nouseofiframesandbox = 0, $check = '
// like 'abc' that wrongly ends up, without the trick, with '<p>abc</p>' // like 'abc' that wrongly ends up, without the trick, with '<p>abc</p>'
if (dol_textishtml($out)) { if (dol_textishtml($out)) {
$out = '<?xml encoding="UTF-8"><div class="tricktoremove">'.$out.'</div>'; $out = '<?xml encoding="UTF-8"><html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head><body><div class="tricktoremove">'.$out.'</div></body></html>';
} else { } else {
$out = '<?xml encoding="UTF-8"><div class="tricktoremove">'.dol_nl2br($out).'</div>'; $out = '<?xml encoding="UTF-8"><html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head><body><div class="tricktoremove">'.dol_nl2br($out).'</div></body></html>';
} }
$dom->loadHTML($out, LIBXML_HTML_NODEFDTD | LIBXML_ERR_NONE | LIBXML_HTML_NOIMPLIED | LIBXML_NONET | LIBXML_NOWARNING | LIBXML_NOERROR | LIBXML_NOXMLDECL); $dom->loadHTML($out, LIBXML_HTML_NODEFDTD | LIBXML_ERR_NONE | LIBXML_HTML_NOIMPLIED | LIBXML_NONET | LIBXML_NOWARNING | LIBXML_NOERROR | LIBXML_NOXMLDECL);
$dom->encoding = 'UTF-8';
$out = trim($dom->saveHTML()); $out = trim($dom->saveHTML());
// Remove the trick added to solve pb with text without parent tag // Remove the trick added to solve pb with text in utf8 and text without parent tag
$out = preg_replace('/^<\?xml encoding="UTF-8"><div class="tricktoremove">/', '', $out); $out = preg_replace('/^'.preg_quote('<?xml encoding="UTF-8">', '/').'/', '', $out);
$out = preg_replace('/<\/div>$/', '', $out); $out = preg_replace('/^'.preg_quote('<html><head><', '/').'[^<>]+'.preg_quote('></head><body><div class="tricktoremove">', '/').'/', '', $out);
$out = preg_replace('/'.preg_quote('</div></body></html>', '/').'$/', '', trim($out));
// $out = preg_replace('/^<\?xml encoding="UTF-8"><div class="tricktoremove">/', '', $out);
// $out = preg_replace('/<\/div>$/', '', $out);
// var_dump('rrrrrrrrrrrrrrrrrrrrrrrrrrrrr'.$out);
} catch (Exception $e) { } catch (Exception $e) {
// If error, invalid HTML string with no way to clean it // If error, invalid HTML string with no way to clean it
//print $e->getMessage(); //print $e->getMessage();
@@ -8889,7 +8906,7 @@ function dol_htmlwithnojs($stringtoencode, $nouseofiframesandbox = 0, $check = '
$out = preg_replace('/on(repeat|begin|finish|beforeinput)[a-z]*\s*=/i', '', $out); $out = preg_replace('/on(repeat|begin|finish|beforeinput)[a-z]*\s*=/i', '', $out);
} while ($oldstringtoclean != $out); } while ($oldstringtoclean != $out);
// Check the limit of external links that are automatically executed in a Rich text content. We count: // Check the limit of external links that are automatically executed in a Rich text content. We count:
// '<img' to avoid <img src="http...">, we can only accept "<img src="data:..." // '<img' to avoid <img src="http...">, we can only accept "<img src="data:..."
// 'url(' to avoid inline style like background: url(http... // 'url(' to avoid inline style like background: url(http...
// '<link' to avoid <link href="http..."> // '<link' to avoid <link href="http...">

1
test/phpunit/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/DemoTest.php

View File

@@ -387,7 +387,8 @@ class SecurityTest extends CommonClassTest
{ {
$stringtotest = 'eée'; $stringtotest = 'eée';
$decodedstring = dol_string_onlythesehtmlattributes($stringtotest); $decodedstring = dol_string_onlythesehtmlattributes($stringtotest);
$this->assertEquals('e&eacute;e', $decodedstring, 'Function did not sanitize correctly with test 1'); //$this->assertEquals('e&eacute;e', $decodedstring, 'Function did not sanitize correctly with test 1');
$this->assertEquals('eée', $decodedstring, 'Function did not sanitize correctly with test 1');
$stringtotest = '<div onload="ee"><a href="123"><span class="abc">abc</span></a></div>'; $stringtotest = '<div onload="ee"><a href="123"><span class="abc">abc</span></a></div>';
$decodedstring = dol_string_onlythesehtmlattributes($stringtotest); $decodedstring = dol_string_onlythesehtmlattributes($stringtotest);
@@ -1296,6 +1297,30 @@ class SecurityTest extends CommonClassTest
{ {
global $conf; global $conf;
// Test on a string in Hindi, with and without MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES, because
// in the past this case lost the UTF-8 encoding of the string.
$conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 0;
$result = dol_htmlwithnojs('String in Hindi लेखाकर्म', 0, 'restricthtml');
print __METHOD__." result=".$result."\n";
$this->assertEquals('String in Hindi लेखाकर्म', $result, 'Test js sanitizing a Hindi string is ko');
$conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 1;
$result = dol_htmlwithnojs('String in Hindi लेखाकर्म', 0, 'restricthtml');
print __METHOD__." result=".$result."\n";
$this->assertEquals('String in Hindi लेखाकर्म', $result, 'Test js sanitizing a Hindi string is ko');
$conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 1;
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 1;
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 1;
$result = dol_htmlwithnojs('String in Hindi लेखाकर्म', 0, 'restricthtml');
print __METHOD__." result=".$result."\n";
$this->assertEquals('String in Hindi लेखाकर्म', $result, 'Test js sanitizing a Hindi string is ko');
$conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 0; $conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 0;
// If we set this to 1, it will also convert emoticon in htmlentities, so tests must be modified. // If we set this to 1, it will also convert emoticon in htmlentities, so tests must be modified.