This commit is contained in:
ldestailleur
2025-07-15 21:51:50 +02:00
parent a145b3b44d
commit c0a0acf129
3 changed files with 54 additions and 11 deletions

View File

@@ -8530,14 +8530,21 @@ function dol_string_onlythesehtmlattributes($stringtoclean, $allowed_attributes
{
if (is_null($allowed_attributes)) {
$allowed_attributes = array(
"allow", "allowfullscreen", "alt", "async", "class", "content", "contenteditable", "crossorigin", "data-html", "frameborder", "height", "href", "id", "name", "property", "rel", "src", "style", "target", "title", "type", "width",
"allow", "allowfullscreen", "alt", "async", "class", "contenteditable", "crossorigin", "data-html", "frameborder", "height", "href", "id", "name", "property", "rel", "src", "style", "target", "title", "type", "width",
// HTML5
"header", "footer", "nav", "section", "menu", "menuitem"
);
}
// Always add content and http-equiv for meta tags, required to force encoding and keep html content in utf8 by load/saveHTML functions.
if (!in_array("content", $allowed_attributes)) {
$allowed_attributes[] = "content";
}
if (!in_array("http-equiv", $allowed_attributes)) {
$allowed_attributes[] = "http-equiv";
}
if (class_exists('DOMDocument') && !empty($stringtoclean)) {
$stringtoclean = '<?xml encoding="UTF-8"><html><body>'.$stringtoclean.'</body></html>';
$stringtoclean = '<?xml encoding="UTF-8"><html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head><body>'.$stringtoclean.'</body></html>';
// Warning: loadHTML does not support HTML5 on old libxml versions.
$dom = new DOMDocument('', 'UTF-8');
@@ -8588,12 +8595,15 @@ function dol_string_onlythesehtmlattributes($stringtoclean, $allowed_attributes
}
}
$dom->encoding = 'UTF-8';
$return = $dom->saveHTML(); // This may add a LF at end of lines, so we will trim later
//$return = '<html><body>aaaa</p>bb<p>ssdd</p>'."\n<p>aaa</p>aa<p>bb</p>";
$return = preg_replace('/^'.preg_quote('<?xml encoding="UTF-8">', '/').'/', '', $return);
$return = preg_replace('/^'.preg_quote('<html><body>', '/').'/', '', $return);
$return = preg_replace('/'.preg_quote('</body></html>', '/').'$/', '', $return);
$return = preg_replace('/^'.preg_quote('<html><head><', '/').'[^<>]*'.preg_quote('></head><body>', '/').'/', '', $return);
$return = preg_replace('/'.preg_quote('</body></html>', '/').'$/', '', trim($return));
return trim($return);
} else {
return $stringtoclean;
@@ -8765,17 +8775,24 @@ function dol_htmlwithnojs($stringtoencode, $nouseofiframesandbox = 0, $check = '
// like 'abc' that wrongly ends up, without the trick, with '<p>abc</p>'
if (dol_textishtml($out)) {
$out = '<?xml encoding="UTF-8"><div class="tricktoremove">'.$out.'</div>';
$out = '<?xml encoding="UTF-8"><html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head><body><div class="tricktoremove">'.$out.'</div></body></html>';
} else {
$out = '<?xml encoding="UTF-8"><div class="tricktoremove">'.dol_nl2br($out).'</div>';
$out = '<?xml encoding="UTF-8"><html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head><body><div class="tricktoremove">'.dol_nl2br($out).'</div></body></html>';
}
$dom->loadHTML($out, LIBXML_HTML_NODEFDTD | LIBXML_ERR_NONE | LIBXML_HTML_NOIMPLIED | LIBXML_NONET | LIBXML_NOWARNING | LIBXML_NOERROR | LIBXML_NOXMLDECL);
$dom->encoding = 'UTF-8';
$out = trim($dom->saveHTML());
// Remove the trick added to solve pb with text without parent tag
$out = preg_replace('/^<\?xml encoding="UTF-8"><div class="tricktoremove">/', '', $out);
$out = preg_replace('/<\/div>$/', '', $out);
// Remove the trick added to solve pb with text in utf8 and text without parent tag
$out = preg_replace('/^'.preg_quote('<?xml encoding="UTF-8">', '/').'/', '', $out);
$out = preg_replace('/^'.preg_quote('<html><head><', '/').'[^<>]+'.preg_quote('></head><body><div class="tricktoremove">', '/').'/', '', $out);
$out = preg_replace('/'.preg_quote('</div></body></html>', '/').'$/', '', trim($out));
// $out = preg_replace('/^<\?xml encoding="UTF-8"><div class="tricktoremove">/', '', $out);
// $out = preg_replace('/<\/div>$/', '', $out);
// var_dump('rrrrrrrrrrrrrrrrrrrrrrrrrrrrr'.$out);
} catch (Exception $e) {
// If error, invalid HTML string with no way to clean it
//print $e->getMessage();
@@ -8889,7 +8906,7 @@ function dol_htmlwithnojs($stringtoencode, $nouseofiframesandbox = 0, $check = '
$out = preg_replace('/on(repeat|begin|finish|beforeinput)[a-z]*\s*=/i', '', $out);
} while ($oldstringtoclean != $out);
// Check the limit of external links that are automatically executed in a Rich text content. We count:
// Check the limit of external links that are automatically executed in a Rich text content. We count:
// '<img' to avoid <img src="http...">, we can only accept "<img src="data:..."
// 'url(' to avoid inline style like background: url(http...
// '<link' to avoid <link href="http...">