Fix: Do not transform [__XXX__] string when

MAIN_RESTRICTHTML_ONLY_VALID_HTML is on.
This commit is contained in:
Laurent Destailleur
2025-12-01 13:54:27 +01:00
parent ebacc05fe3
commit 41fc03c63b
3 changed files with 35 additions and 7 deletions

View File

@@ -9392,19 +9392,41 @@ function dol_htmlwithnojs($stringtoencode, $nouseofiframesandbox = 0, $check = '
//$out = '<html><head><meta charset="utf-8"></head><body><div class="tricktoremove">'.dol_nl2br($out).'</div></body></html>';
}
// Note: <a href="https://[__aaa__]/aaa.html"> is transformed into <a href="https://%5B__aaa__%5D/aaa.html"> (the brackets inside the URL get percent-encoded)
// We don't want that, so we protect [__xxx__] by replacing [ and ] before loadHTML and restore them after saveHTML
$out = preg_replace_callback(
'/\[__([0-9a-zA-Z_]+)__\]/',
/**
* @param array<int,string> $m Array of matches
* @return string The matched key wrapped in BRACKETSTART__ / __BRACKETEND markers instead of [__ / __]
*/
function ($m) {
return 'BRACKETSTART__' . $m[1] . '__BRACKETEND'; },
$out);
$dom->loadHTML($out, LIBXML_HTML_NODEFDTD | LIBXML_ERR_NONE | LIBXML_HTML_NOIMPLIED | LIBXML_NONET | LIBXML_NOWARNING | LIBXML_NOERROR | LIBXML_NOXMLDECL);
$dom->encoding = 'UTF-8';
$out = trim($dom->saveHTML());
// Restore [ and ] that were protected before loadHTML
$out = preg_replace_callback(
'/BRACKETSTART__([0-9a-zA-Z_]+)__BRACKETEND/',
/**
* @param array<int,string> $m Array of matches
* @return string The matched key restored to its original [__key__] form
*/
function ($m) {
return '[__' . $m[1] . '__]'; },
$out);
// Remove the wrapper added to work around problems with UTF-8 text and with text that has no parent tag
//$out = preg_replace('/^'.preg_quote('<?xml encoding="UTF-8">', '/').'/', '', $out);
$out = preg_replace('/^' . preg_quote('<html><head><', '/') . '[^<>]+' . preg_quote('></head><body><div class="tricktoremove">', '/') . '/', '', $out);
$out = preg_replace('/' . preg_quote('</div></body></html>', '/') . '$/', '', trim($out));
// $out = preg_replace('/^<\?xml encoding="UTF-8"><div class="tricktoremove">/', '', $out);
// $out = preg_replace('/<\/div>$/', '', $out);
// var_dump('rrrrrrrrrrrrrrrrrrrrrrrrrrrrr'.$out);
//$out = preg_replace('/^<\?xml encoding="UTF-8"><div class="tricktoremove">/', '', $out);
//$out = preg_replace('/<\/div>$/', '', $out);
if (!$outishtml) { // If $out was not HTML content we made before a dol_nl2br so we must do the opposite operation now
$out = str_replace('<br>', '', $out);