From c0a0acf1290ef20974a3f7c0e4d02aa0fc8f5cb5 Mon Sep 17 00:00:00 2001
From: ldestailleur
Date: Tue, 15 Jul 2025 21:51:50 +0200
Subject: [PATCH] FIX #34746
---
htdocs/core/lib/functions.lib.php | 37 ++++++++++++++++++++++---------
test/phpunit/.gitignore | 1 +
test/phpunit/SecurityTest.php | 27 +++++++++++++++++++++-
3 files changed, 54 insertions(+), 11 deletions(-)
create mode 100644 test/phpunit/.gitignore
diff --git a/htdocs/core/lib/functions.lib.php b/htdocs/core/lib/functions.lib.php
index a0da95da416..51d9f061f30 100644
--- a/htdocs/core/lib/functions.lib.php
+++ b/htdocs/core/lib/functions.lib.php
@@ -8530,14 +8530,21 @@ function dol_string_onlythesehtmlattributes($stringtoclean, $allowed_attributes
{
if (is_null($allowed_attributes)) {
$allowed_attributes = array(
- "allow", "allowfullscreen", "alt", "async", "class", "content", "contenteditable", "crossorigin", "data-html", "frameborder", "height", "href", "id", "name", "property", "rel", "src", "style", "target", "title", "type", "width",
+ "allow", "allowfullscreen", "alt", "async", "class", "contenteditable", "crossorigin", "data-html", "frameborder", "height", "href", "id", "name", "property", "rel", "src", "style", "target", "title", "type", "width",
// HTML5
"header", "footer", "nav", "section", "menu", "menuitem"
);
}
+ // Always allow the "content" and "http-equiv" attributes: they are needed by the meta tag used to force the encoding, so that the loadHTML/saveHTML functions keep the HTML content in UTF-8.
+ if (!in_array("content", $allowed_attributes)) {
+ $allowed_attributes[] = "content";
+ }
+ if (!in_array("http-equiv", $allowed_attributes)) {
+ $allowed_attributes[] = "http-equiv";
+ }
if (class_exists('DOMDocument') && !empty($stringtoclean)) {
- $stringtoclean = ''.$stringtoclean.'';
+ $stringtoclean = ''.$stringtoclean.'';
// Warning: loadHTML does not support HTML5 on old libxml versions.
$dom = new DOMDocument('', 'UTF-8');
@@ -8588,12 +8595,15 @@ function dol_string_onlythesehtmlattributes($stringtoclean, $allowed_attributes
}
}
+ $dom->encoding = 'UTF-8';
+
$return = $dom->saveHTML(); // This may add a LF at end of lines, so we will trim later
//$return = 'aaaa
bbssdd
'."\naaa
aabb
";
$return = preg_replace('/^'.preg_quote('', '/').'/', '', $return);
- $return = preg_replace('/^'.preg_quote('', '/').'/', '', $return);
- $return = preg_replace('/'.preg_quote('', '/').'$/', '', $return);
+ $return = preg_replace('/^'.preg_quote('<', '/').'[^<>]*'.preg_quote('>', '/').'/', '', $return);
+ $return = preg_replace('/'.preg_quote('', '/').'$/', '', trim($return));
+
return trim($return);
} else {
return $stringtoclean;
@@ -8765,17 +8775,24 @@ function dol_htmlwithnojs($stringtoencode, $nouseofiframesandbox = 0, $check = '
// like 'abc' that wrongly ends up, without the trick, with 'abc
'
if (dol_textishtml($out)) {
- $out = ''.$out.'
';
+ $out = ''.$out.'
';
} else {
- $out = ''.dol_nl2br($out).'
';
+ $out = ''.dol_nl2br($out).'
';
}
$dom->loadHTML($out, LIBXML_HTML_NODEFDTD | LIBXML_ERR_NONE | LIBXML_HTML_NOIMPLIED | LIBXML_NONET | LIBXML_NOWARNING | LIBXML_NOERROR | LIBXML_NOXMLDECL);
+
+ $dom->encoding = 'UTF-8';
+
$out = trim($dom->saveHTML());
- // Remove the trick added to solve pb with text without parent tag
- $out = preg_replace('/^<\?xml encoding="UTF-8">/', '', $out);
- $out = preg_replace('/<\/div>$/', '', $out);
+ // Remove the wrapper that was added to work around problems with UTF-8 text and with text that has no parent tag
+ $out = preg_replace('/^'.preg_quote('', '/').'/', '', $out);
+ $out = preg_replace('/^'.preg_quote('<', '/').'[^<>]+'.preg_quote('>
', '/').'/', '', $out);
+ $out = preg_replace('/'.preg_quote('
', '/').'$/', '', trim($out));
+ // $out = preg_replace('/^<\?xml encoding="UTF-8">
/', '', $out);
+ // $out = preg_replace('/<\/div>$/', '', $out);
+ // var_dump('rrrrrrrrrrrrrrrrrrrrrrrrrrrrr'.$out);
} catch (Exception $e) {
// If error, invalid HTML string with no way to clean it
//print $e->getMessage();
@@ -8889,7 +8906,7 @@ function dol_htmlwithnojs($stringtoencode, $nouseofiframesandbox = 0, $check = '
$out = preg_replace('/on(repeat|begin|finish|beforeinput)[a-z]*\s*=/i', '', $out);
} while ($oldstringtoclean != $out);
- // Check the limit of external links that are automatically executed in a Rich text content. We count:
+ // Check the limit of external links that are automatically executed in a Rich text content. We count:
// '

, we can only accept "

diff --git a/test/phpunit/.gitignore b/test/phpunit/.gitignore
new file mode 100644
index 00000000000..647a82afb84
--- /dev/null
+++ b/test/phpunit/.gitignore
@@ -0,0 +1 @@
+/DemoTest.php
diff --git a/test/phpunit/SecurityTest.php b/test/phpunit/SecurityTest.php
index 3fdef280fed..1d34886db18 100644
--- a/test/phpunit/SecurityTest.php
+++ b/test/phpunit/SecurityTest.php
@@ -387,7 +387,8 @@ class SecurityTest extends CommonClassTest
{
$stringtotest = 'eée';
$decodedstring = dol_string_onlythesehtmlattributes($stringtotest);
- $this->assertEquals('eée', $decodedstring, 'Function did not sanitize correctly with test 1');
+ //$this->assertEquals('eée', $decodedstring, 'Function did not sanitize correctly with test 1');
+ $this->assertEquals('eée', $decodedstring, 'Function did not sanitize correctly with test 1');
$stringtotest = '
';
$decodedstring = dol_string_onlythesehtmlattributes($stringtotest);
@@ -1296,6 +1297,30 @@ class SecurityTest extends CommonClassTest
{
global $conf;
+ // Test a string in Hindi with MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES because,
+ // in the past, this case lost the UTF-8 encoding.
+ $conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 0;
+
+ $result = dol_htmlwithnojs('String in Hindi लेखाकर्म', 0, 'restricthtml');
+ print __METHOD__." result=".$result."\n";
+ $this->assertEquals('String in Hindi लेखाकर्म', $result, 'Test js sanitizing a Hindi string is ko');
+
+ $conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 1;
+
+ $result = dol_htmlwithnojs('String in Hindi लेखाकर्म', 0, 'restricthtml');
+ print __METHOD__." result=".$result."\n";
+ $this->assertEquals('String in Hindi लेखाकर्म', $result, 'Test js sanitizing a Hindi string is ko');
+
+ $conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 1;
+ $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 1;
+ $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 1;
+
+ $result = dol_htmlwithnojs('String in Hindi लेखाकर्म', 0, 'restricthtml');
+ print __METHOD__." result=".$result."\n";
+ $this->assertEquals('String in Hindi लेखाकर्म', $result, 'Test js sanitizing a Hindi string is ko');
+
+
+
$conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 0;
// If we set this to 1, it will also convert emoticon in htmlentities, so tests must be modified.