diff --git a/htdocs/core/lib/functions.lib.php b/htdocs/core/lib/functions.lib.php index ed6a0165d93..41387eacc26 100644 --- a/htdocs/core/lib/functions.lib.php +++ b/htdocs/core/lib/functions.lib.php @@ -2191,52 +2191,47 @@ function dol_escape_htmltag($stringtoescape, $keepb = 0, $keepn = 0, $noescapeta } if (count($tmparrayoftags)) { + // Now we will protect tags (defined into $tmparrayoftags) that we want to keep untouched + $reg = array(); - $tmp = str_ireplace('__DOUBLEQUOTE', '', $tmp); // The keyword DOUBLEQUOTE is forbidden. Reserved, so we removed it if we find it. + // Remove reserved keywords. They are forbidden in a source string + $tmp = str_ireplace(array('__DOUBLEQUOTE', '__BEGINTAGTOREPLACE', '__ENDTAGTOREPLACE', '__BEGINENDTAGTOREPLACE'), '', $tmp); foreach ($tmparrayoftags as $tagtoreplace) { + // For case of tag without attributes '', '', '', we protect them to avoid transformation by htmlentities() later $tmp = preg_replace('/<'.preg_quote($tagtoreplace, '/').'>/', '__BEGINTAGTOREPLACE'.$tagtoreplace.'__', $tmp); $tmp = str_ireplace('', '__ENDTAGTOREPLACE'.$tagtoreplace.'__', $tmp); $tmp = preg_replace('/<'.preg_quote($tagtoreplace, '/').' \/>/', '__BEGINENDTAGTOREPLACE'.$tagtoreplace.'__', $tmp); - // For case of tag with attribute + // For case of tag with attributes do { $tmpold = $tmp; - if (preg_match('/<'.preg_quote($tagtoreplace, '/').'\s+([^>]+)>/', $tmp, $reg)) { - $tmpattributes = str_ireplace(array('[', ']'), '_', $reg[1]); // We must never have [ ] inside the attribute string - $tmpattributes = str_ireplace('href="http:', '__HREFHTTPA', $tmpattributes); - $tmpattributes = str_ireplace('href="https:', '__HREFHTTPSA', $tmpattributes); - $tmpattributes = str_ireplace('src="http:', '__SRCHTTPIMG', $tmpattributes); - $tmpattributes = str_ireplace('src="https:', '__SRCHTTPSIMG', $tmpattributes); + if (preg_match('/<'.preg_quote($tagtoreplace, '/').'(\s+)([^>]+)>/', $tmp, $reg)) { + // We want to protect the attribute part ... in '' to avoid transformation by htmlentities() later + $tmpattributes = str_ireplace(array('[', ']'), '_', $reg[2]); // We must never have [ ] inside the attribute string $tmpattributes = str_ireplace('"', '__DOUBLEQUOTE', $tmpattributes); - $tmpattributes = preg_replace('/[^a-z0-9_\/\?\;\s=&\.\-@:\.#\+]/i', '', $tmpattributes); + $tmpattributes = preg_replace('/[^a-z0-9_%,\/\?\;\s=&\.\-@:\.#\+]/i', '', $tmpattributes); //$tmpattributes = preg_replace("/float:\s*(left|right)/", "", $tmpattributes); // Disabled: we must not remove content - $tmp = preg_replace('/<'.preg_quote($tagtoreplace, '/').'\s+'.preg_quote($reg[1], '/').'>/', '__BEGINTAGTOREPLACE'.$tagtoreplace.'['.$tmpattributes.']__', $tmp); - } - if (preg_match('/<'.preg_quote($tagtoreplace, '/').'\s+([^>]+)\s+\/>/', $tmp, $reg)) { - $tmpattributes = str_ireplace(array('[', ']'), '_', $reg[1]); // We must not have [ ] inside the attribute string - $tmpattributes = str_ireplace('"', '__DOUBLEQUOTE', $tmpattributes); - $tmpattributes = preg_replace('/[^a-z0-9_\/\?\;\s=&\.\-@:\.#\+]/i', '', $tmpattributes); - //$tmpattributes = preg_replace("/float:\s*(left|right)/", "", $tmpattributes); // Disabled: we must not remove content. - $tmp = preg_replace('/<'.preg_quote($tagtoreplace, '/').'\s+'.preg_quote($reg[1], '/').'\s+\/>/', '__BEGINENDTAGTOREPLACE'.$tagtoreplace.'['.$tmpattributes.']__', $tmp); + $tmp = str_replace('<'.$tagtoreplace.$reg[1].$reg[2].'>', '__BEGINTAGTOREPLACE'.$tagtoreplace.'['.$tmpattributes.']__', $tmp); } $diff = strcmp($tmpold, $tmp); } while ($diff); } - $tmp = str_ireplace('"', '__DOUBLEQUOT', $tmp); + $tmp = str_ireplace('"', '__DOUBLEQUOTE', $tmp); $tmp = str_ireplace('<', '__LESSTAN', $tmp); $tmp = str_ireplace('>', '__GREATERTHAN', $tmp); } - // Warning: htmlentities encode HTML tags like , but not < > "es; ' ' & that remains untouched. - $result = htmlentities($tmp, ENT_COMPAT, 'UTF-8'); // Convert & into & and more... + // Warning: htmlentities encode HTML tags like & into & and more (but not < > "es; ' ' & that remains untouched). + $result = htmlentities($tmp, ENT_COMPAT, 'UTF-8'); //print $result; if (count($tmparrayoftags)) { + // Restore protected tags foreach ($tmparrayoftags as $tagtoreplace) { $result = str_ireplace('__BEGINTAGTOREPLACE'.$tagtoreplace.'__', '<'.$tagtoreplace.'>', $result); $result = preg_replace('/__BEGINTAGTOREPLACE'.$tagtoreplace.'\[([^\]]*)\]__/', '<'.$tagtoreplace.' \1>', $result); @@ -2245,19 +2240,13 @@ function dol_escape_htmltag($stringtoescape, $keepb = 0, $keepn = 0, $noescapeta $result = preg_replace('/__BEGINENDTAGTOREPLACE'.$tagtoreplace.'\[([^\]]*)\]__/', '<'.$tagtoreplace.' \1 />', $result); } - $result = str_ireplace('__HREFHTTPA', 'href="http:', $result); - $result = str_ireplace('__HREFHTTPSA', 'href="https:', $result); - $result = str_ireplace('__SRCHTTPIMG', 'src="http:', $result); - $result = str_ireplace('__SRCHTTPSIMG', 'src="https:', $result); $result = str_ireplace('__DOUBLEQUOTE', '"', $result); + $result = str_ireplace('__LESSTAN', '<', $result); + $result = str_ireplace('__GREATERTHAN', '>', $result); } $result = str_ireplace('__SIMPLEQUOTE', ''', $result); - $result = str_ireplace('__DOUBLEQUOT', '"', $result); - $result = str_ireplace('__LESSTAN', '<', $result); - $result = str_ireplace('__GREATERTHAN', '>', $result); - //$result="\n\n\n".var_export($tmp, true)."\n\n\n".var_export($result, true); return $result; diff --git a/test/phpunit/FunctionsLibTest.php b/test/phpunit/FunctionsLibTest.php index 9770cb09c46..bf5b02a69ee 100644 --- a/test/phpunit/FunctionsLibTest.php +++ b/test/phpunit/FunctionsLibTest.php @@ -1133,6 +1133,18 @@ class FunctionsLibTest extends CommonClassTest $result = dol_escape_htmltag($input, 1); $this->assertEquals('x&<b>#</b>,"', $result); + $input = ''; // & and " are converted into html entities, are not removed + $result = dol_escape_htmltag($input, 1, 1, 'common', 0, 1); + $this->assertEquals('', $result); + + $input = ''; // & and " are converted into html entities, are not removed + $result = dol_escape_htmltag($input, 1, 1, 'common'); + $this->assertEquals('', $result); + + $input = ''; // & and " are converted into html entities, are not removed + $result = dol_escape_htmltag($input, 1); + $this->assertEquals('<img src="data:image/png;base64, 123/456+789==" style="height: 123px; width:456px">', $result); + $input = ''; // & and " are converted into html entities, are not removed $result = dol_escape_htmltag($input, 1, 1, 'common', 0, 1); $this->assertEquals('', $result);