diff --git a/htdocs/core/lib/functions.lib.php b/htdocs/core/lib/functions.lib.php index 3f4c27799cd..77ac6af5d58 100644 --- a/htdocs/core/lib/functions.lib.php +++ b/htdocs/core/lib/functions.lib.php @@ -908,65 +908,7 @@ function sanitizeVal($out = '', $check = 'alphanohtml', $filter = null, $options break; case 'restricthtml': // Recommended for most html textarea case 'restricthtmlallowunvalid': - do { - $oldstringtoclean = $out; - - if (!empty($out) && !empty($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML) && $check != 'restricthtmlallowunvalid') { - try { - $dom = new DOMDocument; - // Add a trick to solve pb with text without parent tag - // like '

Foo

bar

' that wrongly ends up without the trick into '

Foo

bar

' - // like 'abc' that wrongly ends up without the tric into with '

abc

' - $out = '
'.$out.'
'; - - $dom->loadHTML($out, LIBXML_ERR_NONE|LIBXML_HTML_NOIMPLIED|LIBXML_HTML_NODEFDTD|LIBXML_NONET|LIBXML_NOWARNING|LIBXML_NOXMLDECL); - $out = trim($dom->saveHTML()); - - // Remove the trick added to solve pb with text without parent tag - $out = preg_replace('/^
/', '', $out); - $out = preg_replace('/<\/div>$/', '', $out); - var_dump('xxx'); - var_dump($out); - } catch (Exception $e) { - //print $e->getMessage(); - return 'InvalidHTMLString'; - } - } - - // Ckeditor use the numeric entitic for apostrophe so we force it to text entity (all other special chars are - // encoded using text entities) so we can then exclude all numeric entities. - $out = preg_replace('/'/i', ''', $out); - - // We replace chars from a/A to z/Z encoded with numeric HTML entities with the real char so we won't loose the chars at the next step (preg_replace). - // No need to use a loop here, this step is not to sanitize (this is done at next step, this is to try to save chars, even if they are - // using a non coventionnel way to be encoded, to not have them sanitized just after) - //$out = preg_replace_callback('/&#(x?[0-9][0-9a-f]+;?)/i', 'realCharForNumericEntities', $out); - $out = preg_replace_callback('/&#(x?[0-9][0-9a-f]+;?)/i', function ($m) { - return realCharForNumericEntities($m); }, $out); - - - // Now we remove all remaining HTML entities starting with a number. We don't want such entities. - $out = preg_replace('/&#x?[0-9]+/i', '', $out); // For example if we have javascript with an entities without the ; to hide the 'a' of 'javascript'. - - $out = dol_string_onlythesehtmltags($out, 0, 1, 1); - - // We should also exclude non expected HTML attributes and clean content of some attributes. - if (!empty($conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES)) { - // Warning, the function may add a LF so we are forced to trim to compare with old $out without having always a difference and an infinit loop. - $out = dol_string_onlythesehtmlattributes($out); - } - - // Restore entity ' into ' (restricthtml is for html content so we can use html entity) - $out = preg_replace('/'/i', "'", $out); - } while ($oldstringtoclean != $out); - - // Check the limit of external links in a Rich text content. We count ' getDolGlobalInt("MAIN_SECURITY_MAX_IMG_IN_HTML_CONTENT", 1000)) { - return 'TooManyLinksIntoHTMLString'; - } - + $out = dol_htmlwithnojs($out, 1); break; case 'custom': if (!empty($out)) { @@ -7151,6 +7093,87 @@ function dol_nl2br($stringtoencode, $nl2brmode = 0, $forxml = false) } } +/** + * Sanitize a HTML to remove js and dangerous content + * + * @param string $stringtoencode String to encode + * @param int $nouseofiframesandbox Allow use of option MAIN_SECURITY_USE_SANDBOX_FOR_HTMLWITHNOJS for html sanitizing + * @return string HTML sanitized + */ +function dol_htmlwithnojs($stringtoencode, $nouseofiframesandbox = 0) +{ + global $conf; + + if (empty($nouseofiframesandbox) && !empty($conf->global->MAIN_SECURITY_USE_SANDBOX_FOR_HTMLWITHNOJS)) { + // TODO using sandbox on inline html content is not possible yet with current browsers + //$s = ''; + return $stringtoencode; + } else { + $out = $stringtoencode; + + do { + $oldstringtoclean = $out; + + if (!empty($out) && !empty($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML) && $check != 'restricthtmlallowunvalid') { + try { + $dom = new DOMDocument; + // Add a trick to solve pb with text without parent tag + // like '

Foo

bar

' that wrongly ends up without the trick into '

Foo

bar

' + // like 'abc' that wrongly ends up without the tric into with '

abc

' + $out = '
'.$out.'
'; + + $dom->loadHTML($out, LIBXML_ERR_NONE|LIBXML_HTML_NOIMPLIED|LIBXML_HTML_NODEFDTD|LIBXML_NONET|LIBXML_NOWARNING|LIBXML_NOXMLDECL); + $out = trim($dom->saveHTML()); + + // Remove the trick added to solve pb with text without parent tag + $out = preg_replace('/^
/', '', $out); + $out = preg_replace('/<\/div>$/', '', $out); + } catch (Exception $e) { + // If error, invalid HTML string with no way to clean it + //print $e->getMessage(); + $out = 'InvalidHTMLString'; + } + } + + // Ckeditor use the numeric entitic for apostrophe so we force it to text entity (all other special chars are + // encoded using text entities) so we can then exclude all numeric entities. + $out = preg_replace('/'/i', ''', $out); + + // We replace chars from a/A to z/Z encoded with numeric HTML entities with the real char so we won't loose the chars at the next step (preg_replace). + // No need to use a loop here, this step is not to sanitize (this is done at next step, this is to try to save chars, even if they are + // using a non coventionnel way to be encoded, to not have them sanitized just after) + //$out = preg_replace_callback('/&#(x?[0-9][0-9a-f]+;?)/i', 'realCharForNumericEntities', $out); + $out = preg_replace_callback('/&#(x?[0-9][0-9a-f]+;?)/i', function ($m) { + return realCharForNumericEntities($m); }, $out); + + + // Now we remove all remaining HTML entities starting with a number. We don't want such entities. + $out = preg_replace('/&#x?[0-9]+/i', '', $out); // For example if we have javascript with an entities without the ; to hide the 'a' of 'javascript'. + + $out = dol_string_onlythesehtmltags($out, 0, 1, 1); + + // We should also exclude non expected HTML attributes and clean content of some attributes. + if (!empty($conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES)) { + // Warning, the function may add a LF so we are forced to trim to compare with old $out without having always a difference and an infinit loop. + $out = dol_string_onlythesehtmlattributes($out); + } + + // Restore entity ' into ' (restricthtml is for html content so we can use html entity) + $out = preg_replace('/'/i', "'", $out); + } while ($oldstringtoclean != $out); + + // Check the limit of external links in a Rich text content. We count ' getDolGlobalInt("MAIN_SECURITY_MAX_IMG_IN_HTML_CONTENT", 1000)) { + $out = 'TooManyLinksIntoHTMLString'; + } + + return $out; + } +} /** * This function is called to encode a string into a HTML string but differs from htmlentities because diff --git a/htdocs/main.inc.php b/htdocs/main.inc.php index a0aafc22f42..94b69a065e4 100644 --- a/htdocs/main.inc.php +++ b/htdocs/main.inc.php @@ -80,7 +80,7 @@ function realCharForNumericEntities($matches) * Warning: Such a protection can't be enough. It is not reliable as it will always be possible to bypass this. Good protection can * only be guaranted by escaping data during output. * - * @param string $val Brut value found into $_GET, $_POST or PHP_SELF + * @param string $val Brute value found into $_GET, $_POST or PHP_SELF * @param string $type 0=POST, 1=GET, 2=PHP_SELF, 3=GET without sql reserved keywords (the less tolerant test) * @return int >0 if there is an injection, 0 if none */ @@ -226,6 +226,9 @@ function analyseVarsForSqlAndScriptsInjection(&$var, $type) } } +// To disable the WAF for GET and POST, uncomment this +//define('NOSCANGETFORINJECTION', 1); +//define('NOSCANPOSTFORINJECTION', 1); // Check consistency of NOREQUIREXXX DEFINES if ((defined('NOREQUIREDB') || defined('NOREQUIRETRAN')) && !defined('NOREQUIREMENU')) { diff --git a/htdocs/ticket/class/actions_ticket.class.php b/htdocs/ticket/class/actions_ticket.class.php index c5e267fddf6..c49e7b4fbf4 100644 --- a/htdocs/ticket/class/actions_ticket.class.php +++ b/htdocs/ticket/class/actions_ticket.class.php @@ -218,7 +218,7 @@ class ActionsTicket // Deal with format differences (text / HTML) if (dol_textishtml($object->message)) { print '
'; - print $object->message; + print dol_htmlwithnojs($object->message); print '
'; /*print '
'; print $langs->trans("More").'...'; diff --git a/test/phpunit/SecurityTest.php b/test/phpunit/SecurityTest.php index 7a965337c96..cd4c3600b4c 100644 --- a/test/phpunit/SecurityTest.php +++ b/test/phpunit/SecurityTest.php @@ -547,8 +547,8 @@ class SecurityTest extends PHPUnit\Framework\TestCase $result=GETPOST("param15", 'restricthtml'); // param15 = src=>0xbeefed that is a dangerous string print __METHOD__." result=".$result."\n"; - //$this->assertEquals('InvalidHTMLString', $result, 'Test 15b'); - $this->assertEquals(' src=>0xbeefed', $result, 'Test 15b'); + $this->assertEquals('InvalidHTMLString', $result, 'Test 15b'); + //$this->assertEquals(' src=>0xbeefed', $result, 'Test 15b'); unset($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML);