FIX Can use the WAF of HTML content (dol_htmlwithnojs) for output too

This commit is contained in:
Laurent Destailleur
2022-11-28 18:42:59 +01:00
parent db6ee9f75f
commit 5cfe40a4bc
4 changed files with 89 additions and 63 deletions

View File

@@ -908,65 +908,7 @@ function sanitizeVal($out = '', $check = 'alphanohtml', $filter = null, $options
break; break;
case 'restricthtml': // Recommended for most html textarea case 'restricthtml': // Recommended for most html textarea
case 'restricthtmlallowunvalid': case 'restricthtmlallowunvalid':
do { $out = dol_htmlwithnojs($out, 1);
$oldstringtoclean = $out;
if (!empty($out) && !empty($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML) && $check != 'restricthtmlallowunvalid') {
try {
$dom = new DOMDocument;
// Add a trick to solve pb with text without parent tag
// like '<h1>Foo</h1><p>bar</p>' that wrongly ends up without the trick into '<h1>Foo<p>bar</p></h1>'
// like 'abc' that wrongly ends up without the tric into with '<p>abc</p>'
$out = '<div class="tricktoremove">'.$out.'</div>';
$dom->loadHTML($out, LIBXML_ERR_NONE|LIBXML_HTML_NOIMPLIED|LIBXML_HTML_NODEFDTD|LIBXML_NONET|LIBXML_NOWARNING|LIBXML_NOXMLDECL);
$out = trim($dom->saveHTML());
// Remove the trick added to solve pb with text without parent tag
$out = preg_replace('/^<div class="tricktoremove">/', '', $out);
$out = preg_replace('/<\/div>$/', '', $out);
var_dump('xxx');
var_dump($out);
} catch (Exception $e) {
//print $e->getMessage();
return 'InvalidHTMLString';
}
}
// Ckeditor use the numeric entitic for apostrophe so we force it to text entity (all other special chars are
// encoded using text entities) so we can then exclude all numeric entities.
$out = preg_replace('/&#39;/i', '&apos;', $out);
// We replace chars from a/A to z/Z encoded with numeric HTML entities with the real char so we won't loose the chars at the next step (preg_replace).
// No need to use a loop here, this step is not to sanitize (this is done at next step, this is to try to save chars, even if they are
// using a non coventionnel way to be encoded, to not have them sanitized just after)
//$out = preg_replace_callback('/&#(x?[0-9][0-9a-f]+;?)/i', 'realCharForNumericEntities', $out);
$out = preg_replace_callback('/&#(x?[0-9][0-9a-f]+;?)/i', function ($m) {
return realCharForNumericEntities($m); }, $out);
// Now we remove all remaining HTML entities starting with a number. We don't want such entities.
$out = preg_replace('/&#x?[0-9]+/i', '', $out); // For example if we have j&#x61vascript with an entities without the ; to hide the 'a' of 'javascript'.
$out = dol_string_onlythesehtmltags($out, 0, 1, 1);
// We should also exclude non expected HTML attributes and clean content of some attributes.
if (!empty($conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES)) {
// Warning, the function may add a LF so we are forced to trim to compare with old $out without having always a difference and an infinit loop.
$out = dol_string_onlythesehtmlattributes($out);
}
// Restore entity &apos; into &#39; (restricthtml is for html content so we can use html entity)
$out = preg_replace('/&apos;/i', "&#39;", $out);
} while ($oldstringtoclean != $out);
// Check the limit of external links in a Rich text content. We count '<img' and 'url('
$reg = array();
preg_match_all('/(<img|url\()/i', $out, $reg);
if (count($reg[0]) > getDolGlobalInt("MAIN_SECURITY_MAX_IMG_IN_HTML_CONTENT", 1000)) {
return 'TooManyLinksIntoHTMLString';
}
break; break;
case 'custom': case 'custom':
if (!empty($out)) { if (!empty($out)) {
@@ -7151,6 +7093,87 @@ function dol_nl2br($stringtoencode, $nl2brmode = 0, $forxml = false)
} }
} }
/**
* Sanitize a HTML to remove js and dangerous content
*
* @param string $stringtoencode String to encode
* @param int $nouseofiframesandbox Allow use of option MAIN_SECURITY_USE_SANDBOX_FOR_HTMLWITHNOJS for html sanitizing
* @return string HTML sanitized
*/
function dol_htmlwithnojs($stringtoencode, $nouseofiframesandbox = 0)
{
global $conf;
if (empty($nouseofiframesandbox) && !empty($conf->global->MAIN_SECURITY_USE_SANDBOX_FOR_HTMLWITHNOJS)) {
// TODO using sandbox on inline html content is not possible yet with current browsers
//$s = '<iframe class="iframewithsandbox" sandbox><html><body>';
//$s .= $stringtoencode;
//$s .= '</body></html></iframe>';
return $stringtoencode;
} else {
$out = $stringtoencode;
do {
$oldstringtoclean = $out;
if (!empty($out) && !empty($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML) && $check != 'restricthtmlallowunvalid') {
try {
$dom = new DOMDocument;
// Add a trick to solve pb with text without parent tag
// like '<h1>Foo</h1><p>bar</p>' that wrongly ends up without the trick into '<h1>Foo<p>bar</p></h1>'
// like 'abc' that wrongly ends up without the tric into with '<p>abc</p>'
$out = '<div class="tricktoremove">'.$out.'</div>';
$dom->loadHTML($out, LIBXML_ERR_NONE|LIBXML_HTML_NOIMPLIED|LIBXML_HTML_NODEFDTD|LIBXML_NONET|LIBXML_NOWARNING|LIBXML_NOXMLDECL);
$out = trim($dom->saveHTML());
// Remove the trick added to solve pb with text without parent tag
$out = preg_replace('/^<div class="tricktoremove">/', '', $out);
$out = preg_replace('/<\/div>$/', '', $out);
} catch (Exception $e) {
// If error, invalid HTML string with no way to clean it
//print $e->getMessage();
$out = 'InvalidHTMLString';
}
}
// Ckeditor use the numeric entitic for apostrophe so we force it to text entity (all other special chars are
// encoded using text entities) so we can then exclude all numeric entities.
$out = preg_replace('/&#39;/i', '&apos;', $out);
// We replace chars from a/A to z/Z encoded with numeric HTML entities with the real char so we won't loose the chars at the next step (preg_replace).
// No need to use a loop here, this step is not to sanitize (this is done at next step, this is to try to save chars, even if they are
// using a non coventionnel way to be encoded, to not have them sanitized just after)
//$out = preg_replace_callback('/&#(x?[0-9][0-9a-f]+;?)/i', 'realCharForNumericEntities', $out);
$out = preg_replace_callback('/&#(x?[0-9][0-9a-f]+;?)/i', function ($m) {
return realCharForNumericEntities($m); }, $out);
// Now we remove all remaining HTML entities starting with a number. We don't want such entities.
$out = preg_replace('/&#x?[0-9]+/i', '', $out); // For example if we have j&#x61vascript with an entities without the ; to hide the 'a' of 'javascript'.
$out = dol_string_onlythesehtmltags($out, 0, 1, 1);
// We should also exclude non expected HTML attributes and clean content of some attributes.
if (!empty($conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES)) {
// Warning, the function may add a LF so we are forced to trim to compare with old $out without having always a difference and an infinit loop.
$out = dol_string_onlythesehtmlattributes($out);
}
// Restore entity &apos; into &#39; (restricthtml is for html content so we can use html entity)
$out = preg_replace('/&apos;/i', "&#39;", $out);
} while ($oldstringtoclean != $out);
// Check the limit of external links in a Rich text content. We count '<img' and 'url('
$reg = array();
preg_match_all('/(<img|url\()/i', $out, $reg);
if (count($reg[0]) > getDolGlobalInt("MAIN_SECURITY_MAX_IMG_IN_HTML_CONTENT", 1000)) {
$out = 'TooManyLinksIntoHTMLString';
}
return $out;
}
}
/** /**
* This function is called to encode a string into a HTML string but differs from htmlentities because * This function is called to encode a string into a HTML string but differs from htmlentities because

View File

@@ -80,7 +80,7 @@ function realCharForNumericEntities($matches)
* Warning: Such a protection can't be enough. It is not reliable as it will always be possible to bypass this. Good protection can * Warning: Such a protection can't be enough. It is not reliable as it will always be possible to bypass this. Good protection can
* only be guaranted by escaping data during output. * only be guaranted by escaping data during output.
* *
* @param string $val Brut value found into $_GET, $_POST or PHP_SELF * @param string $val Brute value found into $_GET, $_POST or PHP_SELF
* @param string $type 0=POST, 1=GET, 2=PHP_SELF, 3=GET without sql reserved keywords (the less tolerant test) * @param string $type 0=POST, 1=GET, 2=PHP_SELF, 3=GET without sql reserved keywords (the less tolerant test)
* @return int >0 if there is an injection, 0 if none * @return int >0 if there is an injection, 0 if none
*/ */
@@ -226,6 +226,9 @@ function analyseVarsForSqlAndScriptsInjection(&$var, $type)
} }
} }
// To disable the WAF for GET and POST, uncomment this
//define('NOSCANGETFORINJECTION', 1);
//define('NOSCANPOSTFORINJECTION', 1);
// Check consistency of NOREQUIREXXX DEFINES // Check consistency of NOREQUIREXXX DEFINES
if ((defined('NOREQUIREDB') || defined('NOREQUIRETRAN')) && !defined('NOREQUIREMENU')) { if ((defined('NOREQUIREDB') || defined('NOREQUIRETRAN')) && !defined('NOREQUIREMENU')) {

View File

@@ -218,7 +218,7 @@ class ActionsTicket
// Deal with format differences (text / HTML) // Deal with format differences (text / HTML)
if (dol_textishtml($object->message)) { if (dol_textishtml($object->message)) {
print '<div class="longmessagecut">'; print '<div class="longmessagecut">';
print $object->message; print dol_htmlwithnojs($object->message);
print '</div>'; print '</div>';
/*print '<div class="clear center">'; /*print '<div class="clear center">';
print $langs->trans("More").'...'; print $langs->trans("More").'...';

View File

@@ -547,8 +547,8 @@ class SecurityTest extends PHPUnit\Framework\TestCase
$result=GETPOST("param15", 'restricthtml'); // param15 = <img onerror<=alert(document.domain)> src=>0xbeefed that is a dangerous string $result=GETPOST("param15", 'restricthtml'); // param15 = <img onerror<=alert(document.domain)> src=>0xbeefed that is a dangerous string
print __METHOD__." result=".$result."\n"; print __METHOD__." result=".$result."\n";
//$this->assertEquals('InvalidHTMLString', $result, 'Test 15b'); $this->assertEquals('InvalidHTMLString', $result, 'Test 15b');
$this->assertEquals('<img onerror> src=&gt;0xbeefed', $result, 'Test 15b'); //$this->assertEquals('<img onerror> src=&gt;0xbeefed', $result, 'Test 15b');
unset($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML); unset($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML);