2
0
forked from Wavyzz/dolibarr

FIX error management on emoji and utf8 validity by emailcollector

This commit is contained in:
Laurent Destailleur
2023-05-17 14:23:14 +02:00
parent 9743bbde60
commit 76de309cd9
2 changed files with 51 additions and 3 deletions

View File

@@ -8794,6 +8794,22 @@ function utf8_check($str)
return true; return true;
} }
/**
 * Tell whether a string is a well-formed UTF-8 byte sequence.
 *
 * @param	string	$str		String to check
 * @return	boolean				True if the string is valid UTF-8, false if it is corrupted
 */
function utf8_valid($str)
{
	// An empty pattern compiled with the /u modifier matches any subject that is
	// well-formed UTF-8; on a malformed subject preg_match() fails and returns false.
	// (mb_check_encoding($str, 'UTF-8') or a strict mb_detect_encoding($str, 'UTF-8', true)
	// are two other ways to perform the same test.)
	$matched = preg_match('//u', $str);
	if ($matched) {
		return true;
	}
	return false;
}
/** /**
* Check if a string is in ASCII * Check if a string is in ASCII
* *

View File

@@ -996,8 +996,10 @@ class EmailCollector extends CommonObject
} else { } else {
// Nothing can be done for this param // Nothing can be done for this param
$errorforthisaction++; $errorforthisaction++;
$this->error = 'The extract rule to use has on an unknown source (must be HEADER, SUBJECT or BODY)'; $this->error = 'The extract rule to use to overwrite properties has on an unknown source (must be HEADER, SUBJECT or BODY)';
$this->errors[] = $this->error; $this->errors[] = $this->error;
$operationslog .= '<br>'.$this->error;
} }
} elseif (preg_match('/^(SET|SETIFEMPTY):(.*)$/', $valueforproperty, $regforregex)) { } elseif (preg_match('/^(SET|SETIFEMPTY):(.*)$/', $valueforproperty, $regforregex)) {
$valuecurrent = ''; $valuecurrent = '';
@@ -1750,7 +1752,15 @@ class EmailCollector extends CommonObject
//$htmlmsg,$plainmsg,$charset,$attachments //$htmlmsg,$plainmsg,$charset,$attachments
$messagetext = $plainmsg ? $plainmsg : dol_string_nohtmltag($htmlmsg, 0); $messagetext = $plainmsg ? $plainmsg : dol_string_nohtmltag($htmlmsg, 0);
// Removed emojis // Removed emojis
$messagetext = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $messagetext);
if (utf8_valid($messagetext)) {
//$messagetext = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $messagetext);
$messagetext = $this->removeEmoji($messagetext);
} else {
$operationslog .= '<br>Discarded - Email body is not valid utf8';
dol_syslog(" Discarded - Email body is not valid utf8");
continue; // Exclude email
}
if ($searchfilterexcludebody) { if ($searchfilterexcludebody) {
if (preg_match('/'.preg_quote($searchfilterexcludebody, '/').'/ms', $messagetext)) { if (preg_match('/'.preg_quote($searchfilterexcludebody, '/').'/ms', $messagetext)) {
@@ -2264,8 +2274,10 @@ class EmailCollector extends CommonObject
} else { } else {
// Nothing can be done for this param // Nothing can be done for this param
$errorforactions++; $errorforactions++;
$this->error = 'The extract rule to use to load thirdparty has an unknown source (must be HEADER, SUBJECT or BODY)'; $this->error = 'The extract rule to use to load thirdparty for email '.$msgid.' has an unknown source (must be HEADER, SUBJECT or BODY)';
$this->errors[] = $this->error; $this->errors[] = $this->error;
$operationslog .= '<br>'.$this->error;
} }
} elseif (preg_match('/^(SET|SETIFEMPTY):(.*)$/', $valueforproperty, $reg)) { } elseif (preg_match('/^(SET|SETIFEMPTY):(.*)$/', $valueforproperty, $reg)) {
//if (preg_match('/^options_/', $tmpproperty)) $object->array_options[preg_replace('/^options_/', '', $tmpproperty)] = $reg[1]; //if (preg_match('/^options_/', $tmpproperty)) $object->array_options[preg_replace('/^options_/', '', $tmpproperty)] = $reg[1];
@@ -3378,4 +3390,24 @@ class EmailCollector extends CommonObject
return $subject; return $subject;
} }
/**
 * Remove emoji characters from email content.
 *
 * Every code point in one of the following ranges is deleted:
 * U+1F600-U+1F64F, U+1F300-U+1F5FF, U+1F680-U+1F6FF, U+2600-U+26FF,
 * U+2700-U+27BF, U+1F900-U+1F9FF and U+1F1E0-U+1F1FF.
 * All other characters are left untouched.
 *
 * @param	string	$text		String to sanitize
 * @return	string				Sanitized string. If the regular expression engine fails
 *								(for example because $text is not valid UTF-8), $text is returned unchanged.
 */
protected function removeEmoji($text)
{
	// A single character class covering all ranges lets the text be scanned once
	// instead of once per range.
	$pattern = '/['
		.'\x{1F600}-\x{1F64F}'
		.'\x{1F300}-\x{1F5FF}'
		.'\x{1F680}-\x{1F6FF}'
		.'\x{2600}-\x{26FF}'
		.'\x{2700}-\x{27BF}'
		.'\x{1F900}-\x{1F9FF}'
		.'\x{1F1E0}-\x{1F1FF}'
		.']/u';

	$cleaned = preg_replace($pattern, '', $text);

	// preg_replace() returns null on a PCRE error (e.g. malformed UTF-8 subject).
	// Keep the original text in that case rather than silently emptying it.
	if ($cleaned === null) {
		return $text;
	}

	return $cleaned;
}
} }