/**
 * Tell whether a string is a well-formed UTF-8 byte sequence.
 *
 * The test relies on PCRE: with the "u" modifier the subject is validated as UTF-8
 * before any matching happens, so the empty pattern matches every valid string
 * (including the empty one) and preg_match() fails on a malformed one.
 *
 * @param	string	$str	String to check
 * @return	boolean			True if string is a valid UTF-8 string, false if corrupted
 */
function utf8_valid($str)
{
	// preg_match() gives 1 on a match and false on a PCRE error (malformed UTF-8).
	$matched = preg_match('//u', $str);

	return $matched === 1;
}
'.$this->error; } } elseif (preg_match('/^(SET|SETIFEMPTY):(.*)$/', $valueforproperty, $regforregex)) { $valuecurrent = ''; @@ -1750,7 +1752,15 @@ class EmailCollector extends CommonObject //$htmlmsg,$plainmsg,$charset,$attachments $messagetext = $plainmsg ? $plainmsg : dol_string_nohtmltag($htmlmsg, 0); // Removed emojis - $messagetext = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $messagetext); + + if (utf8_valid($messagetext)) { + //$messagetext = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $messagetext); + $messagetext = $this->removeEmoji($messagetext); + } else { + $operationslog .= '
Discarded - Email body is not valid utf8'; + dol_syslog(" Discarded - Email body is not valid utf8"); + continue; // Exclude email + } if ($searchfilterexcludebody) { if (preg_match('/'.preg_quote($searchfilterexcludebody, '/').'/ms', $messagetext)) { @@ -2264,8 +2274,10 @@ class EmailCollector extends CommonObject } else { // Nothing can be done for this param $errorforactions++; - $this->error = 'The extract rule to use to load thirdparty has an unknown source (must be HEADER, SUBJECT or BODY)'; + $this->error = 'The extract rule to use to load thirdparty for email '.$msgid.' has an unknown source (must be HEADER, SUBJECT or BODY)'; $this->errors[] = $this->error; + + $operationslog .= '
/**
 * Remove emoji characters from a text.
 *
 * Every code point belonging to one of the following Unicode ranges is deleted
 * (not replaced by a placeholder):
 *  - U+1F600-U+1F64F  Emoticons
 *  - U+1F300-U+1F5FF  Miscellaneous Symbols and Pictographs
 *  - U+1F680-U+1F6FF  Transport and Map Symbols
 *  - U+2600-U+26FF    Miscellaneous Symbols
 *  - U+2700-U+27BF    Dingbats
 *  - U+1F900-U+1F9FF  Supplemental Symbols and Pictographs
 *  - U+1F1E0-U+1F1FF  Regional indicator symbols (flags)
 * Characters outside these ranges are left untouched.
 *
 * @param	string	$text	String to sanitize (expected to be valid UTF-8)
 * @return	string			Sanitized string. If the regex engine fails (for example on
 * 							malformed UTF-8), the input is returned unchanged.
 */
protected function removeEmoji($text)
{
	// A single character class covering all ranges: the text is scanned once instead of
	// seven times, and the result is the same because deleting characters never creates new ones.
	$pattern = '/['
		.'\x{1F600}-\x{1F64F}'
		.'\x{1F300}-\x{1F5FF}'
		.'\x{1F680}-\x{1F6FF}'
		.'\x{2600}-\x{26FF}'
		.'\x{2700}-\x{27BF}'
		.'\x{1F900}-\x{1F9FF}'
		.'\x{1F1E0}-\x{1F1FF}'
		.']/u';

	$cleaned = preg_replace($pattern, '', $text);

	// preg_replace() returns null when PCRE reports an error (e.g. invalid UTF-8 with the "u" modifier).
	// Keep the original text in that case rather than silently losing the whole content.
	if ($cleaned === null) {
		return $text;
	}

	return $cleaned;
}