diff --git a/htdocs/core/lib/functions.lib.php b/htdocs/core/lib/functions.lib.php
index 03783e37561..eb72e2ed8c8 100644
--- a/htdocs/core/lib/functions.lib.php
+++ b/htdocs/core/lib/functions.lib.php
@@ -8794,6 +8794,22 @@ function utf8_check($str)
return true;
}
+/**
+ * Tell whether a string is made only of well-formed UTF-8 sequences
+ *
+ * @param	string	$str	String to check
+ * @return	boolean			True if the string is valid UTF-8, false if it is corrupted
+ */
+function utf8_valid($str)
+{
+	// The empty pattern matches any subject; with the "u" modifier PCRE validates the
+	// subject as UTF-8 first, so preg_match() gives 1 when valid and false when not.
+	// Two other ways to run the same test: mb_check_encoding($str, 'UTF-8')
+	// or: false !== mb_detect_encoding($str, 'UTF-8', true)
+	return preg_match('//u', $str) === 1;
+}
+
+
/**
* Check if a string is in ASCII
*
diff --git a/htdocs/emailcollector/class/emailcollector.class.php b/htdocs/emailcollector/class/emailcollector.class.php
index 85a0d6e4332..e3c96752691 100644
--- a/htdocs/emailcollector/class/emailcollector.class.php
+++ b/htdocs/emailcollector/class/emailcollector.class.php
@@ -996,8 +996,10 @@ class EmailCollector extends CommonObject
} else {
// Nothing can be done for this param
$errorforthisaction++;
- $this->error = 'The extract rule to use has on an unknown source (must be HEADER, SUBJECT or BODY)';
+ $this->error = 'The extract rule to use to overwrite properties has on an unknown source (must be HEADER, SUBJECT or BODY)';
$this->errors[] = $this->error;
+
+ $operationslog .= '
'.$this->error;
}
} elseif (preg_match('/^(SET|SETIFEMPTY):(.*)$/', $valueforproperty, $regforregex)) {
$valuecurrent = '';
@@ -1750,7 +1752,15 @@ class EmailCollector extends CommonObject
//$htmlmsg,$plainmsg,$charset,$attachments
$messagetext = $plainmsg ? $plainmsg : dol_string_nohtmltag($htmlmsg, 0);
// Removed emojis
- $messagetext = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $messagetext);
+
+ if (utf8_valid($messagetext)) {
+ //$messagetext = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $messagetext);
+ $messagetext = $this->removeEmoji($messagetext);
+ } else {
+ $operationslog .= '
Discarded - Email body is not valid utf8';
+ dol_syslog(" Discarded - Email body is not valid utf8");
+ continue; // Exclude email
+ }
if ($searchfilterexcludebody) {
if (preg_match('/'.preg_quote($searchfilterexcludebody, '/').'/ms', $messagetext)) {
@@ -2264,8 +2274,10 @@ class EmailCollector extends CommonObject
} else {
// Nothing can be done for this param
$errorforactions++;
- $this->error = 'The extract rule to use to load thirdparty has an unknown source (must be HEADER, SUBJECT or BODY)';
+ $this->error = 'The extract rule to use to load thirdparty for email '.$msgid.' has an unknown source (must be HEADER, SUBJECT or BODY)';
$this->errors[] = $this->error;
+
+ $operationslog .= '
'.$this->error;
}
} elseif (preg_match('/^(SET|SETIFEMPTY):(.*)$/', $valueforproperty, $reg)) {
//if (preg_match('/^options_/', $tmpproperty)) $object->array_options[preg_replace('/^options_/', '', $tmpproperty)] = $reg[1];
@@ -3378,4 +3390,24 @@ class EmailCollector extends CommonObject
return $subject;
}
+
+	/**
+	 * Remove emoji characters from an email content
+	 *
+	 * @param	string	$text	String to sanitize (expected to be valid UTF-8)
+	 * @return	string			String without the emoji code points, or $text unchanged if it can't be processed
+	 */
+	protected function removeEmoji($text)
+	{
+		// Strip all targeted blocks in a single pass: emoticons, misc symbols and pictographs,
+		// transport and map symbols, misc symbols, dingbats, supplemental symbols, regional indicators.
+		$ranges = '\x{1F600}-\x{1F64F}\x{1F300}-\x{1F5FF}\x{1F680}-\x{1F6FF}';
+		$ranges .= '\x{2600}-\x{26FF}\x{2700}-\x{27BF}';
+		$ranges .= '\x{1F900}-\x{1F9FF}\x{1F1E0}-\x{1F1FF}';
+		$cleaned = preg_replace('/['.$ranges.']/u', '', $text);
+
+		// preg_replace() returns null on failure (e.g. malformed UTF-8 with the "u" modifier).
+		// Keep the original text then, instead of letting the null degrade into an empty string.
+		return ($cleaned === null) ? $text : $cleaned;
+	}
}