commit 75d5d02: [Minor] Skip double utf8 checks

Vsevolod Stakhov vsevolod at highsecure.ru
Wed Dec 23 17:49:05 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-12-23 17:48:34 +0000
URL: https://github.com/rspamd/rspamd/commit/75d5d020bce87534c3f802e20bbf0bca465705d2 (HEAD -> master)

[Minor] Skip double utf8 checks

---
 src/libmime/archives.c      |  2 +-
 src/libmime/content_type.c  |  2 +-
 src/libmime/mime_encoding.c | 15 +++++++++------
 src/libmime/mime_encoding.h |  3 ++-
 4 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/libmime/archives.c b/src/libmime/archives.c
index 595ca8711..c6e2066f5 100644
--- a/src/libmime/archives.c
+++ b/src/libmime/archives.c
@@ -58,7 +58,7 @@ rspamd_archive_file_try_utf (struct rspamd_task *task,
 	const gchar *charset = NULL, *p, *end;
 	GString *res;
 
-	charset = rspamd_mime_charset_find_by_content (in, inlen);
+	charset = rspamd_mime_charset_find_by_content (in, inlen, TRUE);
 
 	if (charset) {
 		UChar *tmp;
diff --git a/src/libmime/content_type.c b/src/libmime/content_type.c
index cad48a03b..8c50a484e 100644
--- a/src/libmime/content_type.c
+++ b/src/libmime/content_type.c
@@ -69,7 +69,7 @@ rspamd_rfc2231_decode (rspamd_mempool_t *pool,
 
 		if (charset == NULL) {
 			/* Try heuristic */
-			charset = rspamd_mime_charset_find_by_content (value_start, r);
+			charset = rspamd_mime_charset_find_by_content (value_start, r, TRUE);
 		}
 
 		if (charset == NULL) {
diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c
index 5b67aec65..04027552e 100644
--- a/src/libmime/mime_encoding.c
+++ b/src/libmime/mime_encoding.c
@@ -596,14 +596,17 @@ rspamd_mime_charset_utf_enforce (gchar *in, gsize len)
 }
 
 const char *
-rspamd_mime_charset_find_by_content (const gchar *in, gsize inlen)
+rspamd_mime_charset_find_by_content (const gchar *in, gsize inlen,
+									 bool check_utf8)
 {
 	int nconsumed;
 	bool is_reliable;
 	const gchar *ced_name;
 
-	if (rspamd_fast_utf8_validate (in, inlen) == 0) {
-		return UTF8_CHARSET;
+	if (check_utf8) {
+		if (rspamd_fast_utf8_validate (in, inlen) == 0) {
+			return UTF8_CHARSET;
+		}
 	}
 
 
@@ -641,7 +644,7 @@ rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
 		if (content_check) {
 			if (rspamd_fast_utf8_validate (in, len) != 0) {
 				real_charset = rspamd_mime_charset_find_by_content (in,
-						MIN (RSPAMD_CHARSET_MAX_CONTENT, len));
+						MIN (RSPAMD_CHARSET_MAX_CONTENT, len), FALSE);
 
 				if (real_charset) {
 
@@ -713,7 +716,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
 	if (part->ct->charset.len == 0) {
 		if (need_charset_heuristic) {
 			charset = rspamd_mime_charset_find_by_content (part_content->data,
-					MIN (RSPAMD_CHARSET_MAX_CONTENT, part_content->len));
+					MIN (RSPAMD_CHARSET_MAX_CONTENT, part_content->len), FALSE);
 
 			if (charset != NULL) {
 				msg_info_task ("detected charset %s", charset);
@@ -738,7 +741,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
 			/* We don't know the real charset but can try heuristic */
 			if (need_charset_heuristic) {
 				charset = rspamd_mime_charset_find_by_content (part_content->data,
-						MIN (RSPAMD_CHARSET_MAX_CONTENT, part_content->len));
+						MIN (RSPAMD_CHARSET_MAX_CONTENT, part_content->len), FALSE);
 				msg_info_task ("detected charset: %s", charset);
 				checked = TRUE;
 				text_part->real_charset = charset;
diff --git a/src/libmime/mime_encoding.h b/src/libmime/mime_encoding.h
index 22f0ee818..56216b46c 100644
--- a/src/libmime/mime_encoding.h
+++ b/src/libmime/mime_encoding.h
@@ -138,7 +138,8 @@ rspamd_converter_to_uchars (struct rspamd_charset_converter *cnv,
  * @param inlen
  * @return detected charset name or NULL
  */
-const char *rspamd_mime_charset_find_by_content (const gchar *in, gsize inlen);
+const char *rspamd_mime_charset_find_by_content (const gchar *in, gsize inlen,
+												 bool check_utf8);
 
 #ifdef  __cplusplus
 }


More information about the Commits mailing list