commit 3c77297: [Rework] Change mime part specifics handling
Vsevolod Stakhov
vsevolod at highsecure.ru
Thu Nov 21 13:00:07 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-11-21 12:35:58 +0000
URL: https://github.com/rspamd/rspamd/commit/3c772979bbcfee948054e853aa8325b8e43ac944 (HEAD -> master)
[Rework] Change mime part specifics handling
---
src/libmime/archives.c | 23 ++++++-----------------
src/libmime/content_type.h | 4 ----
src/libmime/images.c | 6 +++---
src/libmime/message.c | 23 +++++++++++++++--------
src/libmime/message.h | 22 ++++++++++++++++++----
src/libmime/mime_expressions.c | 8 ++++----
src/libmime/mime_parser.c | 6 +++++-
src/lua/lua_mimepart.c | 24 ++++++++++++------------
src/lua/lua_task.c | 4 ++--
src/plugins/fuzzy_check.c | 4 ++--
10 files changed, 67 insertions(+), 57 deletions(-)
diff --git a/src/libmime/archives.c b/src/libmime/archives.c
index 5701ce95c..5b08dd19b 100644
--- a/src/libmime/archives.c
+++ b/src/libmime/archives.c
@@ -278,7 +278,7 @@ rspamd_archive_process_zip (struct rspamd_task *task,
cd += fname_len + comment_len + extra_len + cd_basic_len;
}
- part->flags |= RSPAMD_MIME_PART_ARCHIVE;
+ part->part_type = RSPAMD_MIME_PART_ARCHIVE;
part->specific.arch = arch;
if (part->cd) {
@@ -510,7 +510,7 @@ rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start,
}
end:
- part->flags |= RSPAMD_MIME_PART_ARCHIVE;
+ part->part_type = RSPAMD_MIME_PART_ARCHIVE;
part->specific.arch = arch;
arch->archive_name = &part->cd->filename;
arch->size = part->parsed_data.len;
@@ -734,7 +734,7 @@ rspamd_archive_process_rar (struct rspamd_task *task,
}
end:
-part->flags |= RSPAMD_MIME_PART_ARCHIVE;
+ part->part_type = RSPAMD_MIME_PART_ARCHIVE;
part->specific.arch = arch;
if (part->cd != NULL) {
arch->archive_name = &part->cd->filename;
@@ -1674,7 +1674,7 @@ rspamd_archive_process_7zip (struct rspamd_task *task,
while ((p = rspamd_7zip_read_next_section (task, p, end, arch)) != NULL);
- part->flags |= RSPAMD_MIME_PART_ARCHIVE;
+ part->part_type = RSPAMD_MIME_PART_ARCHIVE;
part->specific.arch = arch;
if (part->cd != NULL) {
arch->archive_name = &part->cd->filename;
@@ -1824,7 +1824,7 @@ rspamd_archive_process_gzip (struct rspamd_task *task,
set:
/* Set archive data */
- part->flags |= RSPAMD_MIME_PART_ARCHIVE;
+ part->part_type = RSPAMD_MIME_PART_ARCHIVE;
part->specific.arch = arch;
if (part->cd) {
@@ -1918,7 +1918,7 @@ rspamd_archives_process (struct rspamd_task *task)
const guchar gz_magic[] = {0x1F, 0x8B};
PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
- if (!(part->flags & (RSPAMD_MIME_PART_TEXT|RSPAMD_MIME_PART_IMAGE))) {
+ if (part->part_type == RSPAMD_MIME_PART_UNDEFINED) {
if (part->parsed_data.len > 0) {
if (rspamd_archive_cheat_detect (part, "zip",
zip_magic, sizeof (zip_magic))) {
@@ -1936,17 +1936,6 @@ rspamd_archives_process (struct rspamd_task *task)
gz_magic, sizeof (gz_magic))) {
rspamd_archive_process_gzip (task, part);
}
-
- if (IS_CT_TEXT (part->ct) &&
- (part->flags & RSPAMD_MIME_PART_ARCHIVE) &&
- part->specific.arch) {
- struct rspamd_archive *arch = part->specific.arch;
-
- msg_info_task ("found %s archive with incorrect content-type: %T/%T",
- rspamd_archive_type_str (arch->type),
- &part->ct->type, &part->ct->subtype);
- part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
- }
}
}
}
diff --git a/src/libmime/content_type.h b/src/libmime/content_type.h
index 2e3bf5e40..49bba4269 100644
--- a/src/libmime/content_type.h
+++ b/src/libmime/content_type.h
@@ -34,10 +34,6 @@ enum rspamd_content_type_flags {
RSPAMD_CONTENT_TYPE_MISSING = 1 << 5,
};
-#define IS_CT_MULTIPART(ct) ((ct) && ((ct)->flags & RSPAMD_CONTENT_TYPE_MULTIPART))
-#define IS_CT_TEXT(ct) ((ct) && ((ct)->flags & RSPAMD_CONTENT_TYPE_TEXT))
-#define IS_CT_MESSAGE(ct) ((ct) &&((ct)->flags & RSPAMD_CONTENT_TYPE_MESSAGE))
-
enum rspamd_content_param_flags {
RSPAMD_CONTENT_PARAM_NORMAL = 0,
RSPAMD_CONTENT_PARAM_RFC2231 = (1 << 0),
diff --git a/src/libmime/images.c b/src/libmime/images.c
index faa7a6b2e..218e947fc 100644
--- a/src/libmime/images.c
+++ b/src/libmime/images.c
@@ -52,7 +52,7 @@ rspamd_images_process (struct rspamd_task *task)
struct rspamd_mime_part *part;
PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
- if (!(part->flags & (RSPAMD_MIME_PART_TEXT|RSPAMD_MIME_PART_ARCHIVE))) {
+ if (part->part_type == RSPAMD_MIME_PART_UNDEFINED) {
if (part->detected_type &&
strcmp (part->detected_type, "image") == 0 &&
part->parsed_data.len > 0) {
@@ -610,7 +610,7 @@ process_image (struct rspamd_task *task, struct rspamd_mime_part *part)
img->parent = part;
- part->flags |= RSPAMD_MIME_PART_IMAGE;
+ part->part_type = RSPAMD_MIME_PART_IMAGE;
part->specific.img = img;
}
}
@@ -715,7 +715,7 @@ rspamd_images_link (struct rspamd_task *task)
guint i;
PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
- if (part->flags & RSPAMD_MIME_PART_IMAGE) {
+ if (part->part_type == RSPAMD_MIME_PART_IMAGE) {
rspamd_image_process_part (task, part);
}
}
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 9009408cb..7c7609cf8 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -821,9 +821,10 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
gboolean found_html = FALSE, found_txt = FALSE, straight_ct = FALSE;
enum rspamd_action_type act;
- if ((IS_CT_TEXT (mime_part->ct) && (straight_ct = TRUE)) ||
- (mime_part->detected_type &&
- strcmp (mime_part->detected_type, "text") == 0)) {
+ if (((mime_part->ct && (mime_part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT)) &&
+ (straight_ct = TRUE)) ||
+ (mime_part->detected_type &&
+ strcmp (mime_part->detected_type, "text") == 0)) {
found_txt = TRUE;
html_tok.begin = "html";
@@ -872,7 +873,7 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
}
g_ptr_array_add (MESSAGE_FIELD (task, text_parts), text_part);
- mime_part->flags |= RSPAMD_MIME_PART_TEXT;
+ mime_part->part_type = RSPAMD_MIME_PART_TEXT;
mime_part->specific.txt = text_part;
act = rspamd_check_gtube (task, text_part);
@@ -1007,7 +1008,7 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
}
else {
/* Check sanity */
- if (IS_CT_TEXT (part->ct)) {
+ if (part->ct && (part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT)) {
RSPAMD_FTOK_FROM_STR (&srch, "application");
if (rspamd_ftok_cmp (&ct->type, &srch) == 0) {
@@ -1062,11 +1063,17 @@ rspamd_message_dtor (struct rspamd_message *msg)
rspamd_message_headers_unref (p->raw_headers);
}
- if (IS_CT_MULTIPART (p->ct)) {
+ if (IS_PART_MULTIPART (p)) {
if (p->specific.mp->children) {
g_ptr_array_free (p->specific.mp->children, TRUE);
}
}
+
+ if (p->part_type == RSPAMD_MIME_PART_CUSTOM_LUA && p->specific.lua_ref != -1) {
+ luaL_unref (msg->task->cfg->lua_state,
+ LUA_REGISTRYINDEX,
+ p->specific.lua_ref);
+ }
}
PTR_ARRAY_FOREACH (msg->text_parts, i, tp) {
@@ -1104,6 +1111,7 @@ rspamd_message_new (struct rspamd_task *task)
msg->parts = g_ptr_array_sized_new (4);
msg->text_parts = g_ptr_array_sized_new (2);
+ msg->task = task;
REF_INIT_RETAIN (msg, rspamd_message_dtor);
@@ -1441,8 +1449,7 @@ rspamd_message_process (struct rspamd_task *task)
lua_settop (L, func_pos);
}
- if (!(part->flags & (RSPAMD_MIME_PART_IMAGE|RSPAMD_MIME_PART_ARCHIVE)) &&
- (!part->ct || !(part->ct->flags & (RSPAMD_CONTENT_TYPE_MULTIPART|RSPAMD_CONTENT_TYPE_MESSAGE)))) {
+ if (part->part_type == RSPAMD_MIME_PART_UNDEFINED) {
rspamd_message_process_text_part_maybe (task, part);
}
}
diff --git a/src/libmime/message.h b/src/libmime/message.h
index f3a8315fc..7f6ea86dd 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -30,14 +30,25 @@ struct rspamd_image;
struct rspamd_archive;
enum rspamd_mime_part_flags {
- RSPAMD_MIME_PART_TEXT = (1 << 0),
RSPAMD_MIME_PART_ATTACHEMENT = (1 << 1),
- RSPAMD_MIME_PART_IMAGE = (1 << 2),
- RSPAMD_MIME_PART_ARCHIVE = (1 << 3),
RSPAMD_MIME_PART_BAD_CTE = (1 << 4),
- RSPAMD_MIME_PART_MISSING_CTE = (1 << 5)
+ RSPAMD_MIME_PART_MISSING_CTE = (1 << 5),
};
+enum rspamd_mime_part_type {
+ RSPAMD_MIME_PART_UNDEFINED = 0,
+ RSPAMD_MIME_PART_MULTIPART,
+ RSPAMD_MIME_PART_MESSAGE,
+ RSPAMD_MIME_PART_TEXT,
+ RSPAMD_MIME_PART_ARCHIVE,
+ RSPAMD_MIME_PART_IMAGE,
+ RSPAMD_MIME_PART_CUSTOM_LUA
+};
+
+#define IS_PART_MULTIPART(part) ((part) && ((part)->part_type == RSPAMD_MIME_PART_MULTIPART))
+#define IS_PART_TEXT(part) ((part) && ((part)->part_type == RSPAMD_MIME_PART_TEXT))
+#define IS_PART_MESSAGE(part) ((part) &&((part)->part_type == RSPAMD_MIME_PART_MESSAGE))
+
enum rspamd_cte {
RSPAMD_CTE_UNKNOWN = 0,
RSPAMD_CTE_7BIT = 1,
@@ -72,6 +83,7 @@ struct rspamd_mime_part {
enum rspamd_cte cte;
guint flags;
+ enum rspamd_mime_part_type part_type;
guint id;
union {
@@ -79,6 +91,7 @@ struct rspamd_mime_part {
struct rspamd_mime_text_part *txt;
struct rspamd_image *img;
struct rspamd_archive *arch;
+ gint lua_ref;
} specific;
guchar digest[rspamd_cryptobox_HASHBYTES];
@@ -153,6 +166,7 @@ struct rspamd_message {
GHashTable *emails; /**< list of parsed emails */
struct rspamd_mime_headers_table *raw_headers; /**< list of raw headers */
struct rspamd_mime_header *headers_order; /**< order of raw headers */
+ struct rspamd_task *task;
GPtrArray *rcpt_mime;
GPtrArray *from_mime;
guchar digest[16];
diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c
index 7354b3aeb..e797bb9b6 100644
--- a/src/libmime/mime_expressions.c
+++ b/src/libmime/mime_expressions.c
@@ -1480,7 +1480,7 @@ rspamd_compare_transfer_encoding (struct rspamd_task * task,
}
PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
- if (IS_CT_TEXT (part->ct)) {
+ if (IS_PART_TEXT (part)) {
if (part->cte == cte) {
return TRUE;
}
@@ -1800,7 +1800,7 @@ rspamd_content_type_compare_param (struct rspamd_task * task,
* If user did not specify argument, let's assume that he wants
* recursive search if mime part is multipart/mixed
*/
- if (IS_CT_MULTIPART (cur_part->ct)) {
+ if (IS_PART_MULTIPART (cur_part)) {
recursive = TRUE;
}
}
@@ -1880,7 +1880,7 @@ rspamd_content_type_has_param (struct rspamd_task * task,
* If user did not specify argument, let's assume that he wants
* recursive search if mime part is multipart/mixed
*/
- if (IS_CT_MULTIPART (cur_part->ct)) {
+ if (IS_PART_MULTIPART (cur_part)) {
recursive = TRUE;
}
}
@@ -1955,7 +1955,7 @@ rspamd_content_type_check (struct rspamd_task *task,
* If user did not specify argument, let's assume that he wants
* recursive search if mime part is multipart/mixed
*/
- if (IS_CT_MULTIPART (ct)) {
+ if (IS_PART_MULTIPART (cur_part)) {
recursive = TRUE;
}
}
diff --git a/src/libmime/mime_parser.c b/src/libmime/mime_parser.c
index 5d4ab9f7c..7b38b80b1 100644
--- a/src/libmime/mime_parser.c
+++ b/src/libmime/mime_parser.c
@@ -508,7 +508,7 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task,
}
}
- if (IS_CT_TEXT (part->ct)) {
+ if (part->ct && (part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT)) {
/* Need to copy text as we have couple of in-place change functions */
parsed = rspamd_fstring_sized_new (part->raw_data.len);
parsed->len = part->raw_data.len;
@@ -720,6 +720,7 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task,
if (sel->flags & RSPAMD_CONTENT_TYPE_MULTIPART) {
st->nesting ++;
g_ptr_array_add (st->stack, npart);
+ npart->part_type = RSPAMD_MIME_PART_MULTIPART;
npart->specific.mp = rspamd_mempool_alloc0 (task->task_pool,
sizeof (struct rspamd_mime_multipart));
memcpy (&npart->specific.mp->boundary, &sel->orig_boundary,
@@ -729,6 +730,7 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task,
else if (sel->flags & RSPAMD_CONTENT_TYPE_MESSAGE) {
st->nesting ++;
g_ptr_array_add (st->stack, npart);
+ npart->part_type = RSPAMD_MIME_PART_MESSAGE;
if ((ret = rspamd_mime_parse_normal_part (task, npart, st, err))
== RSPAMD_MIME_PARSE_OK) {
@@ -1366,6 +1368,7 @@ rspamd_mime_parse_message (struct rspamd_task *task,
if (sel->flags & RSPAMD_CONTENT_TYPE_MULTIPART) {
g_ptr_array_add (nst->stack, npart);
nst->nesting ++;
+ npart->part_type = RSPAMD_MIME_PART_MULTIPART;
npart->specific.mp = rspamd_mempool_alloc0 (task->task_pool,
sizeof (struct rspamd_mime_multipart));
memcpy (&npart->specific.mp->boundary, &sel->orig_boundary,
@@ -1375,6 +1378,7 @@ rspamd_mime_parse_message (struct rspamd_task *task,
else if (sel->flags & RSPAMD_CONTENT_TYPE_MESSAGE) {
if ((ret = rspamd_mime_parse_normal_part (task, npart, nst, err))
== RSPAMD_MIME_PARSE_OK) {
+ npart->part_type = RSPAMD_MIME_PART_MESSAGE;
ret = rspamd_mime_parse_message (task, npart, nst, err);
}
}
diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c
index 01c64ae64..f706b360f 100644
--- a/src/lua/lua_mimepart.c
+++ b/src/lua/lua_mimepart.c
@@ -1564,14 +1564,14 @@ lua_mimepart_get_boundary (lua_State * L)
return luaL_error (L, "invalid arguments");
}
- if (IS_CT_MULTIPART (part->ct)) {
+ if (IS_PART_MULTIPART (part)) {
lua_pushlstring (L, part->specific.mp->boundary.begin,
part->specific.mp->boundary.len);
}
else {
parent = part->parent_part;
- if (!parent || !IS_CT_MULTIPART (parent->ct)) {
+ if (!parent || !IS_PART_MULTIPART (parent)) {
lua_pushnil (L);
}
else {
@@ -1670,7 +1670,7 @@ lua_mimepart_is_image (lua_State * L)
return luaL_error (L, "invalid arguments");
}
- lua_pushboolean (L, (part->flags & RSPAMD_MIME_PART_IMAGE) ? true : false);
+ lua_pushboolean (L, part->part_type == RSPAMD_MIME_PART_IMAGE);
return 1;
}
@@ -1685,7 +1685,7 @@ lua_mimepart_is_archive (lua_State * L)
return luaL_error (L, "invalid arguments");
}
- lua_pushboolean (L, (part->flags & RSPAMD_MIME_PART_ARCHIVE) ? true : false);
+ lua_pushboolean (L, part->part_type == RSPAMD_MIME_PART_ARCHIVE);
return 1;
}
@@ -1700,7 +1700,7 @@ lua_mimepart_is_multipart (lua_State * L)
return luaL_error (L, "invalid arguments");
}
- lua_pushboolean (L, IS_CT_MULTIPART (part->ct) ? true : false);
+ lua_pushboolean (L, IS_PART_MULTIPART (part) ? true : false);
return 1;
}
@@ -1715,7 +1715,7 @@ lua_mimepart_is_message (lua_State * L)
return luaL_error (L, "invalid arguments");
}
- lua_pushboolean (L, IS_CT_MESSAGE (part->ct) ? true : false);
+ lua_pushboolean (L, IS_PART_MESSAGE (part) ? true : false);
return 1;
}
@@ -1730,7 +1730,7 @@ lua_mimepart_is_attachment (lua_State * L)
return luaL_error (L, "invalid arguments");
}
- if (!(part->flags & (RSPAMD_MIME_PART_IMAGE))) {
+ if (part->part_type != RSPAMD_MIME_PART_IMAGE) {
if (part->cd && part->cd->type == RSPAMD_CT_ATTACHMENT) {
lua_pushboolean (L, true);
}
@@ -1761,7 +1761,7 @@ lua_mimepart_is_text (lua_State * L)
return luaL_error (L, "invalid arguments");
}
- lua_pushboolean (L, (part->flags & RSPAMD_MIME_PART_TEXT) ? true : false);
+ lua_pushboolean (L, part->part_type == RSPAMD_MIME_PART_TEXT);
return 1;
}
@@ -1798,7 +1798,7 @@ lua_mimepart_get_image (lua_State * L)
return luaL_error (L, "invalid arguments");
}
- if (!(part->flags & RSPAMD_MIME_PART_IMAGE) || part->specific.img == NULL) {
+ if (part->part_type != RSPAMD_MIME_PART_IMAGE || part->specific.img == NULL) {
lua_pushnil (L);
}
else {
@@ -1821,7 +1821,7 @@ lua_mimepart_get_archive (lua_State * L)
return luaL_error (L, "invalid arguments");
}
- if (!(part->flags & RSPAMD_MIME_PART_ARCHIVE) || part->specific.arch == NULL) {
+ if (part->part_type != RSPAMD_MIME_PART_ARCHIVE || part->specific.arch == NULL) {
lua_pushnil (L);
}
else {
@@ -1845,7 +1845,7 @@ lua_mimepart_get_children (lua_State * L)
return luaL_error (L, "invalid arguments");
}
- if (!IS_CT_MULTIPART (part->ct) || part->specific.mp->children == NULL) {
+ if (!IS_PART_MULTIPART (part) || part->specific.mp->children == NULL) {
lua_pushnil (L);
}
else {
@@ -1897,7 +1897,7 @@ lua_mimepart_get_text (lua_State * L)
return luaL_error (L, "invalid arguments");
}
- if (!(part->flags & RSPAMD_MIME_PART_TEXT) || part->specific.txt == NULL) {
+ if (part->part_type != RSPAMD_MIME_PART_TEXT || part->specific.txt == NULL) {
lua_pushnil (L);
}
else {
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index 0be4699fe..cb725aadf 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -4131,7 +4131,7 @@ lua_task_get_images (lua_State *L)
lua_createtable (L, MESSAGE_FIELD (task, parts)->len, 0);
PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
- if (part->flags & RSPAMD_MIME_PART_IMAGE) {
+ if (part->part_type == RSPAMD_MIME_PART_IMAGE) {
pimg = lua_newuserdata (L, sizeof (struct rspamd_image *));
rspamd_lua_setclass (L, "rspamd{image}", -1);
*pimg = part->specific.img;
@@ -4168,7 +4168,7 @@ lua_task_get_archives (lua_State *L)
lua_createtable (L, MESSAGE_FIELD (task, parts)->len, 0);
PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
- if (part->flags & RSPAMD_MIME_PART_ARCHIVE) {
+ if (part->part_type == RSPAMD_MIME_PART_ARCHIVE) {
parch = lua_newuserdata (L, sizeof (struct rspamd_archive *));
rspamd_lua_setclass (L, "rspamd{archive}", -1);
*parch = part->specific.arch;
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index 57e9bf8cb..5f2c5a0c4 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -2717,7 +2717,7 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
io = NULL;
if (check_part) {
- if (mime_part->flags & RSPAMD_MIME_PART_TEXT &&
+ if (mime_part->part_type == RSPAMD_MIME_PART_TEXT &&
!(flags & FUZZY_CHECK_FLAG_NOTEXT)) {
part = mime_part->specific.txt;
@@ -2730,7 +2730,7 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
part,
mime_part);
}
- else if (mime_part->flags & RSPAMD_MIME_PART_IMAGE &&
+ else if (mime_part->part_type == RSPAMD_MIME_PART_IMAGE &&
!(flags & FUZZY_CHECK_FLAG_NOIMAGES)) {
image = mime_part->specific.img;
More information about the Commits mailing list