commit 1912eac: [Feature] Core: Add libmagic detection for all parts
Vsevolod Stakhov
vsevolod at highsecure.ru
Thu Dec 27 18:28:06 UTC 2018
Author: Vsevolod Stakhov
Date: 2018-12-11 12:01:52 +0000
URL: https://github.com/rspamd/rspamd/commit/1912eac2d678b2993b4ef1fa41e36ca7a38e8239
[Feature] Core: Add libmagic detection for all parts
---
src/libmime/archives.c | 4 +--
src/libmime/message.c | 51 ++++++++++++++++++++++-------
src/libmime/message.h | 1 +
src/lua/lua_mimepart.c | 87 ++++++++++++++++++++++++++++++++++++++++----------
4 files changed, 113 insertions(+), 30 deletions(-)
diff --git a/src/libmime/archives.c b/src/libmime/archives.c
index 9cfce6968..1f9a5c634 100644
--- a/src/libmime/archives.c
+++ b/src/libmime/archives.c
@@ -1509,8 +1509,8 @@ rspamd_archive_cheat_detect (struct rspamd_mime_part *part, const gchar *str,
}
if (magic_start != NULL) {
- if (part->parsed_data.len > magic_len && memcmp (part->parsed_data.begin,
- magic_start, magic_len) == 0) {
+ if (part->parsed_data.len > magic_len &&
+ memcmp (part->parsed_data.begin, magic_start, magic_len) == 0) {
return TRUE;
}
}
diff --git a/src/libmime/message.c b/src/libmime/message.c
index a5faaf017..bbae5e426 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -703,7 +703,7 @@ rspamd_message_process_html_text_part (struct rspamd_task *task,
return TRUE;
}
-static void
+static gboolean
rspamd_message_process_text_part_maybe (struct rspamd_task *task,
struct rspamd_mime_part *mime_part)
{
@@ -812,11 +812,11 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
mime_part->cd && mime_part->cd->type == RSPAMD_CT_ATTACHMENT &&
(task->cfg && !task->cfg->check_text_attachements)) {
debug_task ("skip attachments for checking as text parts");
- return;
+ return TRUE;
}
else if (!(found_txt || found_html)) {
/* Not a text part */
- return;
+ return FALSE;
}
text_part = rspamd_mempool_alloc0 (task->task_pool,
@@ -830,12 +830,12 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
if (found_html) {
if (!rspamd_message_process_html_text_part (task, text_part)) {
- return;
+ return FALSE;
}
}
else {
if (!rspamd_message_process_plain_text_part (task, text_part)) {
- return;
+ return FALSE;
}
}
@@ -866,7 +866,7 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
rspamd_task_insert_result (task, GTUBE_SYMBOL, 0, NULL);
- return;
+ return TRUE;
}
/* Post process part */
@@ -885,6 +885,8 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
}
rspamd_mime_part_create_words (task, text_part);
+
+ return TRUE;
}
/* Creates message from various data using libmagic to detect type */
@@ -900,15 +902,18 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
g_assert (start != NULL);
+ part = rspamd_mempool_alloc0 (task->task_pool, sizeof (*part));
+
tok = rspamd_task_get_request_header (task, "Content-Type");
if (tok) {
/* We have Content-Type defined */
ct = rspamd_content_type_parse (tok->begin, tok->len,
task->task_pool);
+ part->ct = ct;
}
- else if (task->cfg && task->cfg->libs_ctx) {
- /* Try to predict it by content (slow) */
+
+ if (task->cfg && task->cfg->libs_ctx) {
mb = magic_buffer (task->cfg->libs_ctx->libmagic,
start,
len);
@@ -918,12 +923,16 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
srch.len = strlen (mb);
ct = rspamd_content_type_parse (srch.begin, srch.len,
task->task_pool);
+ msg_warn_task ("construct fake mime of type: %s", mb);
+
+ if (!part->ct) {
+ part->ct = ct;
+ }
+
+ part->detected_ct = ct;
}
}
- msg_warn_task ("construct fake mime of type: %s", mb);
- part = rspamd_mempool_alloc0 (task->task_pool, sizeof (*part));
- part->ct = ct;
part->raw_data.begin = start;
part->raw_data.len = len;
part->parsed_data.begin = start;
@@ -1189,7 +1198,25 @@ rspamd_message_process (struct rspamd_task *task)
struct rspamd_mime_part *part;
part = g_ptr_array_index (task->parts, i);
- rspamd_message_process_text_part_maybe (task, part);
+
+
+ if (!rspamd_message_process_text_part_maybe (task, part) &&
+ part->parsed_data.len > 0) {
+ const gchar *mb = magic_buffer (task->cfg->libs_ctx->libmagic,
+ part->parsed_data.begin,
+ part->parsed_data.len);
+
+ if (mb) {
+ rspamd_ftok_t srch;
+
+ srch.begin = mb;
+ srch.len = strlen (mb);
+ part->detected_ct = rspamd_content_type_parse (srch.begin,
+ srch.len,
+ task->task_pool);
+ }
+
+ }
}
rspamd_images_process (task);
diff --git a/src/libmime/message.h b/src/libmime/message.h
index 29f777c3b..25c88cc3a 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -47,6 +47,7 @@ struct rspamd_mime_multipart {
struct rspamd_mime_part {
struct rspamd_content_type *ct;
+ struct rspamd_content_type *detected_ct;
struct rspamd_content_disposition *cd;
rspamd_ftok_t raw_data;
rspamd_ftok_t parsed_data;
diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c
index 3617a145b..3019cf577 100644
--- a/src/lua/lua_mimepart.c
+++ b/src/lua/lua_mimepart.c
@@ -333,6 +333,20 @@ LUA_FUNCTION_DEF (mimepart, get_type);
*/
LUA_FUNCTION_DEF (mimepart, get_type_full);
+/***
+ * @method mime_part:get_detected_type()
+ * Extract content-type string of the mime part. Use libmagic detection
+ * @return {string,string} content type in form 'type','subtype'
+ */
+LUA_FUNCTION_DEF (mimepart, get_detected_type);
+
+/***
+ * @method mime_part:get_detected_type_full()
+ * Extract content-type string of the mime part with all attributes. Use libmagic detection
+ * @return {string,string,table} content type in form 'type','subtype', {attrs}
+ */
+LUA_FUNCTION_DEF (mimepart, get_detected_type_full);
+
/***
* @method mime_part:get_cte()
* Extract content-transfer-encoding for a part
@@ -457,6 +471,8 @@ static const struct luaL_reg mimepartlib_m[] = {
LUA_INTERFACE_DEF (mimepart, get_length),
LUA_INTERFACE_DEF (mimepart, get_type),
LUA_INTERFACE_DEF (mimepart, get_type_full),
+ LUA_INTERFACE_DEF (mimepart, get_detected_type),
+ LUA_INTERFACE_DEF (mimepart, get_detected_type_full),
LUA_INTERFACE_DEF (mimepart, get_cte),
LUA_INTERFACE_DEF (mimepart, get_filename),
LUA_INTERFACE_DEF (mimepart, get_header),
@@ -1189,48 +1205,49 @@ lua_mimepart_get_length (lua_State * L)
}
static gint
-lua_mimepart_get_type_common (lua_State * L, gboolean full)
+lua_mimepart_get_type_common (lua_State * L, struct rspamd_content_type *ct,
+ gboolean full)
{
- struct rspamd_mime_part *part = lua_check_mimepart (L);
+
GHashTableIter it;
gpointer k, v;
struct rspamd_content_type_param *param;
- if (part == NULL) {
+ if (ct == NULL) {
lua_pushnil (L);
lua_pushnil (L);
return 2;
}
- lua_pushlstring (L, part->ct->type.begin, part->ct->type.len);
- lua_pushlstring (L, part->ct->subtype.begin, part->ct->subtype.len);
+ lua_pushlstring (L, ct->type.begin, ct->type.len);
+ lua_pushlstring (L, ct->subtype.begin, ct->subtype.len);
if (!full) {
return 2;
}
- lua_createtable (L, 0, 2 + (part->ct->attrs ?
- g_hash_table_size (part->ct->attrs) : 0));
+ lua_createtable (L, 0, 2 + (ct->attrs ?
+ g_hash_table_size (ct->attrs) : 0));
- if (part->ct->charset.len > 0) {
+ if (ct->charset.len > 0) {
lua_pushstring (L, "charset");
- lua_pushlstring (L, part->ct->charset.begin, part->ct->charset.len);
+ lua_pushlstring (L, ct->charset.begin, ct->charset.len);
lua_settable (L, -3);
}
- if (part->ct->boundary.len > 0) {
+ if (ct->boundary.len > 0) {
lua_pushstring (L, "charset");
- lua_pushlstring (L, part->ct->boundary.begin, part->ct->boundary.len);
+ lua_pushlstring (L, ct->boundary.begin, ct->boundary.len);
lua_settable (L, -3);
}
- if (part->ct->attrs) {
- g_hash_table_iter_init (&it, part->ct->attrs);
+ if (ct->attrs) {
+ g_hash_table_iter_init (&it, ct->attrs);
while (g_hash_table_iter_next (&it, &k, &v)) {
param = v;
- if (param->name.len > 0 && param->name.len > 0) {
+ if (param->name.len > 0 && param->value.len > 0) {
/* TODO: think about multiple values here */
lua_pushlstring (L, param->name.begin, param->name.len);
lua_pushlstring (L, param->value.begin, param->value.len);
@@ -1246,14 +1263,52 @@ static gint
lua_mimepart_get_type (lua_State * L)
{
LUA_TRACE_POINT;
- return lua_mimepart_get_type_common (L, FALSE);
+ struct rspamd_mime_part *part = lua_check_mimepart (L);
+
+ if (part == NULL) {
+ return luaL_error (L, "invalid arguments");
+ }
+
+ return lua_mimepart_get_type_common (L, part->ct, FALSE);
}
static gint
lua_mimepart_get_type_full (lua_State * L)
{
LUA_TRACE_POINT;
- return lua_mimepart_get_type_common (L, TRUE);
+ struct rspamd_mime_part *part = lua_check_mimepart (L);
+
+ if (part == NULL) {
+ return luaL_error (L, "invalid arguments");
+ }
+
+ return lua_mimepart_get_type_common (L, part->ct, TRUE);
+}
+
+static gint
+lua_mimepart_get_detected_type (lua_State * L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_mime_part *part = lua_check_mimepart (L);
+
+ if (part == NULL) {
+ return luaL_error (L, "invalid arguments");
+ }
+
+ return lua_mimepart_get_type_common (L, part->detected_ct, FALSE);
+}
+
+static gint
+lua_mimepart_get_detected_type_full (lua_State * L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_mime_part *part = lua_check_mimepart (L);
+
+ if (part == NULL) {
+ return luaL_error (L, "invalid arguments");
+ }
+
+ return lua_mimepart_get_type_common (L, part->detected_ct, TRUE);
}
static gint
More information about the Commits
mailing list