commit 9aa104f: [Project] Start mime structures refactoring
Vsevolod Stakhov
vsevolod at highsecure.ru
Fri Jul 12 16:42:05 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-07-10 15:01:41 +0100
URL: https://github.com/rspamd/rspamd/commit/9aa104f6a709198527538c6553a0a96db1ecc5e9
[Project] Start mime structures refactoring
---
CMakeLists.txt | 1 +
src/libmime/message.c | 27 +++++++++++++++++
src/libmime/message.h | 38 ++++++++++++++++++++++++
src/libmime/mime_headers.c | 22 +++++++-------
src/libmime/mime_headers.h | 72 ++++++++++++++++++++++++----------------------
src/libserver/dkim.c | 4 +--
src/libserver/protocol.c | 2 ++
src/libserver/task.c | 8 +-----
src/libserver/task.h | 23 +++------------
9 files changed, 123 insertions(+), 74 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 06cc9fd30..88aea0b62 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -771,6 +771,7 @@ IF(NOT CMAKE_C_OPT_FLAGS)
ENDIF()
ELSE(ENABLE_OPTIMIZATION MATCHES "ON")
IF(ENABLE_FULL_DEBUG MATCHES "ON")
+ ADD_DEFINITIONS(-DFULL_DEBUG)
SET(CMAKE_C_OPT_FLAGS "-g -O0")
ELSE(ENABLE_FULL_DEBUG MATCHES "ON")
SET(CMAKE_C_OPT_FLAGS "-g -O2")
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 482287769..1d9da26f2 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -1091,6 +1091,22 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
task->queue_id = mid;
}
+/**
+ * Destructor-style callback for a message object (private to this file).
+ * The body is currently empty: nothing is released for `msg`.
+ * NOTE(review): nothing in this hunk registers it as a destructor - confirm.
+ * @param msg message being destroyed
+ */
+static void
+rspamd_message_dtor (struct rspamd_message *msg)
+{
+
+}
+
+/**
+ * Allocate a new message object for a task
+ * @param task task whose memory pool (task->task_pool) provides the storage
+ * @return the newly allocated message
+ */
+struct rspamd_message*
+rspamd_message_new (struct rspamd_task *task)
+{
+ struct rspamd_message *msg;
+
+ /* The pool allocator needs the pool itself as its first argument */
+ msg = rspamd_mempool_alloc0 (task->task_pool, sizeof (*msg));
+ /* NOTE(review): msg->ref is not set up here and rspamd_message_dtor is not attached yet */
+
+ return msg;
+}
+
gboolean
rspamd_message_parse (struct rspamd_task *task)
{
@@ -1593,3 +1609,14 @@ rspamd_message_get_mime_header_array (struct rspamd_task *task,
return ret;
}
+
+/**
+ * Take an additional reference on a message
+ * @param msg message to retain
+ * @return the same `msg` pointer, so that the call can be used in an expression
+ */
+struct rspamd_message *
+rspamd_message_ref (struct rspamd_message *msg)
+{
+ REF_RETAIN (msg);
+
+ /* The function is declared as returning a pointer, so hand `msg` back */
+ return msg;
+}
+
+/**
+ * Drop a reference on a message
+ * @param msg message to release
+ */
+void
+rspamd_message_unref (struct rspamd_message *msg)
+{
+ REF_RELEASE (msg);
+}
diff --git a/src/libmime/message.h b/src/libmime/message.h
index 17c4ec5b9..7d58fa88f 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -12,6 +12,8 @@
#include "cryptobox.h"
#include "mime_headers.h"
#include "content_type.h"
+#include "libutil/ref.h"
+#include "libutil/str_util.h"
#include <unicode/uchar.h>
#include <unicode/utext.h>
@@ -129,6 +131,36 @@ struct rspamd_mime_text_part {
guint unicode_scripts;
};
+/**
+ * Parsed message data. The members below carry the same names and types as
+ * the fields that this commit removes from struct rspamd_task.
+ */
+struct rspamd_message {
+ const gchar *message_id; /**< message id */
+ gchar *subject; /**< subject (for non-mime) */
+
+ GPtrArray *parts; /**< list of parsed parts */
+ GPtrArray *text_parts; /**< list of text parts */
+ struct {
+ const gchar *begin;
+ gsize len;
+ const gchar *body_start;
+ } raw_headers_content; /**< raw headers block: start pointer, length and body start pointer */
+ GPtrArray *received; /**< list of received headers */
+ GHashTable *urls; /**< list of parsed urls */
+ GHashTable *emails; /**< list of parsed emails */
+ GHashTable *raw_headers; /**< list of raw headers */
+ GQueue *headers_order; /**< order of raw headers */
+ GPtrArray *rcpt_mime; /**< recipients parsed from To/Cc/Bcc headers */
+ GPtrArray *from_mime; /**< senders parsed from the From header */
+ enum rspamd_newlines_type nlines_type; /**< type of newlines (detected on most of headers) */
+ ref_entry_t ref; /**< reference counting state (see rspamd_message_ref/rspamd_message_unref) */
+};
+
+/*
+ * MESSAGE_FIELD(task, field) expands to the member `field` of task->message.
+ * A member name cannot be parenthesised after `->`, so `field` is used as is.
+ * With FULL_DEBUG the access is guarded by a check that task->message is set.
+ * The check is written as an expression (a do/while statement cannot be
+ * chained with a comma operator) and the result is dereferenced through a
+ * pointer, so both variants can be used in the same places, including as an
+ * assignment target. g_log with G_LOG_LEVEL_ERROR terminates the program.
+ */
+#ifndef FULL_DEBUG
+#define MESSAGE_FIELD(task, field) ((task)->message->field)
+#else
+#define MESSAGE_FIELD(task, field) \
+ (*((task)->message != NULL ? &((task)->message->field) : \
+ (g_log (G_LOG_DOMAIN, G_LOG_LEVEL_ERROR, "no message when getting field %s", #field), \
+ &((task)->message->field))))
+#endif
+
/**
* Parse and pre-process mime message
* @param task worker_task object
@@ -191,6 +223,12 @@ enum rspamd_cte rspamd_cte_from_string (const gchar *str);
*/
const gchar *rspamd_cte_to_string (enum rspamd_cte ct);
+struct rspamd_message* rspamd_message_new (struct rspamd_task *task);
+
+struct rspamd_message *rspamd_message_ref (struct rspamd_message *msg);
+
+void rspamd_message_unref (struct rspamd_message *msg);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
index cf6d0f763..952a163b9 100644
--- a/src/libmime/mime_headers.c
+++ b/src/libmime/mime_headers.c
@@ -44,31 +44,31 @@ rspamd_mime_header_check_special (struct rspamd_task *task,
g_ptr_array_add (task->received, recv);
}
- rh->type = RSPAMD_HEADER_RECEIVED;
+ rh->flags = RSPAMD_HEADER_RECEIVED;
break;
case 0x76F31A09F4352521ULL: /* to */
task->rcpt_mime = rspamd_email_address_from_mime (task->task_pool,
rh->decoded, strlen (rh->decoded), task->rcpt_mime);
- rh->type = RSPAMD_HEADER_TO|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
+ rh->flags = RSPAMD_HEADER_TO|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
break;
case 0x7EB117C1480B76ULL: /* cc */
task->rcpt_mime = rspamd_email_address_from_mime (task->task_pool,
rh->decoded, strlen (rh->decoded), task->rcpt_mime);
- rh->type = RSPAMD_HEADER_CC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
+ rh->flags = RSPAMD_HEADER_CC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
break;
case 0xE4923E11C4989C8DULL: /* bcc */
task->rcpt_mime = rspamd_email_address_from_mime (task->task_pool,
rh->decoded, strlen (rh->decoded), task->rcpt_mime);
- rh->type = RSPAMD_HEADER_BCC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
+ rh->flags = RSPAMD_HEADER_BCC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
break;
case 0x41E1985EDC1CBDE4ULL: /* from */
task->from_mime = rspamd_email_address_from_mime (task->task_pool,
rh->decoded, strlen (rh->decoded), task->from_mime);
- rh->type = RSPAMD_HEADER_FROM|RSPAMD_HEADER_SENDER|RSPAMD_HEADER_UNIQUE;
+ rh->flags = RSPAMD_HEADER_FROM|RSPAMD_HEADER_SENDER|RSPAMD_HEADER_UNIQUE;
break;
case 0x43A558FC7C240226ULL: /* message-id */ {
- rh->type = RSPAMD_HEADER_MESSAGE_ID|RSPAMD_HEADER_UNIQUE;
+ rh->flags = RSPAMD_HEADER_MESSAGE_ID|RSPAMD_HEADER_UNIQUE;
p = rh->decoded;
end = p + strlen (p);
@@ -107,20 +107,20 @@ rspamd_mime_header_check_special (struct rspamd_task *task,
if (task->subject == NULL) {
task->subject = rh->decoded;
}
- rh->type = RSPAMD_HEADER_SUBJECT|RSPAMD_HEADER_UNIQUE;
+ rh->flags = RSPAMD_HEADER_SUBJECT|RSPAMD_HEADER_UNIQUE;
break;
case 0xEE4AA2EAAC61D6F4ULL: /* return-path */
if (task->from_envelope == NULL) {
task->from_envelope = rspamd_email_address_from_smtp (rh->decoded,
strlen (rh->decoded));
}
- rh->type = RSPAMD_HEADER_RETURN_PATH|RSPAMD_HEADER_UNIQUE;
+ rh->flags = RSPAMD_HEADER_RETURN_PATH|RSPAMD_HEADER_UNIQUE;
break;
case 0xB9EEFAD2E93C2161ULL: /* delivered-to */
if (task->deliver_to == NULL) {
task->deliver_to = rh->decoded;
}
- rh->type = RSPAMD_HEADER_DELIVERED_TO;
+ rh->flags = RSPAMD_HEADER_DELIVERED_TO;
break;
case 0x2EC3BFF3C393FC10ULL: /* date */
case 0xAC0DDB1A1D214CAULL: /* sender */
@@ -128,7 +128,7 @@ rspamd_mime_header_check_special (struct rspamd_task *task,
case 0x81CD9E9131AB6A9AULL: /* content-type */
case 0xC39BD9A75AA25B60ULL: /* content-transfer-encoding */
case 0xB3F6704CB3AD6589ULL: /* references */
- rh->type = RSPAMD_HEADER_UNIQUE;
+ rh->flags = RSPAMD_HEADER_UNIQUE;
break;
}
}
@@ -472,7 +472,7 @@ rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target,
while (cur) {
nh = cur->data;
- if (nh->name && nh->type != RSPAMD_HEADER_RECEIVED) {
+ if (nh->name && nh->flags != RSPAMD_HEADER_RECEIVED) {
rspamd_cryptobox_hash_update (&hs, nh->name, strlen (nh->name));
}
diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h
index 5cb300978..60fd7b697 100644
--- a/src/libmime/mime_headers.h
+++ b/src/libmime/mime_headers.h
@@ -31,20 +31,22 @@ enum rspamd_rfc2047_encoding {
RSPAMD_RFC2047_BASE64,
};
-enum rspamd_mime_header_special_type {
- RSPAMD_HEADER_GENERIC = 0,
- RSPAMD_HEADER_RECEIVED = 1 << 0,
- RSPAMD_HEADER_TO = 1 << 2,
- RSPAMD_HEADER_CC = 1 << 3,
- RSPAMD_HEADER_BCC = 1 << 4,
- RSPAMD_HEADER_FROM = 1 << 5,
- RSPAMD_HEADER_MESSAGE_ID = 1 << 6,
- RSPAMD_HEADER_SUBJECT = 1 << 7,
- RSPAMD_HEADER_RETURN_PATH = 1 << 8,
- RSPAMD_HEADER_DELIVERED_TO = 1 << 9,
- RSPAMD_HEADER_SENDER = 1 << 10,
- RSPAMD_HEADER_RCPT = 1 << 11,
- RSPAMD_HEADER_UNIQUE = 1 << 12
+/**
+ * Bit flags kept in rspamd_mime_header.flags.
+ * The last two values are added together with the removal of the separate
+ * `tab_separated` and `empty_separator` booleans from the header structure,
+ * so they presumably replace those members - confirm against the parser.
+ */
+enum rspamd_mime_header_flags {
+ RSPAMD_HEADER_GENERIC = 0u,
+ RSPAMD_HEADER_RECEIVED = 1u << 0u,
+ RSPAMD_HEADER_TO = 1u << 2u, /* bit 1 is left unassigned */
+ RSPAMD_HEADER_CC = 1u << 3u,
+ RSPAMD_HEADER_BCC = 1u << 4u,
+ RSPAMD_HEADER_FROM = 1u << 5u,
+ RSPAMD_HEADER_MESSAGE_ID = 1u << 6u,
+ RSPAMD_HEADER_SUBJECT = 1u << 7u,
+ RSPAMD_HEADER_RETURN_PATH = 1u << 8u,
+ RSPAMD_HEADER_DELIVERED_TO = 1u << 9u,
+ RSPAMD_HEADER_SENDER = 1u << 10u,
+ RSPAMD_HEADER_RCPT = 1u << 11u,
+ RSPAMD_HEADER_UNIQUE = 1u << 12u,
+ RSPAMD_HEADER_EMPTY_SEPARATOR = 1u << 13u,
+ RSPAMD_HEADER_TAB_SEPARATED = 1u << 14u,
};
struct rspamd_mime_header {
@@ -52,32 +54,31 @@ struct rspamd_mime_header {
gchar *value;
const gchar *raw_value; /* As it is in the message (unfolded and unparsed) */
gsize raw_len;
- gboolean tab_separated;
- gboolean empty_separator;
guint order;
- enum rspamd_mime_header_special_type type;
+ int flags; /* see enum rspamd_mime_header_flags */
gchar *separator;
gchar *decoded;
+ struct rspamd_mime_header *prev, *next; /* Headers with the same name */
+ struct rspamd_mime_header *ord_prev, *ord_next; /* Overall order of headers */
};
+/**
+ * Received header protocol kinds and properties, kept together as bits of
+ * received_header.flags (the separate `type` member is removed below).
+ * NOTE(review): RSPAMD_RECEIVED_SMTP stays 0, so unlike the other kinds it
+ * cannot be detected with a bitwise AND - confirm that this is intended.
+ */
enum rspamd_received_type {
RSPAMD_RECEIVED_SMTP = 0,
- RSPAMD_RECEIVED_ESMTP,
- RSPAMD_RECEIVED_ESMTPA,
- RSPAMD_RECEIVED_ESMTPS,
- RSPAMD_RECEIVED_ESMTPSA,
- RSPAMD_RECEIVED_LMTP,
- RSPAMD_RECEIVED_IMAP,
- RSPAMD_RECEIVED_LOCAL,
- RSPAMD_RECEIVED_HTTP,
- RSPAMD_RECEIVED_MAPI,
- RSPAMD_RECEIVED_UNKNOWN
+ RSPAMD_RECEIVED_ESMTP = 1u << 0u,
+ RSPAMD_RECEIVED_ESMTPA = 1u << 1u,
+ RSPAMD_RECEIVED_ESMTPS = 1u << 2u,
+ RSPAMD_RECEIVED_ESMTPSA = 1u << 3u,
+ RSPAMD_RECEIVED_LMTP = 1u << 4u,
+ RSPAMD_RECEIVED_IMAP = 1u << 5u,
+ RSPAMD_RECEIVED_LOCAL = 1u << 6u,
+ RSPAMD_RECEIVED_HTTP = 1u << 7u,
+ RSPAMD_RECEIVED_MAPI = 1u << 8u,
+ RSPAMD_RECEIVED_UNKNOWN = 1u << 9u,
+ RSPAMD_RECEIVED_FLAG_ARTIFICIAL = (1u << 10u), /* property bits (previously separate #defines) start here */
+ RSPAMD_RECEIVED_FLAG_SSL = (1u << 11u),
+ RSPAMD_RECEIVED_FLAG_AUTHENTICATED = (1u << 12u),
};
-#define RSPAMD_RECEIVED_FLAG_ARTIFICIAL (1 << 0)
-#define RSPAMD_RECEIVED_FLAG_SSL (1 << 1)
-#define RSPAMD_RECEIVED_FLAG_AUTHENTICATED (1 << 2)
-
struct received_header {
const gchar *from_hostname;
const gchar *from_ip;
@@ -88,8 +89,8 @@ struct received_header {
rspamd_inet_addr_t *addr;
struct rspamd_mime_header *hdr;
time_t timestamp;
- enum rspamd_received_type type;
- gint flags;
+ gint flags; /* See enum rspamd_received_type */
+ struct received_header *prev, *next;
};
/**
@@ -100,8 +101,9 @@ struct received_header {
* @param len
* @param check_newlines
*/
-void rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target,
- GQueue *order,
+void rspamd_mime_headers_process (struct rspamd_task *task,
+ GHashTable *target,
+ struct rspamd_mime_header **order_ptr,
const gchar *in, gsize len,
gboolean check_newlines);
diff --git a/src/libserver/dkim.c b/src/libserver/dkim.c
index 9386c5cdc..c0ea29f08 100644
--- a/src/libserver/dkim.c
+++ b/src/libserver/dkim.c
@@ -2175,7 +2175,7 @@ rspamd_dkim_canonize_header (struct rspamd_dkim_common_ctx *ctx,
if (ar) {
/* Check uniqueness of the header */
rh = g_ptr_array_index (ar, 0);
- if ((rh->type & RSPAMD_HEADER_UNIQUE) && ar->len > 1) {
+ if ((rh->flags & RSPAMD_HEADER_UNIQUE) && ar->len > 1) {
guint64 random_cookie = ottery_rand_uint64 ();
msg_warn_dkim ("header %s is intended to be unique by"
@@ -2210,7 +2210,7 @@ rspamd_dkim_canonize_header (struct rspamd_dkim_common_ctx *ctx,
(gint)rh->raw_len, rh->raw_value);
}
else {
- if (ctx->is_sign && (rh->type & RSPAMD_HEADER_FROM)) {
+ if (ctx->is_sign && (rh->flags & RSPAMD_HEADER_FROM)) {
/* Special handling of the From handling when rewrite is done */
gboolean has_rewrite = FALSE;
guint i;
diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c
index bef7a0452..ddd072882 100644
--- a/src/libserver/protocol.c
+++ b/src/libserver/protocol.c
@@ -1409,6 +1409,7 @@ rspamd_protocol_http_reply (struct rspamd_http_message *msg,
gint flags = RSPAMD_PROTOCOL_DEFAULT;
struct rspamd_action *action;
+#if 0
/* Write custom headers */
g_hash_table_iter_init (&hiter, task->reply_headers);
while (g_hash_table_iter_next (&hiter, &h, &v)) {
@@ -1416,6 +1417,7 @@ rspamd_protocol_http_reply (struct rspamd_http_message *msg,
rspamd_http_message_add_header (msg, hn->begin, hv->begin);
}
+#endif
flags |= RSPAMD_PROTOCOL_URLS;
diff --git a/src/libserver/task.c b/src/libserver/task.c
index 04be61744..88ee730a3 100644
--- a/src/libserver/task.c
+++ b/src/libserver/task.c
@@ -129,15 +129,9 @@ rspamd_task_new (struct rspamd_worker *worker, struct rspamd_config *cfg,
new_task->request_headers = g_hash_table_new_full (rspamd_ftok_icase_hash,
rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free,
rspamd_request_header_dtor);
- rspamd_mempool_add_destructor (new_task->task_pool,
- (rspamd_mempool_destruct_t) g_hash_table_unref,
- new_task->request_headers);
- new_task->reply_headers = g_hash_table_new_full (rspamd_ftok_icase_hash,
- rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free,
- rspamd_fstring_mapped_ftok_free);
rspamd_mempool_add_destructor (new_task->task_pool,
(rspamd_mempool_destruct_t) g_hash_table_unref,
- new_task->reply_headers);
+ new_task->request_headers);
rspamd_mempool_add_destructor (new_task->task_pool,
(rspamd_mempool_destruct_t) g_hash_table_unref,
new_task->raw_headers);
diff --git a/src/libserver/task.h b/src/libserver/task.h
index ac55dd910..00caf3ab6 100644
--- a/src/libserver/task.h
+++ b/src/libserver/task.h
@@ -134,6 +134,7 @@ enum rspamd_task_stage {
struct rspamd_email_address;
struct rspamd_lang_detector;
enum rspamd_newlines_type;
+struct rspamd_message;
/**
* Worker task structure
@@ -147,15 +148,12 @@ struct rspamd_task {
gulong message_len; /**< Message length */
gchar *helo; /**< helo header value */
gchar *queue_id; /**< queue id if specified */
- const gchar *message_id; /**< message id */
rspamd_inet_addr_t *from_addr; /**< from addr for a task */
rspamd_inet_addr_t *client_addr; /**< address of connected socket */
gchar *deliver_to; /**< address to deliver */
gchar *user; /**< user to deliver */
- gchar *subject; /**< subject (for non-mime) */
const gchar *hostname; /**< hostname reported by MTA */
GHashTable *request_headers; /**< HTTP headers in a request */
- GHashTable *reply_headers; /**< Custom reply headers */
struct {
const gchar *begin;
gsize len;
@@ -163,29 +161,14 @@ struct rspamd_task {
} msg; /**< message buffer */
struct rspamd_http_connection *http_conn; /**< HTTP server connection */
struct rspamd_async_session *s; /**< async session object */
- GPtrArray *parts; /**< list of parsed parts */
- GPtrArray *text_parts; /**< list of text parts */
- struct {
- const gchar *begin;
- gsize len;
- const gchar *body_start;
- } raw_headers_content; /**< list of raw headers */
- GPtrArray *received; /**< list of received headers */
- GHashTable *urls; /**< list of parsed urls */
- GHashTable *emails; /**< list of parsed emails */
- GHashTable *raw_headers; /**< list of raw headers */
- GQueue *headers_order; /**< order of raw headers */
struct rspamd_metric_result *result; /**< Metric result */
GHashTable *lua_cache; /**< cache of lua objects */
GPtrArray *tokens; /**< statistics tokens */
GArray *meta_words; /**< rspamd_stat_token_t produced from meta headers
(e.g. Subject) */
- GPtrArray *rcpt_mime;
GPtrArray *rcpt_envelope; /**< array of rspamd_email_address */
- GPtrArray *from_mime;
struct rspamd_email_address *from_envelope;
- enum rspamd_newlines_type nlines_type; /**< type of newlines (detected on most of headers */
ucl_object_t *messages; /**< list of messages that would be reported */
struct rspamd_re_runtime *re_rt; /**< regexp runtime */
@@ -215,6 +198,7 @@ struct rspamd_task {
const gchar *classifier; /**< Classifier to learn (if needed) */
struct rspamd_lang_detector *lang_det; /**< Languages detector */
+ struct rspamd_message *message;
guchar digest[16];
};
@@ -252,7 +236,8 @@ gboolean rspamd_task_fin (void *arg);
* @return
*/
gboolean rspamd_task_load_message (struct rspamd_task *task,
- struct rspamd_http_message *msg, const gchar *start, gsize len);
+ struct rspamd_http_message *msg,
+ const gchar *start, gsize len);
/**
* Process task
More information about the Commits
mailing list