commit d525194: [Project] Add spilling machine for received headers
Vsevolod Stakhov
vsevolod at highsecure.ru
Thu Feb 7 15:14:07 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-02-06 12:36:10 +0000
URL: https://github.com/rspamd/rspamd/commit/d525194397181456bba6edea4680a10403c3415c
[Project] Add spilling machine for received headers
---
src/libmime/message.h | 30 ----
src/libmime/mime_headers.c | 331 +++++++++++++++++++++++++++++++++++++++++++++
src/libmime/mime_headers.h | 31 +++++
src/libmime/smtp_parsers.h | 3 +
4 files changed, 365 insertions(+), 30 deletions(-)
diff --git a/src/libmime/message.h b/src/libmime/message.h
index 19e8b40b5..eb260cd77 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -125,36 +125,6 @@ struct rspamd_mime_text_part {
guint unicode_scripts;
};
-enum rspamd_received_type {
- RSPAMD_RECEIVED_SMTP = 0,
- RSPAMD_RECEIVED_ESMTP,
- RSPAMD_RECEIVED_ESMTPA,
- RSPAMD_RECEIVED_ESMTPS,
- RSPAMD_RECEIVED_ESMTPSA,
- RSPAMD_RECEIVED_LMTP,
- RSPAMD_RECEIVED_IMAP,
- RSPAMD_RECEIVED_UNKNOWN
-};
-
-#define RSPAMD_RECEIVED_FLAG_ARTIFICIAL (1 << 0)
-#define RSPAMD_RECEIVED_FLAG_SSL (1 << 1)
-#define RSPAMD_RECEIVED_FLAG_AUTHENTICATED (1 << 2)
-
-struct received_header {
- gchar *from_hostname;
- gchar *from_ip;
- gchar *real_hostname;
- gchar *real_ip;
- gchar *by_hostname;
- gchar *for_mbox;
- gchar *comment_ip;
- rspamd_inet_addr_t *addr;
- struct rspamd_mime_header *hdr;
- time_t timestamp;
- enum rspamd_received_type type;
- gint flags;
-};
-
/**
* Parse and pre-process mime message
* @param task worker_task object
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
index 2769ae633..19ad3262e 100644
--- a/src/libmime/mime_headers.c
+++ b/src/libmime/mime_headers.c
@@ -17,6 +17,7 @@
#include "mime_headers.h"
#include "smtp_parsers.h"
#include "mime_encoding.h"
+#include "contrib/uthash/utlist.h"
#include "libserver/mempool_vars_internal.h"
#include <unicode/utf8.h>
@@ -848,3 +849,333 @@ rspamd_mime_message_id_generate (const gchar *fqdn)
return g_string_free (out, FALSE);
}
+
+enum rspamd_received_part_type { /* Kind of clause a spilled Received part was taken from */
+ RSPAMD_RECEIVED_PART_FROM, /* text that follows the leading "from" keyword */
+ RSPAMD_RECEIVED_PART_BY, /* text that follows the "by" keyword */
+ RSPAMD_RECEIVED_PART_FOR, /* text that follows a "for" keyword */
+ RSPAMD_RECEIVED_PART_WITH, /* text that follows a "with" keyword */
+ RSPAMD_RECEIVED_PART_UNKNOWN, /* any other token: scanned over, but neither its data nor its comments are stored */
+};
+
+struct rspamd_received_comment { /* One parenthesised comment found inside a Received clause */
+ gchar *data; /* pool-allocated copy of the text between the outermost parentheses, passed through rspamd_str_lc (presumably lowercased); not NUL-terminated */
+ gsize dlen; /* number of bytes in data */
+ struct rspamd_received_comment *prev; /* comment stored before this one in the same clause, NULL for the first */
+};
+
+struct rspamd_received_part { /* One clause of a Received header together with the comments found in it */
+ enum rspamd_received_part_type type; /* which keyword introduced this clause */
+ gchar *data; /* pool-allocated copy of the clause's data token (not NUL-terminated); stays NULL when nothing was stored */
+ gsize dlen; /* number of bytes in data; 0 when nothing was stored */
+ struct rspamd_received_comment *tail_comment; /* most recently stored comment; earlier ones are reached through ->prev */
+ struct rspamd_received_comment *head_comment; /* first stored comment of the clause, NULL when there is none */
+ struct rspamd_received_part *prev, *next; /* list links; parts are chained with utlist's DL_APPEND in rspamd_smtp_received_spill */
+};
+
+/*
+ * Store the token [start, stop) as the data of `part`.
+ * The bytes are copied into the task pool and passed through rspamd_str_lc;
+ * the copy is not NUL-terminated. Empty tokens and parts of type
+ * RSPAMD_RECEIVED_PART_UNKNOWN are ignored.
+ */
+static void
+rspamd_smtp_received_part_set_data (struct rspamd_task *task,
+		struct rspamd_received_part *part,
+		const guchar *start,
+		const guchar *stop)
+{
+	gsize dlen;
+
+	if (stop <= start || part->type == RSPAMD_RECEIVED_PART_UNKNOWN) {
+		return;
+	}
+
+	dlen = stop - start;
+	part->data = rspamd_mempool_alloc (task->task_pool, dlen);
+	memcpy (part->data, start, dlen);
+	rspamd_str_lc (part->data, dlen);
+	part->dlen = dlen;
+}
+
+/**
+ * Parse a single clause of a Received header: one data token plus any
+ * number of parenthesised comments around it. Parsing stops at the date
+ * separator `;`, at the first character of the next token once data has
+ * been stored, or at the end of input.
+ * @param task task whose memory pool owns every allocation made here
+ * @param data text that follows the clause keyword
+ * @param len number of bytes available at `data`
+ * @param type clause kind; for RSPAMD_RECEIVED_PART_UNKNOWN the text is only
+ * scanned, neither data nor comments are copied
+ * @param last out: offset from `data` at which parsing stopped; written on
+ * every non-NULL return
+ * @return newly allocated part, or NULL if the input ends inside an
+ * unterminated comment
+ */
+static struct rspamd_received_part *
+rspamd_smtp_received_process_part (struct rspamd_task *task,
+		const char *data,
+		size_t len,
+		enum rspamd_received_part_type type,
+		goffset *last)
+{
+	struct rspamd_received_part *npart;
+	const guchar *p, *c, *end;
+	guint obraces = 0, ebraces = 0;
+	enum _parse_state {
+		skip_spaces,
+		in_comment,
+		read_data,
+		all_done
+	} state, next_state;
+
+	npart = rspamd_mempool_alloc0 (task->task_pool, sizeof (*npart));
+	npart->type = type;
+
+	/* In this function, we just process comments and data separately */
+	p = (const guchar *)data;
+	end = p + len;
+	c = p;
+	state = skip_spaces;
+	next_state = read_data;
+
+	while (p < end) {
+		switch (state) {
+		case skip_spaces:
+			if (!g_ascii_isspace (*p)) {
+				/* Token starts here; do not consume the character yet */
+				c = p;
+				state = next_state;
+			}
+			else {
+				p ++;
+			}
+			break;
+		case in_comment:
+			if (*p == '(') {
+				obraces ++;
+			}
+			else if (*p == ')') {
+				ebraces ++;
+
+				if (ebraces >= obraces) {
+					/* Outermost comment closed: [c, p) is its text */
+					if (type != RSPAMD_RECEIVED_PART_UNKNOWN && p > c) {
+						struct rspamd_received_comment *comment;
+
+						comment = rspamd_mempool_alloc (task->task_pool,
+								sizeof (*comment));
+						comment->data = rspamd_mempool_alloc (task->task_pool,
+								p - c);
+						memcpy (comment->data, c, p - c);
+						rspamd_str_lc (comment->data, p - c);
+						comment->dlen = p - c;
+
+						if (!npart->head_comment) {
+							comment->prev = NULL;
+							npart->head_comment = comment;
+							npart->tail_comment = comment;
+						}
+						else {
+							/* Comments are chained backwards from the tail */
+							comment->prev = npart->tail_comment;
+							npart->tail_comment = comment;
+						}
+					}
+
+					p ++;
+					c = p;
+					state = skip_spaces;
+					next_state = read_data;
+
+					continue;
+				}
+			}
+
+			p ++;
+			break;
+		case read_data:
+			if (*p == '(') {
+				/* A comment terminates the current token, if any */
+				rspamd_smtp_received_part_set_data (task, npart, c, p);
+
+				state = in_comment;
+				obraces = 1;
+				ebraces = 0;
+				p ++;
+				c = p;
+			}
+			else if (g_ascii_isspace (*p)) {
+				rspamd_smtp_received_part_set_data (task, npart, c, p);
+
+				state = skip_spaces;
+				next_state = read_data;
+				c = p;
+			}
+			else if (*p == ';') {
+				/* It is actually delimiter of date part if not in the comments */
+				rspamd_smtp_received_part_set_data (task, npart, c, p);
+
+				state = all_done;
+				continue;
+			}
+			else if (npart->dlen > 0) {
+				/* We have already received data and find something with no ( */
+				state = all_done;
+				continue;
+			}
+			else {
+				p ++;
+			}
+			break;
+		case all_done:
+			*last = p - (const guchar *)data;
+			return npart;
+		}
+	}
+
+	/* Leftover: the input ended before a terminator was seen */
+	switch (state) {
+	case read_data:
+		if (p > c) {
+			rspamd_smtp_received_part_set_data (task, npart, c, p);
+			/* Report the stop offset so that the caller advances to the end */
+			*last = p - (const guchar *)data;
+
+			return npart;
+		}
+		break;
+	case skip_spaces:
+		*last = p - (const guchar *)data;
+
+		return npart;
+	default:
+		break;
+	}
+
+	return NULL;
+}
+
+/**
+ * Split ("spill") the value of a Received header into a list of clauses.
+ * The header must start with `from`; an optional `by` clause and then any
+ * number of `with`, `for` or unrecognised clauses are collected until the
+ * date separator `;` or the end of input.
+ * @param task task whose memory pool owns the produced parts
+ * @param data header value
+ * @param len number of bytes available at `data`
+ * @param date_pos out: offset from `data` of the first byte after the `;`
+ * date separator; left untouched when no separator is reached
+ * @return head of the utlist doubly-linked list of parts, or NULL when the
+ * header does not start with `from`, a clause cannot be parsed, or the input
+ * ends in the middle of an unrecognised token
+ */
+static struct rspamd_received_part *
+rspamd_smtp_received_spill (struct rspamd_task *task,
+		const char *data,
+		size_t len,
+		goffset *date_pos)
+{
+	const guchar *p, *end;
+	struct rspamd_received_part *cur_part, *head = NULL;
+	goffset pos;
+
+	p = (const guchar *)data;
+	end = p + len;
+
+	while (p < end && g_ascii_isspace (*p)) {
+		p ++;
+	}
+
+	len = end - p;
+
+	/* Ignore all received but those started from from part */
+	if (len <= 4 ||
+			lc_map[p[0]] != 'f' ||
+			lc_map[p[1]] != 'r' ||
+			lc_map[p[2]] != 'o' ||
+			lc_map[p[3]] != 'm') {
+		return NULL;
+	}
+
+	p += sizeof ("from") - 1;
+
+	/*
+	 * Before every clause `pos` is pre-set to "the whole remainder was
+	 * consumed", so `p` always advances even when the clause parser returns
+	 * without writing the stop offset.
+	 */
+	pos = end - p;
+	/* We can now store from part */
+	cur_part = rspamd_smtp_received_process_part (task, (const char *)p,
+			end - p, RSPAMD_RECEIVED_PART_FROM, &pos);
+
+	if (!cur_part) {
+		return NULL;
+	}
+
+	p += pos;
+	len = end > p ? end - p : 0;
+	DL_APPEND (head, cur_part);
+
+	if (len > 2 && (lc_map[p[0]] == 'b' &&
+			lc_map[p[1]] == 'y')) {
+		p += sizeof ("by") - 1;
+
+		pos = end - p;
+		cur_part = rspamd_smtp_received_process_part (task, (const char *)p,
+				end - p, RSPAMD_RECEIVED_PART_BY, &pos);
+
+		if (!cur_part) {
+			return NULL;
+		}
+
+		p += pos;
+		len = end > p ? end - p : 0;
+		DL_APPEND (head, cur_part);
+	}
+
+	while (p < end) {
+		if (*p == ';') {
+			/* We are at the date separator, stop here */
+			*date_pos = p - (const guchar *)data + 1;
+			break;
+		}
+
+		if (len > sizeof ("with") && (lc_map[p[0]] == 'w' &&
+				lc_map[p[1]] == 'i' &&
+				lc_map[p[2]] == 't' &&
+				lc_map[p[3]] == 'h')) {
+			p += sizeof ("with") - 1;
+
+			pos = end - p;
+			cur_part = rspamd_smtp_received_process_part (task,
+					(const char *)p, end - p,
+					RSPAMD_RECEIVED_PART_WITH, &pos);
+		}
+		else if (len > sizeof ("for") && (lc_map[p[0]] == 'f' &&
+				lc_map[p[1]] == 'o' &&
+				lc_map[p[2]] == 'r')) {
+			p += sizeof ("for") - 1;
+
+			pos = end - p;
+			cur_part = rspamd_smtp_received_process_part (task,
+					(const char *)p, end - p,
+					RSPAMD_RECEIVED_PART_FOR, &pos);
+		}
+		else {
+			/* Unrecognised keyword: skip it up to the next delimiter */
+			while (p < end) {
+				if (!(g_ascii_isspace (*p) || *p == '(' || *p == ';')) {
+					p ++;
+				}
+				else {
+					break;
+				}
+			}
+
+			if (p == end) {
+				return NULL;
+			}
+			else if (*p == ';') {
+				*date_pos = p - (const guchar *)data + 1;
+				break;
+			}
+
+			pos = end - p;
+			cur_part = rspamd_smtp_received_process_part (task,
+					(const char *)p, end - p,
+					RSPAMD_RECEIVED_PART_UNKNOWN, &pos);
+		}
+
+		if (!cur_part) {
+			return NULL;
+		}
+
+		p += pos;
+		len = end > p ? end - p : 0;
+		DL_APPEND (head, cur_part);
+	}
+
+	return head;
+}
+
+/**
+ * Parse the value of a Received header.
+ * In this revision the header is only split into clauses by
+ * rspamd_smtp_received_spill; `rh` is accepted but not filled yet.
+ * @param task task whose memory pool is used for the intermediate parts
+ * @param data header value
+ * @param len number of bytes available at `data`
+ * @param rh destination structure (currently left untouched)
+ * @return 0 if the header was split into at least one clause, -1 otherwise
+ */
+int
+rspamd_smtp_received_parse (struct rspamd_task *task,
+		const char *data,
+		size_t len,
+		struct received_header *rh)
+{
+	goffset date_pos = 0;
+	struct rspamd_received_part *head;
+
+	head = rspamd_smtp_received_spill (task, data, len, &date_pos);
+
+	if (head == NULL) {
+		return -1;
+	}
+
+	return 0;
+}
\ No newline at end of file
diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h
index 3c0c23a36..ceed5ab06 100644
--- a/src/libmime/mime_headers.h
+++ b/src/libmime/mime_headers.h
@@ -18,6 +18,7 @@
#include "config.h"
#include "libutil/mem_pool.h"
+#include "libutil/addr.h"
struct rspamd_task;
@@ -55,6 +56,36 @@ struct rspamd_mime_header {
gchar *decoded;
};
+enum rspamd_received_type { /* Protocol tag kept in received_header.type */
+ RSPAMD_RECEIVED_SMTP = 0,
+ RSPAMD_RECEIVED_ESMTP,
+ RSPAMD_RECEIVED_ESMTPA,
+ RSPAMD_RECEIVED_ESMTPS,
+ RSPAMD_RECEIVED_ESMTPSA,
+ RSPAMD_RECEIVED_LMTP,
+ RSPAMD_RECEIVED_IMAP,
+ RSPAMD_RECEIVED_UNKNOWN /* NOTE(review): presumably the fallback when no known protocol is recognised -- confirm against the parser */
+};
+
+#define RSPAMD_RECEIVED_FLAG_ARTIFICIAL (1 << 0) /* bit 0; NOTE(review): these look like bit flags for received_header.flags -- confirm at the use sites */
+#define RSPAMD_RECEIVED_FLAG_SSL (1 << 1) /* bit 1 */
+#define RSPAMD_RECEIVED_FLAG_AUTHENTICATED (1 << 2) /* bit 2 */
+
+struct received_header { /* Parsed form of one Received header; rspamd_smtp_received_parse takes a pointer to it */
+ gchar *from_hostname;
+ gchar *from_ip;
+ gchar *real_hostname;
+ gchar *real_ip;
+ gchar *by_hostname;
+ gchar *for_mbox;
+ gchar *comment_ip;
+ rspamd_inet_addr_t *addr; /* address object; the type comes from libutil/addr.h */
+ struct rspamd_mime_header *hdr; /* NOTE(review): presumably the raw mime header this record was parsed from -- confirm */
+ time_t timestamp;
+ enum rspamd_received_type type; /* one of the rspamd_received_type values */
+ gint flags; /* NOTE(review): presumably an OR of RSPAMD_RECEIVED_FLAG_* bits -- confirm */
+};
+
/**
* Process headers and store them in `target`
* @param task
diff --git a/src/libmime/smtp_parsers.h b/src/libmime/smtp_parsers.h
index fdd390f22..6904bece0 100644
--- a/src/libmime/smtp_parsers.h
+++ b/src/libmime/smtp_parsers.h
@@ -34,6 +34,9 @@ rspamd_rfc2047_parser (const gchar *in, gsize len, gint *pencoding,
const gchar **charset, gsize *charset_len,
const gchar **encoded, gsize *encoded_len);
+rspamd_inet_addr_t* rspamd_parse_smtp_ip (const char *data, size_t len, /* NOTE(review): implementation is not part of this diff; presumably parses an IP address from data[0..len) -- confirm */
+ rspamd_mempool_t *pool); /* NOTE(review): presumably the pool that owns the returned address -- confirm */
+
guint64 rspamd_parse_smtp_date (const char *data, size_t len);
#endif /* SRC_LIBMIME_SMTP_PARSERS_H_ */
More information about the Commits
mailing list