commit 11fafb3: [Project] Add heuristical from parser to received parser
Vsevolod Stakhov
vsevolod at highsecure.ru
Thu Feb 7 15:14:10 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-02-06 14:47:20 +0000
URL: https://github.com/rspamd/rspamd/commit/11fafb3cde34c6ebbc0d4b1d694e2185aa27ae27
[Project] Add heuristical from parser to received parser
---
src/libmime/email_addr.c | 26 -----
src/libmime/email_addr.h | 10 --
src/libmime/mime_headers.c | 232 +++++++++++++++++++++++++++++++++++++++++++--
src/libmime/mime_headers.h | 13 ++-
src/libserver/task.h | 2 +-
5 files changed, 229 insertions(+), 54 deletions(-)
diff --git a/src/libmime/email_addr.c b/src/libmime/email_addr.c
index b8d4b04f9..38de7b4f7 100644
--- a/src/libmime/email_addr.c
+++ b/src/libmime/email_addr.c
@@ -496,30 +496,4 @@ rspamd_email_address_list_destroy (gpointer ptr)
}
g_ptr_array_free (ar, TRUE);
-}
-
-void rspamd_smtp_maybe_process_smtp_comment (struct rspamd_task *task,
- const char *data, size_t len,
- struct received_header *rh)
-{
- if (!rh->by_hostname) {
- /* Heuristic to detect IP addresses like in Exim received:
- * [xxx]:port or [xxx]
- */
-
- if (*data == '[' && len > 2) {
- const gchar *p = data + 1;
- gsize iplen = rspamd_memcspn (p, "]", len - 1);
-
- if (iplen > 0) {
- guchar tbuf[sizeof(struct in6_addr) + sizeof(guint32)];
-
- if (rspamd_parse_inet_address_ip4 (p, iplen, tbuf) ||
- rspamd_parse_inet_address_ip6 (p, iplen, tbuf)) {
- rh->comment_ip = rspamd_mempool_alloc (task->task_pool, iplen + 1);
- rspamd_strlcpy (rh->comment_ip, p, iplen + 1);
- }
- }
- }
- }
}
\ No newline at end of file
diff --git a/src/libmime/email_addr.h b/src/libmime/email_addr.h
index 129d2ba44..a08d8ab3c 100644
--- a/src/libmime/email_addr.h
+++ b/src/libmime/email_addr.h
@@ -53,16 +53,6 @@ struct rspamd_email_address {
struct received_header;
struct rspamd_task;
-/**
- * Try to parse SMTP comment to process stupid Exim received headers
- * @param task
- * @param data
- * @param len
- * @param rh
- */
-void rspamd_smtp_maybe_process_smtp_comment (struct rspamd_task *task,
- const char *data, size_t len,
- struct received_header *rh);
/**
* Create email address from a single rfc822 address (e.g. from mail from:)
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
index 19ad3262e..cb87bc46e 100644
--- a/src/libmime/mime_headers.c
+++ b/src/libmime/mime_headers.c
@@ -19,6 +19,7 @@
#include "mime_encoding.h"
#include "contrib/uthash/utlist.h"
#include "libserver/mempool_vars_internal.h"
+#include "libserver/url.h"
#include <unicode/utf8.h>
static void
@@ -37,19 +38,22 @@ rspamd_mime_header_check_special (struct rspamd_task *task,
recv = rspamd_mempool_alloc0 (task->task_pool,
sizeof (struct received_header));
recv->hdr = rh;
- rspamd_smtp_received_parse (task, rh->decoded,
- strlen (rh->decoded), recv);
- /* Set flags */
- if (recv->type == RSPAMD_RECEIVED_ESMTPA ||
+
+ if (rspamd_smtp_received_parse (task, rh->decoded,
+ strlen (rh->decoded), recv) != -1) {
+ /* Set flags */
+ if (recv->type == RSPAMD_RECEIVED_ESMTPA ||
recv->type == RSPAMD_RECEIVED_ESMTPSA) {
- recv->flags |= RSPAMD_RECEIVED_FLAG_AUTHENTICATED;
- }
- if (recv->type == RSPAMD_RECEIVED_ESMTPS ||
+ recv->flags |= RSPAMD_RECEIVED_FLAG_AUTHENTICATED;
+ }
+ if (recv->type == RSPAMD_RECEIVED_ESMTPS ||
recv->type == RSPAMD_RECEIVED_ESMTPSA) {
- recv->flags |= RSPAMD_RECEIVED_FLAG_SSL;
+ recv->flags |= RSPAMD_RECEIVED_FLAG_SSL;
+ }
+
+ g_ptr_array_add (task->received, recv);
}
- g_ptr_array_add (task->received, recv);
rh->type = RSPAMD_HEADER_RECEIVED;
break;
case 0x76F31A09F4352521ULL: /* to */
@@ -931,6 +935,8 @@ rspamd_smtp_received_process_part (struct rspamd_task *task,
memcpy (comment->data, c, p - c);
rspamd_str_lc (comment->data, p - c);
comment->dlen = p - c;
+ comment->data = (gchar *)rspamd_string_len_strip (
+ comment->data, &comment->dlen, " \t");
if (!npart->head_comment) {
comment->prev = NULL;
@@ -964,6 +970,8 @@ rspamd_smtp_received_process_part (struct rspamd_task *task,
memcpy (npart->data, c, p - c);
rspamd_str_lc (npart->data, p - c);
npart->dlen = p - c;
+ npart->data = (gchar *)rspamd_string_len_strip (
+ npart->data, &npart->dlen, " \t");
}
}
@@ -997,6 +1005,8 @@ rspamd_smtp_received_process_part (struct rspamd_task *task,
memcpy (npart->data, c, p - c);
rspamd_str_lc (npart->data, p - c);
npart->dlen = p - c;
+ npart->data = (gchar *)rspamd_string_len_strip (
+ npart->data, &npart->dlen, " \t");
}
}
@@ -1029,6 +1039,8 @@ rspamd_smtp_received_process_part (struct rspamd_task *task,
memcpy (npart->data, c, p - c);
rspamd_str_lc (npart->data, p - c);
npart->dlen = p - c;
+ npart->data = (gchar *)rspamd_string_len_strip (npart->data,
+ &npart->dlen, " \t");
}
return npart;
@@ -1161,13 +1173,205 @@ rspamd_smtp_received_spill (struct rspamd_task *task,
return head;
}
+static gboolean
+rspamd_smtp_received_process_rdns (struct rspamd_task *task,
+ const gchar *begin,
+ gsize len,
+ struct received_header *rh,
+ gboolean is_real)
+{
+ const gchar *p, *end;
+ gsize hlen = 0;
+
+ p = begin;
+ end = begin + len;
+
+ while (p < end) {
+ if (rspamd_url_is_domain (*p)) {
+ hlen ++;
+ }
+
+ p ++;
+ }
+
+ if (hlen > 0) {
+ if (p == end || g_ascii_isspace (*p) || *p == '[' || *p == '(') {
+ /* We have some hostname, accept it */
+ gchar *dest;
+
+ dest = rspamd_mempool_alloc (task->task_pool,
+ hlen + 1);
+ rspamd_strlcpy (dest, begin, hlen + 1);
+
+ if (is_real) {
+ rh->real_hostname = dest;
+ }
+ else {
+ rh->from_hostname = dest;
+ }
+
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static gboolean
+rspamd_smtp_received_process_from_comment (struct rspamd_task *task,
+ struct received_header *rh,
+ struct rspamd_received_comment *comment)
+{
+ rspamd_inet_addr_t *addr;
+ gboolean ret = FALSE;
+
+ if (comment->data[0] == '[') {
+ /* Likely Exim version */
+
+ const gchar *brace_pos = memchr (comment->data, ']', comment->dlen);
+
+ if (brace_pos) {
+ addr = rspamd_parse_smtp_ip (comment->data,
+ brace_pos - comment->data,
+ task->task_pool);
+
+ if (addr) {
+ rh->addr = addr;
+ rh->real_ip = rspamd_inet_address_to_string (addr);
+ rh->from_ip = rh->real_ip;
+ }
+ }
+ }
+ else if (g_ascii_isxdigit (comment->data[0])) {
+ /* Try to parse IP address */
+ addr = rspamd_parse_inet_address_pool (comment->data,
+ comment->dlen, task->task_pool);
+ if (addr) {
+ rh->addr = addr;
+ rh->real_ip = rspamd_inet_address_to_string (addr);
+ rh->from_ip = rh->real_ip;
+ }
+ }
+ else {
+ /* Try canonical Postfix version: rdns [ip] */
+ const gchar *obrace_pos = memchr (comment->data, '[', comment->dlen),
+ *ebrace_pos, *dend;
+
+ if (obrace_pos) {
+ dend = comment->data + comment->dlen;
+ ebrace_pos = memchr (obrace_pos, ']', dend - obrace_pos);
+
+ if (ebrace_pos) {
+ addr = rspamd_parse_smtp_ip (obrace_pos,
+ ebrace_pos - obrace_pos + 1, task->task_pool);
+
+ if (addr) {
+ rh->addr = addr;
+ rh->real_ip = rspamd_inet_address_to_string (addr);
+ rh->from_ip = rh->real_ip;
+
+ /* Process with rDNS */
+ if (rspamd_smtp_received_process_rdns (task,
+ comment->data,
+ obrace_pos - comment->data,
+ rh,
+ TRUE)) {
+ ret = TRUE;
+ }
+ }
+ }
+ }
+ else {
+ /* Hostname or some crap, sigh... */
+ if (rspamd_smtp_received_process_rdns (task,
+ comment->data,
+ comment->dlen,
+ rh,
+ TRUE)) {
+ ret = TRUE;
+ }
+ }
+ }
+
+ return ret;
+}
+
+static void
+rspamd_smtp_received_process_from (struct rspamd_task *task,
+ struct rspamd_received_part *rpart,
+ struct received_header *rh)
+{
+ if (rpart->dlen > 0) {
+ /* We have seen multiple cases:
+ * - [ip] (hostname/unknown [real_ip])
+ * - helo (hostname/unknown [real_ip])
+ * - [ip]
+ * - hostname
+ * - hostname ([ip]:port helo=xxx)
+ * Maybe more...
+ */
+ gboolean seen_ip_in_data = FALSE, seen_rdns_in_comment = FALSE;
+
+ if (rpart->head_comment && rpart->head_comment->dlen > 0) {
+ /* We can have info within comment as part of RFC */
+ seen_rdns_in_comment = rspamd_smtp_received_process_from_comment (
+ task, rh, rpart->head_comment);
+ }
+ else if (rpart->data[0] == '[') {
+ /* No comment, just something that looks like SMTP IP */
+ const gchar *brace_pos = memchr (rpart->data, ']', rpart->dlen);
+ rspamd_inet_addr_t *addr;
+
+ if (brace_pos) {
+ addr = rspamd_parse_smtp_ip (rpart->data, brace_pos -
+ rpart->data, task->task_pool);
+
+ if (addr) {
+ seen_ip_in_data = TRUE;
+ rh->addr = addr;
+ rh->real_ip = rspamd_inet_address_to_string (addr);
+ rh->from_ip = rh->real_ip;
+ }
+ }
+ }
+ else if (g_ascii_isxdigit (rpart->data[0])) {
+ /* Try to parse IP address */
+ rspamd_inet_addr_t *addr;
+ addr = rspamd_parse_inet_address_pool (rpart->data,
+ rpart->dlen, task->task_pool);
+ if (addr) {
+ seen_ip_in_data = TRUE;
+ rh->addr = addr;
+ rh->real_ip = rspamd_inet_address_to_string (addr);
+ rh->from_ip = rh->real_ip;
+ }
+ }
+
+ if (!seen_ip_in_data && !seen_rdns_in_comment) {
+ /* Get rDNS */
+ rspamd_smtp_received_process_rdns (task,
+ rpart->data,
+ rpart->dlen,
+ rh,
+ FALSE);
+ }
+ }
+ else {
+ /* rpart->dlen = 0 */
+
+ if (rpart->head_comment && rpart->head_comment->dlen > 0) {
+ rspamd_smtp_received_process_from_comment (task,
+ rh, rpart->head_comment);
+ }
+ }
+}
+
int
rspamd_smtp_received_parse (struct rspamd_task *task,
const char *data,
size_t len,
struct received_header *rh)
{
- const gchar *p, *c, *end;
goffset date_pos = 0;
struct rspamd_received_part *head, *cur;
@@ -1177,5 +1381,13 @@ rspamd_smtp_received_parse (struct rspamd_task *task,
return -1;
}
+ DL_FOREACH (head, cur) {
+ switch (cur->type) {
+ case RSPAMD_RECEIVED_PART_FROM:
+ rspamd_smtp_received_process_from (task, cur, rh);
+ break;
+ }
+ }
+
return 0;
}
\ No newline at end of file
diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h
index ceed5ab06..cd21b79d5 100644
--- a/src/libmime/mime_headers.h
+++ b/src/libmime/mime_headers.h
@@ -72,13 +72,12 @@ enum rspamd_received_type {
#define RSPAMD_RECEIVED_FLAG_AUTHENTICATED (1 << 2)
struct received_header {
- gchar *from_hostname;
- gchar *from_ip;
- gchar *real_hostname;
- gchar *real_ip;
- gchar *by_hostname;
- gchar *for_mbox;
- gchar *comment_ip;
+ const gchar *from_hostname;
+ const gchar *from_ip;
+ const gchar *real_hostname;
+ const gchar *real_ip;
+ const gchar *by_hostname;
+ const gchar *for_mbox;
rspamd_inet_addr_t *addr;
struct rspamd_mime_header *hdr;
time_t timestamp;
diff --git a/src/libserver/task.h b/src/libserver/task.h
index 7cdc09538..93e0ae0e8 100644
--- a/src/libserver/task.h
+++ b/src/libserver/task.h
@@ -150,7 +150,7 @@ struct rspamd_task {
gchar *deliver_to; /**< address to deliver */
gchar *user; /**< user to deliver */
gchar *subject; /**< subject (for non-mime) */
- gchar *hostname; /**< hostname reported by MTA */
+ const gchar *hostname; /**< hostname reported by MTA */
GHashTable *request_headers; /**< HTTP headers in a request */
GHashTable *reply_headers; /**< Custom reply headers */
struct {
More information about the Commits
mailing list