commit 5d37956: [Project] Attach new received parser

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Feb 7 15:14:16 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-02-07 13:57:24 +0000
URL: https://github.com/rspamd/rspamd/commit/5d3795649ea758ab176195c68aeba5a50a972356

[Project] Attach new received parser

---
 src/CMakeLists.txt         | 16 ++++++++--------
 src/libmime/mime_headers.c | 48 +++++++++++++++++++++++++++++++++++-----------
 test/lua/unit/received.lua | 25 ++++++++++++++++++++----
 3 files changed, 66 insertions(+), 23 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index ff7198270..71ce71119 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -117,7 +117,7 @@ LIST(LENGTH PLUGINSSRC RSPAMD_MODULES_NUM)
 SET(RAGEL_DEPENDS "${CMAKE_SOURCE_DIR}/src/ragel/smtp_address.rl"
 	"${CMAKE_SOURCE_DIR}/src/ragel/smtp_date.rl"
 	"${CMAKE_SOURCE_DIR}/src/ragel/smtp_ip.rl"
-	"${CMAKE_SOURCE_DIR}/src/ragel/smtp_whitespace.rl"
+	"${CMAKE_SOURCE_DIR}/src/ragel/smtp_base.rl"
 	"${CMAKE_SOURCE_DIR}/src/ragel/smtp_received.rl"
 	"${CMAKE_SOURCE_DIR}/src/ragel/content_disposition.rl")
 RAGEL_TARGET(ragel_smtp_addr
@@ -125,11 +125,6 @@ RAGEL_TARGET(ragel_smtp_addr
 	DEPENDS ${RAGEL_DEPENDS}
 	COMPILE_FLAGS -T1
 	OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/smtp_addr_parser.rl.c)
-RAGEL_TARGET(ragel_smtp_received
-	INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/smtp_received_parser.rl
-	DEPENDS ${RAGEL_DEPENDS}
-	COMPILE_FLAGS -T1
-	OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/smtp_received_parser.rl.c)
 RAGEL_TARGET(ragel_content_disposition
 	INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/content_disposition_parser.rl
 	DEPENDS ${RAGEL_DEPENDS}
@@ -145,6 +140,11 @@ RAGEL_TARGET(ragel_smtp_date
 	DEPENDS ${RAGEL_DEPENDS}
 	COMPILE_FLAGS -G2
 	OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/date_parser.rl.c)
+RAGEL_TARGET(ragel_smtp_ip
+	INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/smtp_ip_parser.rl
+	DEPENDS ${RAGEL_DEPENDS}
+	COMPILE_FLAGS -G2
+	OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ip_parser.rl.c)
 ######################### LINK SECTION ###############################
 
 ADD_LIBRARY(rspamd-server STATIC
@@ -157,12 +157,12 @@ ADD_LIBRARY(rspamd-server STATIC
 		${CMAKE_CURRENT_BINARY_DIR}/modules.c
 		${PLUGINSSRC}
 		"${RAGEL_ragel_smtp_addr_OUTPUTS}"
-		"${RAGEL_ragel_smtp_received_OUTPUTS}"
 		"${RAGEL_ragel_newlines_strip_OUTPUTS}"
 		"${RAGEL_ragel_content_type_OUTPUTS}"
 		"${RAGEL_ragel_content_disposition_OUTPUTS}"
 		"${RAGEL_ragel_rfc2047_OUTPUTS}"
-		"${RAGEL_ragel_smtp_date_OUTPUTS}")
+		"${RAGEL_ragel_smtp_date_OUTPUTS}"
+		"${RAGEL_ragel_smtp_ip_OUTPUTS}")
 TARGET_LINK_LIBRARIES(rspamd-server rspamd-http-parser)
 TARGET_LINK_LIBRARIES(rspamd-server rspamd-cdb)
 TARGET_LINK_LIBRARIES(rspamd-server rspamd-lpeg)
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
index a9ebbdb3a..ec3d87e8a 100644
--- a/src/libmime/mime_headers.c
+++ b/src/libmime/mime_headers.c
@@ -1103,7 +1103,7 @@ rspamd_smtp_received_spill (struct rspamd_task *task,
 		DL_APPEND (head, cur_part);
 	}
 
-	while (p > end) {
+	while (p < end) {
 		if (*p == ';') {
 			/* We are at the date separator, stop here */
 			*date_pos = p - (const guchar *)data + 1;
@@ -1171,21 +1171,39 @@ rspamd_smtp_received_process_rdns (struct rspamd_task *task,
 {
 	const gchar *p, *end;
 	gsize hlen = 0;
+	gboolean seen_dot = FALSE;
 
 	p = begin;
 	end = begin + len;
 
 	while (p < end) {
-		if (rspamd_url_is_domain (*p)) {
+		if (!g_ascii_isspace (*p) && rspamd_url_is_domain (*p)) {
+			if (*p == '.') {
+				seen_dot = TRUE;
+			}
+
 			hlen ++;
 		}
+		else {
+			break;
+		}
 
 		p ++;
 	}
 
 	if (hlen > 0) {
-		if (p == end || g_ascii_isspace (*p) || *p == '[' || *p == '(') {
-			/* We have some hostname, accept it */
+		if (p == end) {
+			/* All data looks like a hostname */
+			gchar *dest;
+
+			dest = rspamd_mempool_alloc (task->task_pool,
+					hlen + 1);
+			rspamd_strlcpy (dest, begin, hlen + 1);
+			*pdest = dest;
+
+			return TRUE;
+		}
+		else if (seen_dot && (g_ascii_isspace (*p) || *p == '[' || *p == '(')) {
 			gchar *dest;
 
 			dest = rspamd_mempool_alloc (task->task_pool,
@@ -1214,8 +1232,8 @@ rspamd_smtp_received_process_from_comment (struct rspamd_task *task,
 		const gchar *brace_pos = memchr (comment->data, ']', comment->dlen);
 
 		if (brace_pos) {
-			addr = rspamd_parse_smtp_ip (comment->data,
-					brace_pos - comment->data + 1,
+			addr = rspamd_parse_inet_address_pool (comment->data + 1,
+					brace_pos - comment->data - 1,
 					task->task_pool);
 
 			if (addr) {
@@ -1245,8 +1263,8 @@ rspamd_smtp_received_process_from_comment (struct rspamd_task *task,
 			ebrace_pos = memchr (obrace_pos, ']', dend - obrace_pos);
 
 			if (ebrace_pos) {
-				addr = rspamd_parse_smtp_ip (obrace_pos,
-						ebrace_pos - obrace_pos + 1,
+				addr = rspamd_parse_inet_address_pool (obrace_pos + 1,
+						ebrace_pos - obrace_pos - 1,
 						task->task_pool);
 
 				if (addr) {
@@ -1307,8 +1325,8 @@ rspamd_smtp_received_process_from (struct rspamd_task *task,
 				rspamd_inet_addr_t *addr;
 
 				if (brace_pos) {
-					addr = rspamd_parse_smtp_ip (rpart->data,
-							brace_pos - rpart->data + 1,
+					addr = rspamd_parse_inet_address_pool (rpart->data + 1,
+							brace_pos - rpart->data - 1,
 							task->task_pool);
 
 					if (addr) {
@@ -1356,7 +1374,7 @@ rspamd_smtp_received_parse (struct rspamd_task *task,
 							size_t len,
 							struct received_header *rh)
 {
-	goffset date_pos = 0;
+	goffset date_pos = -1;
 	struct rspamd_received_part *head, *cur;
 	rspamd_ftok_t t1, t2;
 
@@ -1452,6 +1470,9 @@ rspamd_smtp_received_parse (struct rspamd_task *task,
 				}
 			}
 
+			break;
+		default:
+			/* Do nothing */
 			break;
 		}
 	}
@@ -1464,5 +1485,10 @@ rspamd_smtp_received_parse (struct rspamd_task *task,
 		rh->from_hostname = rh->real_hostname;
 	}
 
+	if (date_pos > 0 && date_pos < len) {
+		rh->timestamp = rspamd_parse_smtp_date (data + date_pos,
+				len - date_pos);
+	}
+
 	return 0;
 }
\ No newline at end of file
diff --git a/test/lua/unit/received.lua b/test/lua/unit/received.lua
index ac21c0e83..8185d9ada 100644
--- a/test/lua/unit/received.lua
+++ b/test/lua/unit/received.lua
@@ -56,8 +56,8 @@ context("Received headers parser", function()
  for exim-dev at exim.org; Sat, 30 Jun 2018 02:54:24 +0100]],
      {
        from_hostname = 'smtp.spodhuis.org',
-       from_ip = '2a02:898:31:0:48:4558:736d:7470',
-       real_ip = '2a02:898:31:0:48:4558:736d:7470',
+       from_ip = '2a02:898:31::48:4558:736d:7470',
+       real_ip = '2a02:898:31::48:4558:736d:7470',
        by_hostname = 'hummus.csx.cam.ac.uk',
      }
     },
@@ -68,12 +68,29 @@ context("Received headers parser", function()
        real_ip = '1.1.1.1',
      }
     },
-    {'from [192.83.172.101] by (HELLO 148.251.238.35 ) (148.251.238.35) by guovswzqkvry051 at sohu.com with gg login by AOL 6.0 for Windows US sub 008 SMTP  ; Tue, 03 Jul 2018 09:01:47 -0300',
+    {'from [192.83.172.101] (HELLO 148.251.238.35) (148.251.238.35) by guovswzqkvry051 at sohu.com with gg login by AOL 6.0 for Windows US sub 008 SMTP  ; Tue, 03 Jul 2018 09:01:47 -0300',
      {
        from_ip = '192.83.172.101',
        by_hostname = '',
-     }
+     },
+    },
+    {'from [61.174.163.26] (helo=host) by sc8-sf-list1.sourceforge.net with smtp (Exim 3.31-VA-mm2 #1 (Debian)) id 18t2z0-0001NX-00 for <razor-users at lists.sourceforge.net>; Wed, 12 Mar 2003 01:57:10 -0800',
+     {
+       from_ip = '61.174.163.26',
+       by_hostname = 'sc8-sf-list1.sourceforge.net',
+     },
     },
+    {[[from [127.0.0.1] (unknown [65.19.167.131])
+	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
+	(Client did not present a certificate)
+	by mail01.someotherdomain.org (Postfix) with ESMTPSA id 43tYMW2yKHz50MHS
+	for <user2 at somedomain.com>; Mon,  4 Feb 2019 16:39:35 +0000 (GMT)]],
+     {
+       from_ip = '65.19.167.131',
+       real_ip = '65.19.167.131',
+       by_hostname = 'mail01.someotherdomain.org',
+     }
+    }
   }
 
   local task = ffi.C.rspamd_task_new(nil, nil)


More information about the Commits mailing list