commit c566966: [Minor] Remove ragel based received parser

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Feb 7 15:14:20 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-02-07 15:13:35 +0000
URL: https://github.com/rspamd/rspamd/commit/c56696612bb1e20fe907f6285866c4cb841a38e4 (HEAD -> master)

[Minor] Remove ragel based received parser

---
 src/CMakeLists.txt                |   1 -
 src/ragel/smtp_received.rl        |  61 -------
 src/ragel/smtp_received_parser.rl | 327 --------------------------------------
 3 files changed, 389 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 71ce71119..6fbfa577d 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -118,7 +118,6 @@ SET(RAGEL_DEPENDS "${CMAKE_SOURCE_DIR}/src/ragel/smtp_address.rl"
 	"${CMAKE_SOURCE_DIR}/src/ragel/smtp_date.rl"
 	"${CMAKE_SOURCE_DIR}/src/ragel/smtp_ip.rl"
 	"${CMAKE_SOURCE_DIR}/src/ragel/smtp_base.rl"
-	"${CMAKE_SOURCE_DIR}/src/ragel/smtp_received.rl"
 	"${CMAKE_SOURCE_DIR}/src/ragel/content_disposition.rl")
 RAGEL_TARGET(ragel_smtp_addr
 	INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/smtp_addr_parser.rl
diff --git a/src/ragel/smtp_received.rl b/src/ragel/smtp_received.rl
deleted file mode 100644
index 7635fcee4..000000000
--- a/src/ragel/smtp_received.rl
+++ /dev/null
@@ -1,61 +0,0 @@
-%%{
-  machine smtp_received;
-
-
-  # http://tools.ietf.org/html/rfc5321#section-4.4
-
-  Addtl_Link     = Atom;
-  Link           = "TCP" | Addtl_Link;
-  Attdl_Protocol = Atom;
-  Protocol       = "ESMTP"i %ESMTP_proto |
-                   "SMTP"i %SMTP_proto |
-                   "ESMTPS"i %ESMTPS_proto |
-                   "ESMTPA"i %ESMTPA_proto |
-                   "ESMTPSA"i %ESMTPSA_proto |
-                   "LMTP"i %LMTP_proto |
-                   "IMAP"i %IMAP_proto |
-                   Attdl_Protocol;
-
-  TCP_info       = address_literal >Real_IP_Start %Real_IP_End |
-                  ( Domain >Real_Domain_Start %Real_Domain_End FWS address_literal >Real_IP_Start %Real_IP_End ) |
-                  ( non_conformant_address_literal >Real_IP_Start %Real_IP_End );
-  Extended_Domain  = (Domain >Real_Domain_Start %Real_Domain_End | # Used to be a real domain
-                  ( Domain >Reported_Domain_Start %Reported_Domain_End FWS "(" TCP_info ")" ) | # Here domain is something specified by remote side
-                  ( address_literal >Real_Domain_Start %Real_Domain_End FWS "(" TCP_info ")" ) );
-
-  ccontent = ctext | FWS | '(' @{ fcall balanced_ccontent; };
-  balanced_ccontent := ccontent* ')' @{ fret; };
-  comment        =   "(" ((WSP* ccontent)* WSP*) >Comment_Start %Comment_End ")";
-  CFWS           =   WSP* (comment+ WSP*)*;
-
-  From_domain    = "FROM"i FWS Extended_Domain >From_Start %From_End;
-  By_domain      = "BY"i FWS Extended_Domain >By_Start %By_End;
-
-  Retarded_Domain = TCP_info;
-  From_domain_retarded = "FROM"i FWS Retarded_Domain >From_Start %From_End;
-
-  Via            = CFWS "VIA"i FWS Link;
-  With           = CFWS "WITH"i FWS Protocol;
-
-  id_left        = dot_atom_text;
-  no_fold_literal = "[" dtext* "]";
-  id_right       = dot_atom_text | no_fold_literal;
-  msg_id         = "<" id_left "@" id_right ">";
-  ID             = CFWS "ID"i FWS ( Dot_string | msg_id );
-
-  For            = CFWS "FOR"i FWS ( Path | Mailbox ) >For_Start %For_End;
-  Additional_Registered_Clauses  = CFWS Atom FWS String;
-  Opt_info       = Via? With? ID? For? Additional_Registered_Clauses?;
-  # Here we make From part optional just because many received headers lack it
-  Received       = From_domain? CFWS? By_domain? CFWS? Opt_info CFWS? ";" FWS date_time >Date_Start %Date_End CFWS?;
-  Received_retarded = From_domain_retarded CFWS? By_domain? CFWS? Opt_info CFWS? ";" FWS date_time >Date_Start %Date_End CFWS?;
-
-  prepush {
-    if (top >= st_storage.size) {
-      st_storage.size = (top + 1) * 2;
-      st_storage.data = realloc (st_storage.data, st_storage.size * sizeof (int));
-      g_assert (st_storage.data != NULL);
-      stack = st_storage.data;
-    }
-  }
-}%%
diff --git a/src/ragel/smtp_received_parser.rl b/src/ragel/smtp_received_parser.rl
deleted file mode 100644
index 7c747f9f5..000000000
--- a/src/ragel/smtp_received_parser.rl
+++ /dev/null
@@ -1,327 +0,0 @@
-%%{
-
-  machine smtp_received_parser;
-
-
-  action IP6_start {
-    in_v6 = 1;
-    ip_start = p;
-  }
-  action IP6_end {
-    in_v6 = 0;
-    ip_end = p;
-  }
-  action IP4_start {
-    if (!in_v6) {
-      ip_start = p;
-    }
-  }
-  action IP4_end {
-    if (!in_v6) {
-      ip_end = p;
-    }
-  }
-
-  action User_start {
-    addr->user = p;
-  }
-
-  action User_end {
-    if (addr->user) {
-      addr->user_len = p - addr->user;
-    }
-  }
-
-  action Domain_start {
-    addr->domain = p;
-  }
-
-  action Domain_end {
-    if (addr->domain) {
-      addr->domain_len = p - addr->domain;
-    }
-  }
-
-  action Domain_addr_start {
-    addr->domain = p;
-    addr->flags |= RSPAMD_EMAIL_ADDR_IP;
-  }
-
-  action Domain_addr_end {
-    if (addr->domain) {
-      addr->domain_len = p - addr->domain;
-    }
-  }
-
-  action User_has_backslash {
-    addr->flags |= RSPAMD_EMAIL_ADDR_HAS_BACKSLASH;
-  }
-
-  action Quoted_addr {
-    addr->flags |= RSPAMD_EMAIL_ADDR_QUOTED;
-  }
-
-  action Empty_addr {
-    addr->flags |= RSPAMD_EMAIL_ADDR_EMPTY;
-    addr->addr = "";
-    addr->user = addr->addr;
-    addr->domain = addr->addr;
-  }
-
-  action Valid_addr {
-    addr->flags |= RSPAMD_EMAIL_ADDR_VALID;
-  }
-
-  action Addr_has_angle {
-    addr->flags |= RSPAMD_EMAIL_ADDR_BRACED;
-  }
-
-  action Addr_start {
-    addr->addr = p;
-  }
-
-  action Addr_end {
-    if (addr->addr) {
-      addr->addr_len = p - addr->addr;
-    }
-  }
-
-  action Real_Domain_Start {
-    real_domain_start = p;
-  }
-  action Real_Domain_End {
-    real_domain_end = p;
-  }
-  action Reported_Domain_Start {
-    reported_domain_start = p;
-  }
-  action Reported_Domain_End {
-    reported_domain_end = p;
-  }
-
-  action Real_IP_Start {
-    if (real_ip_end == NULL && real_ip_start == NULL) {
-      real_ip_start = p;
-    }
-  }
-  action Real_IP_End {
-    if (real_ip_end == NULL && real_ip_start != NULL) {
-      if (ip_start && ip_end && ip_end > ip_start) {
-        real_ip_start = ip_start;
-        real_ip_end = ip_end;
-      }
-      else {
-        real_ip_end = p;
-      }
-    }
-
-    ip_start = NULL;
-    ip_end = NULL;
-  }
-
-  action From_Start {
-    real_domain_start = NULL;
-    real_domain_end = NULL;
-    reported_domain_start = NULL;
-    reported_domain_end = NULL;
-    ip_start = NULL;
-    ip_end = NULL;
-    for_start = NULL;
-    for_end = NULL;
-  }
-
-  action By_Start {
-    real_domain_start = NULL;
-    real_domain_end = NULL;
-    reported_domain_start = NULL;
-    reported_domain_end = NULL;
-    ip_start = NULL;
-    ip_end = NULL;
-    for_start = NULL;
-    for_end = NULL;
-  }
-
-  action By_End {
-    if (real_domain_end && real_domain_start && real_domain_end > real_domain_start) {
-      tmplen = real_domain_end - real_domain_start;
-      rh->by_hostname = rspamd_mempool_alloc (task->task_pool, tmplen + 1);
-      rspamd_strlcpy (rh->by_hostname, real_domain_start, tmplen + 1);
-    }
-    else if (reported_domain_end && reported_domain_start && reported_domain_end > reported_domain_start) {
-      len = reported_domain_end - reported_domain_start;
-      rh->by_hostname = rspamd_mempool_alloc (task->task_pool, tmplen + 1);
-      rspamd_strlcpy (rh->by_hostname, reported_domain_start, tmplen + 1);
-    }
-  }
-
-  action From_End {
-    if (real_domain_end && real_domain_start && real_domain_end > real_domain_start) {
-      tmplen = real_domain_end - real_domain_start;
-      rh->real_hostname = rspamd_mempool_alloc (task->task_pool, tmplen + 1);
-      rspamd_strlcpy (rh->real_hostname, real_domain_start, tmplen + 1);
-    }
-    if (reported_domain_end && reported_domain_start && reported_domain_end > reported_domain_start) {
-      tmplen = reported_domain_end - reported_domain_start;
-      rh->from_hostname = rspamd_mempool_alloc (task->task_pool, tmplen + 1);
-      rspamd_strlcpy (rh->from_hostname, reported_domain_start, tmplen + 1);
-    }
-  }
-
-  action For_Start {
-    for_start = p;
-  }
-
-  action For_End {
-    if (for_start && p > for_start) {
-      for_end = p;
-      tmplen = for_end - for_start;
-      rh->for_mbox = rspamd_mempool_alloc (task->task_pool, tmplen + 1);
-      rspamd_strlcpy (rh->for_mbox, for_start, tmplen + 1);
-    }
-  }
-
-  action SMTP_proto {
-    rh->type = RSPAMD_RECEIVED_SMTP;
-  }
-  action ESMTPS_proto {
-    rh->type = RSPAMD_RECEIVED_ESMTPS;
-  }
-  action ESMTPA_proto {
-    rh->type = RSPAMD_RECEIVED_ESMTPA;
-  }
-  action ESMTP_proto {
-    rh->type = RSPAMD_RECEIVED_ESMTP;
-  }
-  action ESMTPSA_proto {
-    rh->type = RSPAMD_RECEIVED_ESMTPSA;
-  }
-  action LMTP_proto {
-    rh->type = RSPAMD_RECEIVED_LMTP;
-  }
-  action IMAP_proto {
-    rh->type = RSPAMD_RECEIVED_IMAP;
-  }
-
-  action Date_Start {
-    date_start = p;
-  }
-  action Date_End {
-    if (date_start && p > date_start) {
-      rh->timestamp = rspamd_tm_to_time (&tm, tz);
-    }
-  }
-
-  action Comment_Start {
-    cstart = p;
-  }
-
-  action Comment_End {
-    cend = p;
-
-    if (cend && cstart && cend > cstart) {
-      rspamd_smtp_maybe_process_smtp_comment (task, cstart, cend - cstart, rh);
-    }
-
-    cend = NULL;
-    cstart = NULL;
-  }
-
-  include smtp_base "smtp_base.rl";
-  include smtp_ip "smtp_ip.rl";
-  include smtp_date "smtp_date.rl";
-  include smtp_address"smtp_address.rl";
-  include smtp_received "smtp_received.rl";
-
-  main := Received;
-  retarded := Received_retarded;
-
-}%%
-
-#include "smtp_parsers.h"
-
-%% write data;
-
-int
-rspamd_smtp_received_parse (struct rspamd_task *task, const char *data, size_t len, struct received_header *rh)
-{
-  struct rspamd_email_address for_addr, *addr;
-  const char *real_domain_start, *real_domain_end,
-              *real_ip_start, *real_ip_end,
-              *reported_domain_start, *reported_domain_end,
-              *ip_start, *ip_end, *date_start,
-              *for_start, *for_end, *tmp, *cstart, *cend;
-  struct tm tm;
-  const char *p = data, *pe = data + len, *eof;
-  int cs, in_v6 = 0, *stack = NULL;
-  gsize top = 0;
-  glong tz = 0;
-  struct _ragel_st_storage {
-    int *data;
-    gsize size;
-  } st_storage;
-  guint tmplen;
-  gboolean retarded_checked = FALSE;
-
-  memset (&st_storage, 0, sizeof (st_storage));
-  memset (rh, 0, sizeof (*rh));
-  memset (&tm, 0, sizeof (tm));
-  real_domain_start = NULL;
-  real_domain_end = NULL;
-  real_ip_start = NULL;
-  real_ip_end = NULL;
-  reported_domain_start = NULL;
-  reported_domain_end = NULL;
-  ip_start = NULL;
-  ip_end = NULL;
-  date_start = NULL;
-  for_start = NULL;
-  for_end = NULL;
-  cstart = NULL;
-  cend = NULL;
-  rh->type = RSPAMD_RECEIVED_UNKNOWN;
-
-  memset (&for_addr, 0, sizeof (for_addr));
-  addr = &for_addr;
-  eof = pe;
-
-  %% write init;
-reexec_retarded:
-  %% write exec;
-  %% write exports;
-
-  if (!real_ip_end && !retarded_checked) {
-    cs = smtp_received_parser_en_retarded;
-    retarded_checked = TRUE;
-    goto reexec_retarded;
-  }
-
-  if (real_ip_end && real_ip_start && real_ip_end > real_ip_start) {
-    tmplen = real_ip_end - real_ip_start;
-    rh->real_ip = rspamd_mempool_alloc (task->task_pool, tmplen + 1);
-    rspamd_strlcpy (rh->real_ip, real_ip_start, tmplen + 1);
-  }
-
-  if (!rh->real_ip && rh->comment_ip) {
-    rh->real_ip = rh->comment_ip;
-  }
-
-  if (rh->real_ip && !rh->from_ip) {
-    rh->from_ip = rh->real_ip;
-  }
-  if (rh->real_hostname && !rh->from_hostname) {
-    rh->from_hostname = rh->real_hostname;
-  }
-
-  if (rh->real_ip) {
-    if (rspamd_parse_inet_address (&rh->addr, rh->real_ip, strlen (rh->real_ip))) {
-      rspamd_mempool_add_destructor (task->task_pool,
-              (rspamd_mempool_destruct_t)rspamd_inet_address_free, rh->addr);
-    }
-  }
-
-  if (st_storage.data) {
-    free (st_storage.data);
-  }
-
-  return cs;
-}


More information about the Commits mailing list