commit b8ccacc: [Project] Rework ragel machines
Vsevolod Stakhov
vsevolod at highsecure.ru
Thu Feb 7 15:14:05 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-02-05 16:00:06 +0000
URL: https://github.com/rspamd/rspamd/commit/b8ccacc1744d1c0385449c3c9996f2fcfecfb8d8
[Project] Rework ragel machines
---
src/ragel/content_disposition_parser.rl | 2 +-
src/ragel/smtp_addr_parser.rl | 2 +-
src/ragel/smtp_address.rl | 37 ++++++++++++++++++++-------------
src/ragel/smtp_date_parser.rl | 2 +-
src/ragel/smtp_ip.rl | 15 +++++++++++++
src/ragel/smtp_received.rl | 8 +++++--
src/ragel/smtp_received_parser.rl | 12 ++++++++++-
src/ragel/smtp_whitespace.rl | 28 -------------------------
8 files changed, 57 insertions(+), 49 deletions(-)
diff --git a/src/ragel/content_disposition_parser.rl b/src/ragel/content_disposition_parser.rl
index c35d2b232..e17f900a1 100644
--- a/src/ragel/content_disposition_parser.rl
+++ b/src/ragel/content_disposition_parser.rl
@@ -86,7 +86,7 @@
}
}
- include smtp_whitespace "smtp_whitespace.rl";
+ include smtp_base "smtp_base.rl";
include content_disposition "content_disposition.rl";
main := content_disposition;
diff --git a/src/ragel/smtp_addr_parser.rl b/src/ragel/smtp_addr_parser.rl
index 737b4ddcd..9ae7c5173 100644
--- a/src/ragel/smtp_addr_parser.rl
+++ b/src/ragel/smtp_addr_parser.rl
@@ -75,8 +75,8 @@
}
}
+ include smtp_base "smtp_base.rl";
include smtp_ip "smtp_ip.rl";
- include smtp_whitespace "smtp_whitespace.rl";
include smtp_address "smtp_address.rl";
main := SMTPAddr;
diff --git a/src/ragel/smtp_address.rl b/src/ragel/smtp_address.rl
index f5d04f620..3f03d405b 100644
--- a/src/ragel/smtp_address.rl
+++ b/src/ragel/smtp_address.rl
@@ -1,6 +1,27 @@
%%{
machine smtp_address;
+ # SMTP address spec
+ # Source: https://tools.ietf.org/html/rfc5321#section-4.1.2
+ # Dependencies: smtp_base + smtp_ip
+ # Required actions:
+ # - User_has_backslash
+ # - User_end
+ # - Quoted_addr
+ # - Domain_start
+ # - Domain_end
+ # - Addr_end
+ # - Addr_has_angle
+ # - Valid_addr
+ # - Empty_addr
+ # + from deps:
+ # - IP4_start
+ # - IP4_end
+ # - IP6_start
+ # - IP6_end
+ # - Domain_addr_start
+ # - Domain_addr_end
+
# SMTP address spec
# Obtained from: https://tools.ietf.org/html/rfc5321#section-4.1.2
@@ -9,23 +30,9 @@
Local_part = Dot_string >User_start %User_end | Quoted_string;
String = Atom | Quoted_string;
- Standardized_tag = Ldh_str;
- General_address_literal = Standardized_tag ":" dcontent+;
- address_literal = "[" ( IPv4_address_literal |
- IPv6_address_literal |
- General_address_literal ) >Domain_addr_start %Domain_addr_end "]";
- non_conformant_address_literal = IPv4_address_literal >Domain_addr_start %Domain_addr_end;
-
-
- sub_domain = Let_dig Ldh_str?;
- Domain = sub_domain ("." sub_domain)*;
- Atdomain = "@" Domain;
- Adl = Atdomain ( "," Atdomain )*;
-
Mailbox = Local_part "@" (address_literal | Domain >Domain_start %Domain_end);
- UnangledPath = ( Adl ":" )? Mailbox >Addr_start %Addr_end "."?;
+ UnangledPath = ( Adl ":" )? Mailbox >Addr_start %Addr_end "."?; # Addr_start/Addr_end bracket the whole mailbox; Domain_start is only for Domain above
AngledPath = "<" UnangledPath ">" %Addr_has_angle;
Path = AngledPath | UnangledPath;
SMTPAddr = space* (Path | "<>" %Empty_addr ) %Valid_addr space*;
-
}%%
diff --git a/src/ragel/smtp_date_parser.rl b/src/ragel/smtp_date_parser.rl
index bc6e5c8f0..f0d49c23a 100644
--- a/src/ragel/smtp_date_parser.rl
+++ b/src/ragel/smtp_date_parser.rl
@@ -1,7 +1,7 @@
%%{
machine smtp_date_parser;
- include smtp_whitespace "smtp_whitespace.rl";
+ include smtp_base "smtp_base.rl";
include smtp_date "smtp_date.rl";
main := date_time;
diff --git a/src/ragel/smtp_ip.rl b/src/ragel/smtp_ip.rl
index cd9bec64f..ed10c95b5 100644
--- a/src/ragel/smtp_ip.rl
+++ b/src/ragel/smtp_ip.rl
@@ -3,6 +3,14 @@
# Parses IPv4/IPv6 address
# Source: https://tools.ietf.org/html/rfc5321#section-4.1.3
+ # Dependencies: smtp_base (presumably; Standardized_tag and dcontent used below must come from it - confirm)
+ # Required actions:
+ # - IP4_start
+ # - IP4_end
+ # - IP6_start
+ # - IP6_end
+ # - Domain_addr_start
+ # - Domain_addr_end
Snum = digit{1,3};
IPv4_addr = (Snum ("." Snum){3});
@@ -18,4 +26,11 @@
IPv6_simple = IPv6_full | IPv6_comp;
IPv6_addr = IPv6_simple | IPv6v4_full | IPv6v4_comp;
IPv6_address_literal = "IPv6:" %IP6_start IPv6_addr %IP6_end;
+
+ General_address_literal = Standardized_tag ":" dcontent+;
+ address_literal = "[" ( IPv4_address_literal |
+ IPv6_address_literal |
+ General_address_literal ) >Domain_addr_start %Domain_addr_end "]";
+ non_conformant_address_literal = IPv4_address_literal >Domain_addr_start %Domain_addr_end;
+
}%%
\ No newline at end of file
diff --git a/src/ragel/smtp_received.rl b/src/ragel/smtp_received.rl
index 1c76f40fb..7635fcee4 100644
--- a/src/ragel/smtp_received.rl
+++ b/src/ragel/smtp_received.rl
@@ -25,12 +25,15 @@
ccontent = ctext | FWS | '(' @{ fcall balanced_ccontent; };
balanced_ccontent := ccontent* ')' @{ fret; };
- comment = "(" ((FWS? ccontent)* FWS?) >Comment_Start %Comment_End ")";
- CFWS = ((FWS? comment)+ FWS?) | FWS;
+ comment = "(" ((WSP* ccontent)* WSP*) >Comment_Start %Comment_End ")";
+ CFWS = WSP* (comment+ WSP*)*;
From_domain = "FROM"i FWS Extended_Domain >From_Start %From_End;
By_domain = "BY"i FWS Extended_Domain >By_Start %By_End;
+ Retarded_Domain = TCP_info;
+ From_domain_retarded = "FROM"i FWS Retarded_Domain >From_Start %From_End;
+
Via = CFWS "VIA"i FWS Link;
With = CFWS "WITH"i FWS Protocol;
@@ -45,6 +48,7 @@
Opt_info = Via? With? ID? For? Additional_Registered_Clauses?;
# Here we make From part optional just because many received headers lack it
Received = From_domain? CFWS? By_domain? CFWS? Opt_info CFWS? ";" FWS date_time >Date_Start %Date_End CFWS?;
+ Received_retarded = From_domain_retarded CFWS? By_domain? CFWS? Opt_info CFWS? ";" FWS date_time >Date_Start %Date_End CFWS?;
prepush {
if (top >= st_storage.size) {
diff --git a/src/ragel/smtp_received_parser.rl b/src/ragel/smtp_received_parser.rl
index 836a02384..7c747f9f5 100644
--- a/src/ragel/smtp_received_parser.rl
+++ b/src/ragel/smtp_received_parser.rl
@@ -226,13 +226,14 @@
cstart = NULL;
}
- include smtp_whitespace "smtp_whitespace.rl";
+ include smtp_base "smtp_base.rl";
include smtp_ip "smtp_ip.rl";
include smtp_date "smtp_date.rl";
include smtp_address"smtp_address.rl";
include smtp_received "smtp_received.rl";
main := Received;
+ retarded := Received_retarded;
}%%
@@ -259,6 +260,7 @@ rspamd_smtp_received_parse (struct rspamd_task *task, const char *data, size_t l
gsize size;
} st_storage;
guint tmplen;
+ gboolean retarded_checked = FALSE;
memset (&st_storage, 0, sizeof (st_storage));
memset (rh, 0, sizeof (*rh));
@@ -283,7 +285,15 @@ rspamd_smtp_received_parse (struct rspamd_task *task, const char *data, size_t l
eof = pe;
%% write init;
+reexec_retarded:
%% write exec;
+ %% write exports;
+
+ if (!real_ip_end && !retarded_checked) {
+ cs = smtp_received_parser_en_retarded;
+ retarded_checked = TRUE;
+ goto reexec_retarded;
+ }
if (real_ip_end && real_ip_start && real_ip_end > real_ip_start) {
tmplen = real_ip_end - real_ip_start;
diff --git a/src/ragel/smtp_whitespace.rl b/src/ragel/smtp_whitespace.rl
deleted file mode 100644
index 3b8563e8b..000000000
--- a/src/ragel/smtp_whitespace.rl
+++ /dev/null
@@ -1,28 +0,0 @@
-%%{
- machine smtp_whitespace;
-
- WSP = " ";
- CRLF = "\r\n" | ("\r" [^\n]) | ([^\r] "\n");
- DQUOTE = '"';
-
- # Printable US-ASCII characters not including specials
- atext = alpha | digit | "!" | "#" | "$" | "%" | "&" |
- "'" | "*" | "+" | "_" | "/" | "=" | "?" | "^" |
- "-" | "`" | "{" | "|" | "}" | "~";
- # Printable US-ASCII characters not including "[", "]", or "\"
- dtext = 33..90 | 94..126;
- # Printable US-ASCII characters not including "(", ")", or "\"
- ctext = 33..39 | 42..91 | 93..126;
-
- dcontent = 33..90 | 94..126;
- Let_dig = alpha | digit;
- Ldh_str = ( alpha | digit | "_" | "-" )* Let_dig;
-
- quoted_pairSMTP = "\\" 32..126;
- qtextSMTP = 32..33 | 35..91 | 93..126;
- Atom = atext+;
- Dot_string = Atom ("." Atom)*;
- dot_atom_text = atext+ ("." atext+)*;
- #FWS = ((WSP* CRLF)? WSP+);
- FWS = WSP+; # We work with unfolded headers, so we can simplify machine
-}%%
\ No newline at end of file
More information about the Commits
mailing list