commit 58eb1a0: [Rework] Rework request headers processing
Vsevolod Stakhov
vsevolod at highsecure.ru
Sun Jul 14 11:49:04 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-07-14 12:38:32 +0100
URL: https://github.com/rspamd/rspamd/commit/58eb1a0b68572f86be8c861d1f38db9ef34b712b
[Rework] Rework request headers processing
---
src/libserver/protocol.c | 94 ++++++++++++++++++++++++-----------------------
src/libserver/task.c | 96 ++++++++++++++++++++++++------------------------
src/libserver/task.h | 35 ++++++++++++------
src/lua/lua_task.c | 8 ++++
4 files changed, 126 insertions(+), 107 deletions(-)
diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c
index 8834529ee..6d436d56d 100644
--- a/src/libserver/protocol.c
+++ b/src/libserver/protocol.c
@@ -58,7 +58,7 @@ rspamd_protocol_quark (void)
* Remove <> from the fixed string and copy it to the pool
*/
static gchar *
-rspamd_protocol_escape_braces (struct rspamd_task *task, rspamd_fstring_t *in)
+rspamd_protocol_escape_braces (struct rspamd_task *task, rspamd_ftok_t *in)
{
guint nchars = 0;
const gchar *p;
@@ -68,7 +68,7 @@ rspamd_protocol_escape_braces (struct rspamd_task *task, rspamd_fstring_t *in)
g_assert (in != NULL);
g_assert (in->len > 0);
- p = in->str;
+ p = in->begin;
while ((g_ascii_isspace (*p) || *p == '<') && nchars < in->len) {
if (*p == '<') {
@@ -81,7 +81,7 @@ rspamd_protocol_escape_braces (struct rspamd_task *task, rspamd_fstring_t *in)
tok.begin = p;
- p = in->str + in->len - 1;
+ p = in->begin + in->len - 1;
tok.len = in->len - nchars;
while (g_ascii_isspace (*p) && tok.len > 0) {
@@ -344,28 +344,34 @@ gboolean
rspamd_protocol_handle_headers (struct rspamd_task *task,
struct rspamd_http_message *msg)
{
- rspamd_fstring_t *hn, *hv;
rspamd_ftok_t *hn_tok, *hv_tok, srch;
gboolean fl, has_ip = FALSE;
struct rspamd_http_header *header, *h, *htmp;
+ gchar *ntok;
HASH_ITER (hh, msg->headers, header, htmp) {
DL_FOREACH (header, h) {
- hn = rspamd_fstring_new_init (h->name.begin, h->name.len);
- hv = rspamd_fstring_new_init (h->value.begin, h->value.len);
- hn_tok = rspamd_ftok_map (hn);
- hv_tok = rspamd_ftok_map (hv);
+ ntok = rspamd_mempool_ftokdup (task->task_pool, &h->name);
+ hn_tok = rspamd_mempool_alloc (task->task_pool, sizeof (*hn_tok));
+ hn_tok->begin = ntok;
+ hn_tok->len = h->name.len;
+
+
+ ntok = rspamd_mempool_ftokdup (task->task_pool, &h->value);
+ hv_tok = rspamd_mempool_alloc (task->task_pool, sizeof (*hv_tok));
+ hv_tok->begin = ntok;
+ hv_tok->len = h->value.len;
switch (*hn_tok->begin) {
case 'd':
case 'D':
IF_HEADER (DELIVER_TO_HEADER) {
- task->deliver_to = rspamd_protocol_escape_braces (task, hv);
+ task->deliver_to = rspamd_protocol_escape_braces (task, hv_tok);
msg_debug_protocol ("read deliver-to header, value: %s",
task->deliver_to);
}
else {
- msg_debug_protocol ("wrong header: %V", hn);
+ msg_debug_protocol ("wrong header: %T", hn_tok);
}
break;
case 'h':
@@ -383,12 +389,13 @@ rspamd_protocol_handle_headers (struct rspamd_task *task,
case 'f':
case 'F':
IF_HEADER (FROM_HEADER) {
- task->from_envelope = rspamd_email_address_from_smtp (hv->str,
- hv->len);
- msg_debug_protocol ("read from header, value: %V", hv);
+ task->from_envelope = rspamd_email_address_from_smtp (
+ hv_tok->begin,
+ hv_tok->len);
+ msg_debug_protocol ("read from header, value: %T", hv_tok);
if (!task->from_envelope) {
- msg_err_protocol ("bad from header: '%V'", hv);
+ msg_err_protocol ("bad from header: '%T'", hv_tok);
task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
}
}
@@ -401,8 +408,8 @@ rspamd_protocol_handle_headers (struct rspamd_task *task,
case 'j':
case 'J':
IF_HEADER (JSON_HEADER) {
- msg_debug_protocol ("read json header, value: %V", hv);
- fl = rspamd_config_parse_flag (hv->str, hv->len);
+ msg_debug_protocol ("read json header, value: %T", hv_tok);
+ fl = rspamd_config_parse_flag (hv_tok->begin, hv_tok->len);
if (fl) {
task->flags |= RSPAMD_TASK_FLAG_JSON;
}
@@ -411,7 +418,7 @@ rspamd_protocol_handle_headers (struct rspamd_task *task,
}
}
else {
- msg_debug_protocol ("wrong header: %V", hn);
+ msg_debug_protocol ("wrong header: %T", hn_tok);
}
break;
case 'q':
@@ -422,20 +429,20 @@ rspamd_protocol_handle_headers (struct rspamd_task *task,
msg_debug_protocol ("read queue_id header, value: %s", task->queue_id);
}
else {
- msg_debug_protocol ("wrong header: %V", hn);
+ msg_debug_protocol ("wrong header: %T", hn_tok);
}
break;
case 'r':
case 'R':
IF_HEADER (RCPT_HEADER) {
rspamd_protocol_process_recipients (task, hv_tok);
- msg_debug_protocol ("read rcpt header, value: %V", hv);
+ msg_debug_protocol ("read rcpt header, value: %T", hv_tok);
}
IF_HEADER (RAW_DATA_HEADER) {
srch.begin = "yes";
srch.len = 3;
- msg_debug_protocol ("read raw data header, value: %V", hv);
+ msg_debug_protocol ("read raw data header, value: %T", hv_tok);
if (rspamd_ftok_casecmp (hv_tok, &srch) == 0) {
task->flags &= ~RSPAMD_TASK_FLAG_MIME;
@@ -446,16 +453,17 @@ rspamd_protocol_handle_headers (struct rspamd_task *task,
case 'i':
case 'I':
IF_HEADER (IP_ADDR_HEADER) {
- if (!rspamd_parse_inet_address (&task->from_addr, hv->str, hv->len)) {
- msg_err_protocol ("bad ip header: '%V'", hv);
+ if (!rspamd_parse_inet_address (&task->from_addr,
+ hv_tok->begin, hv_tok->len)) {
+ msg_err_protocol ("bad ip header: '%T'", hv_tok);
}
else {
- msg_debug_protocol ("read IP header, value: %V", hv);
+ msg_debug_protocol ("read IP header, value: %T", hv_tok);
has_ip = TRUE;
}
}
else {
- msg_debug_protocol ("wrong header: %V", hn);
+ msg_debug_protocol ("wrong header: %T", hn_tok);
}
break;
case 'p':
@@ -464,7 +472,7 @@ rspamd_protocol_handle_headers (struct rspamd_task *task,
srch.begin = "all";
srch.len = 3;
- msg_debug_protocol ("read pass header, value: %V", hv);
+ msg_debug_protocol ("read pass header, value: %T", hv_tok);
if (rspamd_ftok_casecmp (hv_tok, &srch) == 0) {
task->flags |= RSPAMD_TASK_FLAG_PASS_ALL;
@@ -472,14 +480,14 @@ rspamd_protocol_handle_headers (struct rspamd_task *task,
}
}
IF_HEADER (PROFILE_HEADER) {
- msg_debug_protocol ("read profile header, value: %V", hv);
+ msg_debug_protocol ("read profile header, value: %T", hv_tok);
task->flags |= RSPAMD_TASK_FLAG_PROFILE;
}
break;
case 's':
case 'S':
IF_HEADER (SETTINGS_ID_HEADER) {
- msg_debug_protocol ("read settings-id header, value: %V", hv);
+ msg_debug_protocol ("read settings-id header, value: %T", hv_tok);
task->settings_elt = rspamd_config_find_settings_name_ref (
task->cfg, hv_tok->begin, hv_tok->len);
@@ -492,15 +500,15 @@ rspamd_protocol_handle_headers (struct rspamd_task *task,
cur->name, cur->id);
}
- msg_warn_protocol ("unknown settings id: %V(%d); known_ids: %v",
- hv,
+ msg_warn_protocol ("unknown settings id: %T(%d); known_ids: %v",
+ hv_tok,
rspamd_config_name_to_id (hv_tok->begin, hv_tok->len),
known_ids);
g_string_free (known_ids, TRUE);
}
else {
- msg_debug_protocol ("applied settings id %V -> %ud", hv,
+ msg_debug_protocol ("applied settings id %T -> %ud", hv_tok,
task->settings_elt->id);
}
}
@@ -512,7 +520,7 @@ rspamd_protocol_handle_headers (struct rspamd_task *task,
* We must ignore User header in case of spamc, as SA has
* different meaning of this header
*/
- msg_debug_protocol ("read user header, value: %V", hv);
+ msg_debug_protocol ("read user header, value: %T", hv_tok);
if (!RSPAMD_TASK_IS_SPAMC (task)) {
task->user = rspamd_mempool_ftokdup (task->task_pool,
hv_tok);
@@ -522,7 +530,7 @@ rspamd_protocol_handle_headers (struct rspamd_task *task,
}
}
IF_HEADER (URLS_HEADER) {
- msg_debug_protocol ("read urls header, value: %V", hv);
+ msg_debug_protocol ("read urls header, value: %T", hv_tok);
srch.begin = "extended";
srch.len = 8;
@@ -535,7 +543,7 @@ rspamd_protocol_handle_headers (struct rspamd_task *task,
/* TODO: add more formats there */
}
IF_HEADER (USER_AGENT_HEADER) {
- msg_debug_protocol ("read user-agent header, value: %V", hv);
+ msg_debug_protocol ("read user-agent header, value: %T", hv_tok);
if (hv_tok->len == 6 &&
rspamd_lc_cmp (hv_tok->begin, "rspamc", 6) == 0) {
@@ -546,7 +554,7 @@ rspamd_protocol_handle_headers (struct rspamd_task *task,
case 'l':
case 'L':
IF_HEADER (NO_LOG_HEADER) {
- msg_debug_protocol ("read log header, value: %V", hv);
+ msg_debug_protocol ("read log header, value: %T", hv_tok);
srch.begin = "no";
srch.len = 2;
@@ -558,15 +566,9 @@ rspamd_protocol_handle_headers (struct rspamd_task *task,
case 'm':
case 'M':
IF_HEADER (MLEN_HEADER) {
- msg_debug_protocol ("read message length header, value: %V", hv);
- if (!rspamd_strtoul (hv_tok->begin,
- hv_tok->len,
- &task->message_len)) {
- msg_err_protocol ("Invalid message length header: %V", hv);
- }
- else {
- task->flags |= RSPAMD_TASK_FLAG_HAS_CONTROL;
- }
+ msg_debug_protocol ("read message length header, value: %T",
+ hv_tok);
+ task->flags |= RSPAMD_TASK_FLAG_HAS_CONTROL;
}
IF_HEADER (MTA_TAG_HEADER) {
gchar *mta_tag;
@@ -586,18 +588,18 @@ rspamd_protocol_handle_headers (struct rspamd_task *task,
}
IF_HEADER (MILTER_HEADER) {
task->flags |= RSPAMD_TASK_FLAG_MILTER;
- msg_debug_protocol ("read Milter header, value: %V", hv);
+ msg_debug_protocol ("read Milter header, value: %T", hv_tok);
}
break;
case 't':
case 'T':
IF_HEADER (TLS_CIPHER_HEADER) {
task->flags |= RSPAMD_TASK_FLAG_SSL;
- msg_debug_protocol ("read TLS cipher header, value: %V", hv);
+ msg_debug_protocol ("read TLS cipher header, value: %T", hv_tok);
}
break;
default:
- msg_debug_protocol ("generic header: %V", hn);
+ msg_debug_protocol ("generic header: %T", hn_tok);
break;
}
diff --git a/src/libserver/task.c b/src/libserver/task.c
index 950af5ec8..54f2510a2 100644
--- a/src/libserver/task.c
+++ b/src/libserver/task.c
@@ -16,7 +16,8 @@
#include "task.h"
#include "rspamd.h"
#include "filter.h"
-#include "protocol.h"
+#include "libserver/protocol.h"
+#include "libserver/protocol_internal.h"
#include "message.h"
#include "lua/lua_common.h"
#include "email_addr.h"
@@ -40,6 +41,10 @@
#include <math.h>
+__KHASH_IMPL (rspamd_req_headers_hash, static inline,
+ rspamd_ftok_t *, struct rspamd_request_header_chain *, 1,
+ rspamd_ftok_icase_hash, rspamd_ftok_icase_equal)
+
/*
* Do not print more than this amount of elts
*/
@@ -51,23 +56,6 @@ rspamd_task_quark (void)
return g_quark_from_static_string ("task-error");
}
-static void
-rspamd_request_header_dtor (gpointer p)
-{
- GPtrArray *ar = p;
- guint i;
- rspamd_ftok_t *tok;
-
- if (ar) {
- for (i = 0; i < ar->len; i ++) {
- tok = g_ptr_array_index (ar, i);
- rspamd_fstring_mapped_ftok_free (tok);
- }
-
- g_ptr_array_free (ar, TRUE);
- }
-}
-
/*
* Create new task
*/
@@ -123,13 +111,7 @@ rspamd_task_new (struct rspamd_worker *worker, struct rspamd_config *cfg,
new_task->task_pool = pool;
}
- new_task->request_headers = g_hash_table_new_full (rspamd_ftok_icase_hash,
- rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free,
- rspamd_request_header_dtor);
- rspamd_mempool_add_destructor (new_task->task_pool,
- (rspamd_mempool_destruct_t) g_hash_table_unref,
- new_task->request_headers);
-
+ new_task->request_headers = kh_init (rspamd_req_headers_hash);
new_task->sock = -1;
new_task->flags |= (RSPAMD_TASK_FLAG_MIME|RSPAMD_TASK_FLAG_JSON);
new_task->result = rspamd_create_metric_result (new_task);
@@ -314,6 +296,7 @@ rspamd_task_free (struct rspamd_task *task)
REF_RELEASE (task->cfg);
}
+ kh_destroy (rspamd_req_headers_hash, task->request_headers);
rspamd_message_unref (task->message);
if (task->flags & RSPAMD_TASK_FLAG_OWN_POOL) {
@@ -647,15 +630,19 @@ rspamd_task_load_message (struct rspamd_task *task,
}
if (task->flags & RSPAMD_TASK_FLAG_HAS_CONTROL) {
- /* We have control chunk, so we need to process it separately */
- if (task->msg.len < task->message_len) {
+ rspamd_ftok_t *hv = rspamd_task_get_request_header (task, MLEN_HEADER);
+ gulong message_len = 0;
+
+ if (!hv || !rspamd_strtoul (hv->begin, hv->len, &message_len) ||
+ task->msg.len < message_len) {
msg_warn_task ("message has invalid message length: %ul and total len: %ul",
- task->message_len, task->msg.len);
+ message_len, task->msg.len);
g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
"Invalid length");
return FALSE;
}
- control_len = task->msg.len - task->message_len;
+
+ control_len = task->msg.len - message_len;
if (control_len > 0) {
parser = ucl_parser_new (UCL_PARSER_KEY_LOWERCASE);
@@ -1590,32 +1577,33 @@ rspamd_ftok_t *
rspamd_task_get_request_header (struct rspamd_task *task,
const gchar *name)
{
- GPtrArray *ret;
- rspamd_ftok_t srch;
-
- srch.begin = (gchar *)name;
- srch.len = strlen (name);
-
- ret = g_hash_table_lookup (task->request_headers, &srch);
+ struct rspamd_request_header_chain *ret =
+ rspamd_task_get_request_header_multiple (task, name);
if (ret) {
- return (rspamd_ftok_t *)g_ptr_array_index (ret, 0);
+ return ret->hdr;
}
return NULL;
}
-GPtrArray*
+struct rspamd_request_header_chain *
rspamd_task_get_request_header_multiple (struct rspamd_task *task,
const gchar *name)
{
- GPtrArray *ret;
+ struct rspamd_request_header_chain *ret = NULL;
rspamd_ftok_t srch;
+ khiter_t k;
srch.begin = (gchar *)name;
srch.len = strlen (name);
- ret = g_hash_table_lookup (task->request_headers, &srch);
+ k = kh_get (rspamd_req_headers_hash, task->request_headers,
+ &srch);
+
+ if (k != kh_end (task->request_headers)) {
+ ret = kh_value (task->request_headers, k);
+ }
return ret;
}
@@ -1625,20 +1613,30 @@ void
rspamd_task_add_request_header (struct rspamd_task *task,
rspamd_ftok_t *name, rspamd_ftok_t *value)
{
- GPtrArray *ret;
- ret = g_hash_table_lookup (task->request_headers, name);
+ khiter_t k;
+ gint res;
+ struct rspamd_request_header_chain *chain, *nchain;
- if (ret) {
- g_ptr_array_add (ret, value);
+ k = kh_put (rspamd_req_headers_hash, task->request_headers,
+ name, &res);
+
+ if (res == 0) {
+ /* Existing name */
+ nchain = rspamd_mempool_alloc (task->task_pool, sizeof (*nchain));
+ nchain->hdr = value;
+ nchain->next = NULL;
+ chain = kh_value (task->request_headers, k);
- /* We need to free name token */
- rspamd_fstring_mapped_ftok_free (name);
+ /* Slow but OK here */
+ LL_APPEND (chain, nchain);
}
else {
- ret = g_ptr_array_sized_new (2);
- g_ptr_array_add (ret, value);
- g_hash_table_replace (task->request_headers, name, ret);
+ nchain = rspamd_mempool_alloc (task->task_pool, sizeof (*nchain));
+ nchain->hdr = value;
+ nchain->next = NULL;
+
+ kh_value (task->request_headers, k) = nchain;
}
}
diff --git a/src/libserver/task.h b/src/libserver/task.h
index e0e1fc808..67f33488a 100644
--- a/src/libserver/task.h
+++ b/src/libserver/task.h
@@ -23,6 +23,7 @@
#include "mem_pool.h"
#include "dns.h"
#include "re_cache.h"
+#include "khash.h"
#ifdef __cplusplus
extern "C" {
@@ -121,6 +122,7 @@ enum rspamd_task_stage {
#define RSPAMD_TASK_FLAG_MILTER (1u << 28u)
#define RSPAMD_TASK_FLAG_SSL (1u << 29u)
#define RSPAMD_TASK_FLAG_BAD_UNICODE (1u << 30u)
+#define RSPAMD_TASK_FLAG_MESSAGE_REWRITE (1u << 31u)
#define RSPAMD_TASK_IS_SKIPPED(task) (((task)->flags & RSPAMD_TASK_FLAG_SKIP))
#define RSPAMD_TASK_IS_JSON(task) (((task)->flags & RSPAMD_TASK_FLAG_JSON))
@@ -136,16 +138,28 @@ struct rspamd_lang_detector;
enum rspamd_newlines_type;
struct rspamd_message;
+struct rspamd_task_data_storage {
+ const gchar *begin;
+ gsize len;
+ gchar *fpath;
+};
+
+struct rspamd_request_header_chain {
+ rspamd_ftok_t *hdr;
+ struct rspamd_request_header_chain *next;
+};
+
+__KHASH_TYPE (rspamd_req_headers_hash, rspamd_ftok_t *, struct rspamd_request_header_chain *)
+
/**
* Worker task structure
*/
struct rspamd_task {
struct rspamd_worker *worker; /**< pointer to worker object */
enum rspamd_command cmd; /**< command */
- gint sock; /**< socket descriptor */
- guint32 flags; /**< Bit flags */
- guint32 dns_requests; /**< number of DNS requests per this task */
- gulong message_len; /**< Message length */
+ gint sock; /**< socket descriptor */
+ guint32 dns_requests; /**< number of DNS requests per this task */
+ guint32 flags; /**< Bit flags */
gchar *helo; /**< helo header value */
gchar *queue_id; /**< queue id if specified */
rspamd_inet_addr_t *from_addr; /**< from addr for a task */
@@ -153,12 +167,8 @@ struct rspamd_task {
gchar *deliver_to; /**< address to deliver */
gchar *user; /**< user to deliver */
const gchar *hostname; /**< hostname reported by MTA */
- GHashTable *request_headers; /**< HTTP headers in a request */
- struct {
- const gchar *begin;
- gsize len;
- gchar *fpath;
- } msg; /**< message buffer */
+ khash_t(rspamd_req_headers_hash) *request_headers; /**< HTTP headers in a request */
+ struct rspamd_task_data_storage msg; /**< message buffer */
struct rspamd_http_connection *http_conn; /**< HTTP server connection */
struct rspamd_async_session *s; /**< async session object */
struct rspamd_metric_result *result; /**< Metric result */
@@ -308,8 +318,9 @@ rspamd_ftok_t *rspamd_task_get_request_header (struct rspamd_task *task,
* @param name
* @return
*/
-GPtrArray *rspamd_task_get_request_header_multiple (struct rspamd_task *task,
- const gchar *name);
+struct rspamd_request_header_chain *rspamd_task_get_request_header_multiple (
+ struct rspamd_task *task,
+ const gchar *name);
/**
* Adds a new request header to task (name and value should be mapped to fstring)
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index 0b2dd4ab3..6182d64fb 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -1476,6 +1476,7 @@ lua_task_set_message (lua_State * L)
lua_pop (L, 1);
}
+ task->flags |= RSPAMD_TASK_FLAG_MESSAGE_REWRITE;
task->msg.begin = buf;
task->msg.len = final_len;
}
@@ -1502,6 +1503,7 @@ lua_task_set_message (lua_State * L)
if (buf) {
task->msg.begin = buf;
task->msg.len = final_len;
+ task->flags |= RSPAMD_TASK_FLAG_MESSAGE_REWRITE;
}
}
@@ -4778,6 +4780,8 @@ lua_task_has_flag (lua_State *L)
RSPAMD_TASK_FLAG_BAD_UNICODE);
LUA_TASK_GET_FLAG (flag, "mime",
RSPAMD_TASK_FLAG_MIME);
+ LUA_TASK_GET_FLAG (flag, "message_rewrite",
+ RSPAMD_TASK_FLAG_MESSAGE_REWRITE);
if (!found) {
msg_warn_task ("unknown flag requested: %s", flag);
@@ -4853,6 +4857,10 @@ lua_task_get_flags (lua_State *L)
lua_pushstring (L, "milter");
lua_rawseti (L, -2, idx++);
break;
+ case RSPAMD_TASK_FLAG_MESSAGE_REWRITE:
+ lua_pushstring (L, "message_rewrite");
+ lua_rawseti (L, -2, idx++);
+ break;
default:
break;
}
More information about the Commits
mailing list