commit a49a31b: [Feature] Add limit for number of URLs in Lua
Vsevolod Stakhov
vsevolod at highsecure.ru
Fri Oct 11 12:14:08 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-10-11 13:00:24 +0100
URL: https://github.com/rspamd/rspamd/commit/a49a31b3f72d9a744c3bb53f60f7f39614eb7f35 (HEAD -> master)
[Feature] Add limit for number of URLs in Lua
---
src/libserver/cfg_file.h | 1 +
src/libserver/cfg_rcl.c | 6 ++++++
src/libserver/cfg_utils.c | 1 +
src/libutil/util.c | 15 ++++++++++-----
src/libutil/util.h | 2 +-
src/lua/lua_task.c | 45 +++++++++++++++++++++++++++++++++++++++++++--
6 files changed, 62 insertions(+), 8 deletions(-)
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index f22b0cefc..4eea4db16 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -462,6 +462,7 @@ struct rspamd_config {
guint lua_gc_step; /**< lua gc step */
guint lua_gc_pause; /**< lua gc pause */
guint full_gc_iters; /**< iterations between full gc cycle */
+ guint max_lua_urls; /**< maximum number of urls to be passed to Lua */
GList *classify_headers; /**< list of headers using for statistics */
struct module_s **compiled_modules; /**< list of compiled C modules */
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
index cdf3d9b5b..d409e40e8 100644
--- a/src/libserver/cfg_rcl.c
+++ b/src/libserver/cfg_rcl.c
@@ -2195,6 +2195,12 @@ rspamd_rcl_config_init (struct rspamd_config *cfg, GHashTable *skip_sections)
RSPAMD_CL_FLAG_INT_32,
"Maximum count of heartbeats to be lost before trying to "
"terminate a worker (default: 0 - disabled)");
+ rspamd_rcl_add_default_handler (sub,
+ "max_lua_urls",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET (struct rspamd_config, max_lua_urls),
+ RSPAMD_CL_FLAG_INT_32,
+ "Maximum count of URLs to pass to Lua to avoid DoS");
/* Neighbours configuration */
rspamd_rcl_add_section_doc (&sub->subsections, "neighbours", "name",
diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c
index be709a051..5851e250f 100644
--- a/src/libserver/cfg_utils.c
+++ b/src/libserver/cfg_utils.c
@@ -195,6 +195,7 @@ rspamd_config_new (enum rspamd_config_init_flags flags)
cfg->log_error_elts = 10;
cfg->log_error_elt_maxlen = 1000;
cfg->cache_reload_time = 30.0;
+ cfg->max_lua_urls = 1024;
/* Default log line */
cfg->log_format_str = "id: <$mid>,$if_qid{ qid: <$>,}$if_ip{ ip: $,}"
diff --git a/src/libutil/util.c b/src/libutil/util.c
index 55696c212..ecdd7b2b9 100644
--- a/src/libutil/util.c
+++ b/src/libutil/util.c
@@ -2624,17 +2624,22 @@ xoroshiro_rotl (const guint64 x, int k) {
return (x << k) | (x >> (64 - k));
}
-
gdouble
rspamd_random_double_fast (void)
{
- const guint64 s0 = xorshifto_seed[0];
- guint64 s1 = xorshifto_seed[1];
+ return rspamd_random_double_fast_seed (xorshifto_seed);
+}
+
+gdouble
+rspamd_random_double_fast_seed (guint64 seed[2])
+{
+ const guint64 s0 = seed[0];
+ guint64 s1 = seed[1];
const guint64 result = s0 + s1;
s1 ^= s0;
- xorshifto_seed[0] = xoroshiro_rotl(s0, 55) ^ s1 ^ (s1 << 14);
- xorshifto_seed[1] = xoroshiro_rotl (s1, 36);
+ seed[0] = xoroshiro_rotl(s0, 55) ^ s1 ^ (s1 << 14);
+ seed[1] = xoroshiro_rotl (s1, 36);
return rspamd_double_from_int64 (result);
}
diff --git a/src/libutil/util.h b/src/libutil/util.h
index 7c9eb5f91..c482a2d9f 100644
--- a/src/libutil/util.h
+++ b/src/libutil/util.h
@@ -424,7 +424,7 @@ gdouble rspamd_random_double (void);
* @return
*/
gdouble rspamd_random_double_fast (void);
-
+gdouble rspamd_random_double_fast_seed (guint64 seed[2]);
guint64 rspamd_random_uint64_fast (void);
/**
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index 97523a1fa..4293ef178 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -2103,6 +2103,8 @@ struct lua_tree_cb_data {
int i;
gint mask;
gint need_images;
+ gdouble skip_prob;
+ guint64 xoroshiro_state[2];
};
static void
@@ -2117,6 +2119,14 @@ lua_tree_url_callback (gpointer key, gpointer value, gpointer ud)
return;
}
+ if (cb->skip_prob > 0) {
+ gdouble coin = rspamd_random_double_fast_seed (cb->xoroshiro_state);
+
+ if (coin < cb->skip_prob) {
+ return;
+ }
+ }
+
lua_url = lua_newuserdata (cb->L, sizeof (struct rspamd_lua_url));
rspamd_lua_setclass (cb->L, "rspamd{url}", -1);
lua_url->url = url;
@@ -2124,6 +2134,26 @@ lua_tree_url_callback (gpointer key, gpointer value, gpointer ud)
}
}
+static inline gsize
+lua_task_urls_adjust_skip_prob (struct rspamd_task *task,
+ struct lua_tree_cb_data *cb, gsize sz, gsize max_urls)
+{
+ if (max_urls > 0 && sz > max_urls) {
+ cb->skip_prob = 1.0 - ((gdouble)max_urls) / (gdouble)sz;
+ /*
+ * Use task dependent probabilistic seed to ensure that
+ * consequent task:get_urls return the same list of urls
+ */
+ memcpy (&cb->xoroshiro_state[0], &task->task_timestamp,
+ MIN (sizeof (cb->xoroshiro_state[0]), sizeof (task->task_timestamp)));
+ memcpy (&cb->xoroshiro_state[1], MESSAGE_FIELD (task, digest),
+ sizeof (cb->xoroshiro_state[1]));
+ sz = max_urls;
+ }
+
+ return sz;
+}
+
static gint
lua_task_get_urls (lua_State * L)
{
@@ -2135,9 +2165,13 @@ lua_task_get_urls (lua_State * L)
PROTOCOL_FILE|PROTOCOL_FTP;
const gchar *cache_name = "emails+urls";
gboolean need_images = FALSE;
- gsize sz;
+ gsize sz, max_urls = 0;
if (task) {
+ if (task->cfg) {
+ max_urls = task->cfg->max_lua_urls;
+ }
+
if (task->message == NULL) {
lua_newtable (L);
@@ -2220,6 +2254,8 @@ lua_task_get_urls (lua_State * L)
sz = g_hash_table_size (MESSAGE_FIELD (task, urls)) +
g_hash_table_size (MESSAGE_FIELD (task, emails));
+ sz = lua_task_urls_adjust_skip_prob (task, &cb, sz, max_urls);
+
if (protocols_mask == (default_mask|PROTOCOL_MAILTO)) {
/* Can use cached version */
if (!lua_task_get_cached (L, task, cache_name)) {
@@ -2250,6 +2286,7 @@ lua_task_get_urls (lua_State * L)
}
sz = g_hash_table_size (MESSAGE_FIELD (task, urls));
+ sz = lua_task_urls_adjust_skip_prob (task, &cb, sz, max_urls);
if (protocols_mask == (default_mask)) {
if (!lua_task_get_cached (L, task, cache_name)) {
@@ -2279,6 +2316,7 @@ lua_task_has_urls (lua_State * L)
LUA_TRACE_POINT;
struct rspamd_task *task = lua_check_task (L, 1);
gboolean need_emails = FALSE, ret = FALSE;
+ gsize sz = 0;
if (task) {
if (task->message) {
@@ -2287,10 +2325,12 @@ lua_task_has_urls (lua_State * L)
}
if (g_hash_table_size (MESSAGE_FIELD (task, urls)) > 0) {
+ sz += g_hash_table_size (MESSAGE_FIELD (task, urls));
ret = TRUE;
}
if (need_emails && g_hash_table_size (MESSAGE_FIELD (task, emails)) > 0) {
+ sz += g_hash_table_size (MESSAGE_FIELD (task, emails));
ret = TRUE;
}
}
@@ -2300,8 +2340,9 @@ lua_task_has_urls (lua_State * L)
}
lua_pushboolean (L, ret);
+ lua_pushinteger (L, sz);
- return 1;
+ return 2;
}
static gint
More information about the Commits mailing list