commit a49a31b: [Feature] Add limit for number of URLs in Lua

Vsevolod Stakhov vsevolod at highsecure.ru
Fri Oct 11 12:14:08 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-10-11 13:00:24 +0100
URL: https://github.com/rspamd/rspamd/commit/a49a31b3f72d9a744c3bb53f60f7f39614eb7f35 (HEAD -> master)

[Feature] Add limit for number of URLs in Lua

---
 src/libserver/cfg_file.h  |  1 +
 src/libserver/cfg_rcl.c   |  6 ++++++
 src/libserver/cfg_utils.c |  1 +
 src/libutil/util.c        | 15 ++++++++++-----
 src/libutil/util.h        |  2 +-
 src/lua/lua_task.c        | 45 +++++++++++++++++++++++++++++++++++++++++++--
 6 files changed, 62 insertions(+), 8 deletions(-)

diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index f22b0cefc..4eea4db16 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -462,6 +462,7 @@ struct rspamd_config {
 	guint lua_gc_step;                                /**< lua gc step 										*/
 	guint lua_gc_pause;                                /**< lua gc pause										*/
 	guint full_gc_iters;                            /**< iterations between full gc cycle					*/
+	guint max_lua_urls;                             /**< maximum number of urls to be passed to Lua			*/
 
 	GList *classify_headers;                        /**< list of headers using for statistics				*/
 	struct module_s **compiled_modules;                /**< list of compiled C modules							*/
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
index cdf3d9b5b..d409e40e8 100644
--- a/src/libserver/cfg_rcl.c
+++ b/src/libserver/cfg_rcl.c
@@ -2195,6 +2195,12 @@ rspamd_rcl_config_init (struct rspamd_config *cfg, GHashTable *skip_sections)
 				RSPAMD_CL_FLAG_INT_32,
 				"Maximum count of heartbeats to be lost before trying to "
 				"terminate a worker (default: 0 - disabled)");
+		rspamd_rcl_add_default_handler (sub,
+				"max_lua_urls",
+				rspamd_rcl_parse_struct_integer,
+				G_STRUCT_OFFSET (struct rspamd_config, max_lua_urls),
+				RSPAMD_CL_FLAG_INT_32,
+				"Maximum count of URLs to pass to Lua to avoid DoS");
 
 		/* Neighbours configuration */
 		rspamd_rcl_add_section_doc (&sub->subsections, "neighbours", "name",
diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c
index be709a051..5851e250f 100644
--- a/src/libserver/cfg_utils.c
+++ b/src/libserver/cfg_utils.c
@@ -195,6 +195,7 @@ rspamd_config_new (enum rspamd_config_init_flags flags)
 	cfg->log_error_elts = 10;
 	cfg->log_error_elt_maxlen = 1000;
 	cfg->cache_reload_time = 30.0;
+	cfg->max_lua_urls = 1024;
 
 	/* Default log line */
 	cfg->log_format_str = "id: <$mid>,$if_qid{ qid: <$>,}$if_ip{ ip: $,}"
diff --git a/src/libutil/util.c b/src/libutil/util.c
index 55696c212..ecdd7b2b9 100644
--- a/src/libutil/util.c
+++ b/src/libutil/util.c
@@ -2624,17 +2624,22 @@ xoroshiro_rotl (const guint64 x, int k) {
 	return (x << k) | (x >> (64 - k));
 }
 
-
 gdouble
 rspamd_random_double_fast (void)
 {
-	const guint64 s0 = xorshifto_seed[0];
-	guint64 s1 = xorshifto_seed[1];
+	return rspamd_random_double_fast_seed (xorshifto_seed);
+}
+
+gdouble
+rspamd_random_double_fast_seed (guint64 seed[2])
+{
+	const guint64 s0 = seed[0];
+	guint64 s1 = seed[1];
 	const guint64 result = s0 + s1;
 
 	s1 ^= s0;
-	xorshifto_seed[0] = xoroshiro_rotl(s0, 55) ^ s1 ^ (s1 << 14);
-	xorshifto_seed[1] = xoroshiro_rotl (s1, 36);
+	seed[0] = xoroshiro_rotl(s0, 55) ^ s1 ^ (s1 << 14);
+	seed[1] = xoroshiro_rotl (s1, 36);
 
 	return rspamd_double_from_int64 (result);
 }
diff --git a/src/libutil/util.h b/src/libutil/util.h
index 7c9eb5f91..c482a2d9f 100644
--- a/src/libutil/util.h
+++ b/src/libutil/util.h
@@ -424,7 +424,7 @@ gdouble rspamd_random_double (void);
  * @return
  */
 gdouble rspamd_random_double_fast (void);
-
+gdouble rspamd_random_double_fast_seed (guint64 seed[2]);
 guint64 rspamd_random_uint64_fast (void);
 
 /**
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index 97523a1fa..4293ef178 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -2103,6 +2103,8 @@ struct lua_tree_cb_data {
 	int i;
 	gint mask;
 	gint need_images;
+	gdouble skip_prob;
+	guint64 xoroshiro_state[2];
 };
 
 static void
@@ -2117,6 +2119,14 @@ lua_tree_url_callback (gpointer key, gpointer value, gpointer ud)
 			return;
 		}
 
+		if (cb->skip_prob > 0) {
+			gdouble coin = rspamd_random_double_fast_seed (cb->xoroshiro_state);
+
+			if (coin < cb->skip_prob) {
+				return;
+			}
+		}
+
 		lua_url = lua_newuserdata (cb->L, sizeof (struct rspamd_lua_url));
 		rspamd_lua_setclass (cb->L, "rspamd{url}", -1);
 		lua_url->url = url;
@@ -2124,6 +2134,26 @@ lua_tree_url_callback (gpointer key, gpointer value, gpointer ud)
 	}
 }
 
+static inline gsize
+lua_task_urls_adjust_skip_prob (struct rspamd_task *task,
+		struct lua_tree_cb_data *cb, gsize sz, gsize max_urls)
+{
+	if (max_urls > 0 && sz > max_urls) {
+		cb->skip_prob = 1.0 - ((gdouble)max_urls) / (gdouble)sz;
+		/*
+		 * Seed the sampling PRNG from task-dependent data to ensure that
+		 * consecutive task:get_urls calls return the same list of urls
+		 */
+		memcpy (&cb->xoroshiro_state[0], &task->task_timestamp,
+				MIN (sizeof (cb->xoroshiro_state[0]), sizeof (task->task_timestamp)));
+		memcpy (&cb->xoroshiro_state[1], MESSAGE_FIELD (task, digest),
+				sizeof (cb->xoroshiro_state[1]));
+		sz = max_urls;
+	}
+
+	return sz;
+}
+
 static gint
 lua_task_get_urls (lua_State * L)
 {
@@ -2135,9 +2165,13 @@ lua_task_get_urls (lua_State * L)
 			PROTOCOL_FILE|PROTOCOL_FTP;
 	const gchar *cache_name = "emails+urls";
 	gboolean need_images = FALSE;
-	gsize sz;
+	gsize sz, max_urls = 0;
 
 	if (task) {
+		if (task->cfg) {
+			max_urls = task->cfg->max_lua_urls;
+		}
+
 		if (task->message == NULL) {
 			lua_newtable (L);
 
@@ -2220,6 +2254,8 @@ lua_task_get_urls (lua_State * L)
 			sz = g_hash_table_size (MESSAGE_FIELD (task, urls)) +
 					g_hash_table_size (MESSAGE_FIELD (task, emails));
 
+			sz = lua_task_urls_adjust_skip_prob (task, &cb, sz, max_urls);
+
 			if (protocols_mask == (default_mask|PROTOCOL_MAILTO)) {
 				/* Can use cached version */
 				if (!lua_task_get_cached (L, task, cache_name)) {
@@ -2250,6 +2286,7 @@ lua_task_get_urls (lua_State * L)
 			}
 
 			sz = g_hash_table_size (MESSAGE_FIELD (task, urls));
+			sz = lua_task_urls_adjust_skip_prob (task, &cb, sz, max_urls);
 
 			if (protocols_mask == (default_mask)) {
 				if (!lua_task_get_cached (L, task, cache_name)) {
@@ -2279,6 +2316,7 @@ lua_task_has_urls (lua_State * L)
 	LUA_TRACE_POINT;
 	struct rspamd_task *task = lua_check_task (L, 1);
 	gboolean need_emails = FALSE, ret = FALSE;
+	gsize sz = 0;
 
 	if (task) {
 		if (task->message) {
@@ -2287,10 +2325,12 @@ lua_task_has_urls (lua_State * L)
 			}
 
 			if (g_hash_table_size (MESSAGE_FIELD (task, urls)) > 0) {
+				sz += g_hash_table_size (MESSAGE_FIELD (task, urls));
 				ret = TRUE;
 			}
 
 			if (need_emails && g_hash_table_size (MESSAGE_FIELD (task, emails)) > 0) {
+				sz += g_hash_table_size (MESSAGE_FIELD (task, emails));
 				ret = TRUE;
 			}
 		}
@@ -2300,8 +2340,9 @@ lua_task_has_urls (lua_State * L)
 	}
 
 	lua_pushboolean (L, ret);
+	lua_pushinteger (L, sz);
 
-	return 1;
+	return 2;
 }
 
 static gint


More information about the Commits mailing list