commit 61f8601: [Rework] Kill surbl C module

Vsevolod Stakhov vsevolod at highsecure.ru
Wed Aug 28 07:42:08 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-08-28 08:37:34 +0100
URL: https://github.com/rspamd/rspamd/commit/61f8601c3b987f7b24f9a61a2f622ee797368e03 (HEAD -> master)

[Rework] Kill surbl C module

---
 src/CMakeLists.txt                   |    5 +-
 src/libserver/protocol.c             |    6 +-
 src/libserver/url.h                  |    2 -
 src/plugins/lua/reputation.lua       |    1 -
 src/plugins/surbl.c                  | 2286 ----------------------------------
 test/functional/configs/plugins.conf |    2 +-
 6 files changed, 6 insertions(+), 2296 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 1147eba9f..c5871b665 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -93,15 +93,14 @@ SET(RSPAMDSRC	controller.c
 				worker.c
 				rspamd_proxy.c)
 
-SET(PLUGINSSRC	plugins/surbl.c
-				plugins/regexp.c
+SET(PLUGINSSRC  plugins/regexp.c
 				plugins/chartable.c
 				plugins/fuzzy_check.c
 				plugins/spf.c
 				plugins/dkim_check.c
 				libserver/rspamd_control.c)
 
-SET(MODULES_LIST surbl regexp chartable fuzzy_check spf dkim)
+SET(MODULES_LIST regexp chartable fuzzy_check spf dkim)
 SET(WORKERS_LIST normal controller fuzzy rspamd_proxy)
 IF (ENABLE_HYPERSCAN MATCHES "ON")
 	LIST(APPEND WORKERS_LIST "hs_helper")
diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c
index a7abc5947..f09784c48 100644
--- a/src/libserver/protocol.c
+++ b/src/libserver/protocol.c
@@ -875,9 +875,9 @@ rspamd_protocol_extended_url (struct rspamd_task *task,
 	elt = ucl_object_fromlstring (encoded, enclen);
 	ucl_object_insert_key (obj, elt, "url", 0, false);
 
-	if (url->surbllen > 0) {
-		elt = ucl_object_fromlstring (url->surbl, url->surbllen);
-		ucl_object_insert_key (obj, elt, "surbl", 0, false);
+	if (url->tldlen > 0) {
+		elt = ucl_object_fromlstring (url->tld, url->tldlen);
+		ucl_object_insert_key (obj, elt, "tld", 0, false);
 	}
 	if (url->hostlen > 0) {
 		elt = ucl_object_fromlstring (url->host, url->hostlen);
diff --git a/src/libserver/url.h b/src/libserver/url.h
index d9e15e212..ae21b6ab3 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -51,7 +51,6 @@ struct rspamd_url {
 	gchar *data;
 	gchar *query;
 	gchar *fragment;
-	gchar *surbl;
 	gchar *tld;
 	gchar *visible_part;
 
@@ -63,7 +62,6 @@ struct rspamd_url {
 	guint datalen;
 	guint querylen;
 	guint fragmentlen;
-	guint surbllen;
 	guint tldlen;
 	guint urllen;
 	guint rawlen;
diff --git a/src/plugins/lua/reputation.lua b/src/plugins/lua/reputation.lua
index 9e28e876b..4c92eab94 100644
--- a/src/plugins/lua/reputation.lua
+++ b/src/plugins/lua/reputation.lua
@@ -346,7 +346,6 @@ local url_selector = {
     outbound = true,
     inbound = true,
   },
-  dependencies = {"SURBL_REDIRECTOR_CALLBACK"},
   filter = url_reputation_filter, -- used to get scores
   idempotent = url_reputation_idempotent -- used to set scores
 }
diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c
deleted file mode 100644
index 4113130db..000000000
--- a/src/plugins/surbl.c
+++ /dev/null
@@ -1,2286 +0,0 @@
-/*-
- * Copyright 2016 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/***MODULE:surbl
- * rspamd module that implements SURBL url checking
- *
- * Allowed options:
- * - weight (integer): weight of symbol
- * Redirector options:
- * - redirector (string): address of http redirector utility in format "host:port"
- * - redirector_connect_timeout (seconds): redirector connect timeout (default: 1s)
- * - redirector_read_timeout (seconds): timeout for reading data (default: 5s)
- * - redirector_hosts_map (map string): map that contains domains to check with redirector
- * Surbl options:
- * - exceptions (map string): map of domains that should be checked via surbl using 3 (e.g. somehost.domain.com)
- *   components of domain name instead of normal 2 (e.g. domain.com)
- * - whitelist (map string): map of domains that should be whitelisted for surbl checks
- * - max_urls (integer): maximum allowed number of urls in message to be checked
- * - suffix (string): surbl address (for example insecure-bl.rambler.ru), may contain %b if bits are used (read documentation about it)
- * - bit (string): describes a prefix for a single bit
- */
-
-#include "config.h"
-#include "libmime/message.h"
-#include "libutil/map.h"
-#include "libutil/map_helpers.h"
-#include "rspamd.h"
-#include "utlist.h"
-#include "multipattern.h"
-#include "monitored.h"
-#include "libserver/html.h"
-#include "libutil/http_private.h"
-#include "unix-std.h"
-#include "lua/lua_common.h"
-
-#define msg_err_surbl(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
-        "surbl", task->task_pool->tag.uid, \
-        G_STRFUNC, \
-        __VA_ARGS__)
-#define msg_warn_surbl(...)   rspamd_default_log_function (G_LOG_LEVEL_WARNING, \
-        "surbl", task->task_pool->tag.uid, \
-        G_STRFUNC, \
-        __VA_ARGS__)
-#define msg_info_surbl(...)   rspamd_default_log_function (G_LOG_LEVEL_INFO, \
-        "surbl", task->task_pool->tag.uid, \
-        G_STRFUNC, \
-        __VA_ARGS__)
-#define msg_debug_surbl(...)  rspamd_conditional_debug_fast (NULL, task->from_addr, \
-        rspamd_surbl_log_id, "surbl", task->task_pool->tag.uid, \
-        G_STRFUNC, \
-        __VA_ARGS__)
-
-INIT_LOG_MODULE(surbl)
-
-static const gchar *M = "surbl";
-
-#define DEFAULT_SURBL_WEIGHT 10
-#define DEFAULT_REDIRECTOR_READ_TIMEOUT 5.0
-#define DEFAULT_SURBL_SYMBOL "SURBL_DNS"
-#define SURBL_OPTION_NOIP (1u << 0u)
-#define SURBL_OPTION_RESOLVEIP (1u << 1u)
-#define SURBL_OPTION_CHECKIMAGES (1u << 2u)
-#define SURBL_OPTION_CHECKDKIM (1u << 3u)
-#define SURBL_OPTION_FULLDOMAIN (1u << 4u)
-#define SURBL_OPTION_CHECKEMAILS (1u << 5u)
-#define MAX_LEVELS 10
-
-struct surbl_ctx {
-	struct module_ctx ctx;
-	guint16 weight;
-	gdouble read_timeout;
-	gboolean use_tags;
-	GList *suffixes;
-	const gchar *redirector_symbol;
-	GHashTable **exceptions;
-	struct rspamd_hash_map_helper *whitelist;
-	GHashTable *redirector_tlds;
-	guint use_redirector;
-	guint max_redirected_urls;
-	gint redirector_cbid;
-	struct upstream_list *redirectors;
-};
-
-struct suffix_item {
-	guint64 magic;
-	const gchar *monitored_domain;
-	const gchar *suffix;
-	const gchar *symbol;
-	GArray *bits;
-	GHashTable *ips;
-	struct rspamd_monitored *m;
-	guint32 options;
-	gboolean reported_offline;
-	gint callback_id;
-	gint url_process_cbref;
-};
-
-struct dns_param {
-	struct rspamd_url *url;
-	struct rspamd_task *task;
-	gchar *host_resolve;
-	gchar *host_orig; /* Name with no uribl suffix */
-	struct suffix_item *suffix;
-	struct rspamd_symcache_item *item;
-	struct surbl_module_ctx *ctx;
-};
-
-struct redirector_param {
-	struct rspamd_url *url;
-	struct rspamd_task *task;
-	struct upstream *redirector;
-	struct surbl_ctx *ctx;
-	struct rspamd_http_connection *conn;
-	GHashTable *tree;
-	struct suffix_item *suffix;
-	struct rspamd_symcache_item *item;
-	guint redirector_requests;
-};
-
-struct surbl_bit_item {
-	guint32 bit;
-	gchar *symbol;
-};
-
-#define SURBL_REDIRECTOR_CALLBACK "SURBL_REDIRECTOR_CALLBACK"
-
-static const guint64 rspamd_surbl_cb_magic = 0xe09b8536f80de0d1ULL;
-static const gchar *rspamd_surbl_default_monitored = "facebook.com";
-static const guint default_max_redirected_urls = 10;
-
-static void surbl_test_url (struct rspamd_task *task,
-							struct rspamd_symcache_item *item,
-							void *user_data);
-static void surbl_test_redirector (struct rspamd_task *task,
-								   struct rspamd_symcache_item *item,
-								   void *user_data);
-static void surbl_dns_callback (struct rdns_reply *reply, gpointer arg);
-static void surbl_dns_ip_callback (struct rdns_reply *reply, gpointer arg);
-static void process_dns_results (struct rspamd_task *task,
-	struct suffix_item *suffix, gchar *resolved_name,
-	guint32 addr, struct rspamd_url *url);
-static gint surbl_register_redirect_handler (lua_State *L);
-static gint surbl_continue_process_handler (lua_State *L);
-static gint surbl_is_redirector_handler (lua_State *L);
-
-#define NO_REGEXP (gpointer) - 1
-
-#define SURBL_ERROR surbl_error_quark ()
-#define WHITELIST_ERROR 0
-#define CONVERSION_ERROR 1
-#define DUPLICATE_ERROR 1
-
-GQuark
-surbl_error_quark (void)
-{
-	return g_quark_from_static_string ("surbl-error-quark");
-}
-
-/* Initialization */
-gint surbl_module_init (struct rspamd_config *cfg, struct module_ctx **ctx);
-gint surbl_module_config (struct rspamd_config *cfg);
-gint surbl_module_reconfig (struct rspamd_config *cfg);
-
-module_t surbl_module = {
-		"surbl",
-		surbl_module_init,
-		surbl_module_config,
-		surbl_module_reconfig,
-		NULL,
-		RSPAMD_MODULE_VER,
-		(guint)-1,
-};
-
-static inline struct surbl_ctx *
-surbl_get_context (struct rspamd_config *cfg)
-{
-	return (struct surbl_ctx *)g_ptr_array_index (cfg->c_modules,
-			surbl_module.ctx_offset);
-}
-
-static void
-exceptions_free_value (gpointer v)
-{
-	rspamd_ftok_t *val = v;
-
-	g_free ((gpointer)val->begin);
-	g_free (val);
-}
-
-static void
-exception_insert (gpointer st, gconstpointer key, gconstpointer value)
-{
-	GHashTable **t = st;
-	gint level = 0;
-	const gchar *p = key;
-	rspamd_ftok_t *val;
-
-	while (*p) {
-		if (*p == '.') {
-			level++;
-		}
-		p++;
-	}
-	if (level >= MAX_LEVELS) {
-		msg_err ("invalid domain in exceptions list: %s, levels: %d",
-			(gchar *)key,
-			level);
-		return;
-	}
-
-	val = g_malloc (sizeof (rspamd_ftok_t));
-	val->begin = g_strdup (key);
-	val->len = strlen (key);
-
-	if (t[level] == NULL) {
-		t[level] = g_hash_table_new_full (rspamd_ftok_icase_hash,
-				rspamd_ftok_icase_equal,
-				exceptions_free_value,
-				g_free);
-	}
-
-	g_hash_table_replace (t[level], val, g_strdup (value));
-}
-
-static gchar *
-read_exceptions_list (gchar * chunk,
-	gint len,
-	struct map_cb_data *data,
-	gboolean final)
-{
-	GHashTable **t;
-	guint i;
-
-	if (data->cur_data == NULL) {
-		t = data->prev_data;
-
-		if (t) {
-			for (i = 0; i < MAX_LEVELS; i++) {
-				if (t[i] != NULL) {
-					g_hash_table_destroy (t[i]);
-				}
-				t[i] = NULL;
-			}
-
-			g_free (t);
-		}
-
-		data->prev_data = NULL;
-		data->cur_data = g_malloc0 (MAX_LEVELS * sizeof (GHashTable *));
-	}
-
-	return rspamd_parse_kv_list (
-			   chunk,
-			   len,
-			   data,
-			   exception_insert,
-			   "",
-			   final);
-}
-
-static void
-fin_exceptions_list (struct map_cb_data *data, void **target)
-{
-	GHashTable **t;
-	gint i;
-
-	if (target) {
-		*target = data->cur_data;
-	}
-
-	if (data->prev_data) {
-		t = data->prev_data;
-		for (i = 0; i < MAX_LEVELS; i++) {
-			if (t[i] != NULL) {
-				rspamd_default_log_function (G_LOG_LEVEL_DEBUG,
-						"surbl", "",
-						G_STRFUNC,
-						"exceptions level %d: %d elements",
-						i, g_hash_table_size (t[i]));
-			}
-		}
-	}
-}
-
-static void
-dtor_exceptions_list (struct map_cb_data *data)
-{
-	GHashTable **t;
-	gint i;
-
-	if (data->cur_data) {
-		t = data->cur_data;
-		for (i = 0; i < MAX_LEVELS; i++) {
-			if (t[i] != NULL) {
-				g_hash_table_destroy (t[i]);
-			}
-			t[i] = NULL;
-		}
-
-		g_free (t);
-	}
-}
-
-static void
-redirector_insert (gpointer st, gconstpointer key, gconstpointer value)
-{
-	GHashTable *tld_hash = st;
-	const gchar *p = key, *begin = key;
-	rspamd_fstring_t *pat;
-	rspamd_ftok_t *tok;
-	rspamd_regexp_t *re = NO_REGEXP;
-	GError *err = NULL;
-
-	while (*p && !g_ascii_isspace (*p)) {
-		p++;
-	}
-
-	pat = rspamd_fstring_new_init (begin, p - begin);
-	tok = g_malloc0 (sizeof (*tok));
-	tok->begin = pat->str;
-	tok->len = pat->len;
-
-	if (g_ascii_isspace (*p)) {
-		while (g_ascii_isspace (*p) && *p) {
-			p++;
-		}
-		if (*p) {
-			re = rspamd_regexp_new (p,
-					"ir",
-					&err);
-			if (re == NULL) {
-				msg_warn ("could not read regexp: %e while reading regexp %s",
-					err,
-					p);
-				g_error_free (err);
-				re = NO_REGEXP;
-			}
-		}
-	}
-
-	g_hash_table_replace (tld_hash, tok, re);
-}
-
-static void
-redirector_item_free (gpointer p)
-{
-	rspamd_regexp_t *re;
-
-	if (p != NULL && p != NO_REGEXP) {
-		re = (rspamd_regexp_t *)p;
-		rspamd_regexp_unref (re);
-	}
-}
-
-static gchar *
-read_redirectors_list (gchar * chunk,
-	gint len,
-	struct map_cb_data *data,
-	gboolean final)
-{
-	GHashTable *tld_hash;
-
-	if (data->cur_data == NULL) {
-		tld_hash  = g_hash_table_new_full (rspamd_ftok_icase_hash,
-				rspamd_ftok_icase_equal,
-				rspamd_fstring_mapped_ftok_free,
-				redirector_item_free);
-
-		data->cur_data = tld_hash;
-	}
-
-	return rspamd_parse_kv_list (
-			   chunk,
-			   len,
-			   data,
-			   redirector_insert,
-			   "",
-			   final);
-}
-
-static void
-fin_redirectors_list (struct map_cb_data *data, void **target)
-{
-	GHashTable *tld_hash;
-
-	if (target) {
-		*target = data->cur_data;
-	}
-
-	if (data->prev_data) {
-		tld_hash = data->prev_data;
-
-		g_hash_table_unref (tld_hash);
-	}
-}
-
-static void
-dtor_redirectors_list (struct map_cb_data *data)
-{
-	GHashTable *tld_hash;
-
-	if (data->cur_data) {
-		tld_hash = data->cur_data;
-
-		g_hash_table_unref (tld_hash);
-	}
-}
-
-gint
-surbl_module_init (struct rspamd_config *cfg, struct module_ctx **ctx)
-{
-	struct surbl_ctx *surbl_module_ctx;
-
-	surbl_module_ctx = rspamd_mempool_alloc0 (cfg->cfg_pool,
-			sizeof (struct surbl_ctx));
-
-	surbl_module_ctx->use_redirector = 0;
-	surbl_module_ctx->suffixes = NULL;
-
-	surbl_module_ctx->redirectors = NULL;
-	surbl_module_ctx->whitelist = NULL;
-	surbl_module_ctx->exceptions = NULL;
-	surbl_module_ctx->redirector_cbid = -1;
-
-
-	*ctx = (struct module_ctx *)surbl_module_ctx;
-
-	rspamd_rcl_add_doc_by_path (cfg,
-			NULL,
-			"URL blacklist plugin",
-			"surbl",
-			UCL_OBJECT,
-			NULL,
-			0,
-			NULL,
-			0);
-	rspamd_rcl_add_doc_by_path (cfg,
-			"surbl",
-			"List of redirector servers",
-			"redirector",
-			UCL_STRING,
-			NULL,
-			0,
-			NULL,
-			0);
-	rspamd_rcl_add_doc_by_path (cfg,
-			"surbl",
-			"Map of domains that should be checked with redirector",
-			"redirector_hosts_map",
-			UCL_STRING,
-			NULL,
-			0,
-			NULL,
-			0);
-	rspamd_rcl_add_doc_by_path (cfg,
-			"surbl",
-			"Connect timeout for redirector",
-			"redirector_connect_timeout",
-			UCL_TIME,
-			NULL,
-			0,
-			NULL,
-			0);
-	rspamd_rcl_add_doc_by_path (cfg,
-			"surbl",
-			"Read timeout for redirector",
-			"redirector_read_timeout",
-			UCL_TIME,
-			NULL,
-			0,
-			NULL,
-			0);
-	rspamd_rcl_add_doc_by_path (cfg,
-			"surbl",
-			"Maximum number of URLs to process per message",
-			"max_urls",
-			UCL_INT,
-			NULL,
-			0,
-			NULL,
-			0);
-	rspamd_rcl_add_doc_by_path (cfg,
-			"surbl",
-			"Rules for TLD composition",
-			"exceptions",
-			UCL_STRING,
-			NULL,
-			0,
-			NULL,
-			0);
-	rspamd_rcl_add_doc_by_path (cfg,
-			"surbl",
-			"Map of whitelisted domains",
-			"whitelist",
-			UCL_STRING,
-			NULL,
-			0,
-			NULL,
-			0);
-	rspamd_rcl_add_doc_by_path (cfg,
-			"surbl",
-			"URL blacklist rule",
-			"rule",
-			UCL_OBJECT,
-			NULL,
-			0,
-			NULL,
-			0);
-	/* Rules doc strings */
-	rspamd_rcl_add_doc_by_path (cfg,
-			"surbl.rule",
-			"Name of DNS black list (e.g. `multi.surbl.com`)",
-			"suffix",
-			UCL_STRING,
-			NULL,
-			0,
-			NULL,
-			0);
-	rspamd_rcl_add_doc_by_path (cfg,
-			"surbl.rule",
-			"Symbol to insert (if no bits or suffixes are defined)",
-			"symbol",
-			UCL_STRING,
-			NULL,
-			0,
-			NULL,
-			0);
-	rspamd_rcl_add_doc_by_path (cfg,
-			"surbl.rule",
-			"Whether the defined rule should be used",
-			"enabled",
-			UCL_BOOLEAN,
-			NULL,
-			0,
-			NULL,
-			0);
-	rspamd_rcl_add_doc_by_path (cfg,
-			"surbl.rule",
-			"Do not try to check URLs with IP address instead of hostname",
-			"no_ip",
-			UCL_BOOLEAN,
-			NULL,
-			0,
-			NULL,
-			0);
-	rspamd_rcl_add_doc_by_path (cfg,
-			"surbl.rule",
-			"Resolve URL host and then check against the specified suffix with reversed IP octets",
-			"resolve_ip",
-			UCL_BOOLEAN,
-			NULL,
-			0,
-			NULL,
-			0);
-	rspamd_rcl_add_doc_by_path (cfg,
-			"surbl.rule",
-			"Check images URLs with this URL list",
-			"images",
-			UCL_BOOLEAN,
-			NULL,
-			0,
-			NULL,
-			0);
-	rspamd_rcl_add_doc_by_path (cfg,
-			"surbl.rule",
-			"Parse IP bits in DNS reply, the content is 'symbol = <bit>'",
-			"bits",
-			UCL_OBJECT,
-			NULL,
-			0,
-			NULL,
-			0);
-	rspamd_rcl_add_doc_by_path (cfg,
-			"surbl.rule",
-			"Parse IP addresses in DNS reply, the content is 'symbol = address'",
-			"ips",
-			UCL_OBJECT,
-			NULL,
-			0,
-			NULL,
-			0);
-	rspamd_rcl_add_doc_by_path (cfg,
-			"surbl.rule",
-			"Check domains in valid DKIM signatures",
-			"check_dkim",
-			UCL_BOOLEAN,
-			NULL,
-			0,
-			NULL,
-			0);
-	rspamd_rcl_add_doc_by_path (cfg,
-			"surbl.rule",
-			"Check full domain name instead of eSLD",
-			"full_domain",
-			UCL_BOOLEAN,
-			NULL,
*** OUTPUT TRUNCATED, 1689 LINES SKIPPED ***


More information about the Commits mailing list