commit 61f8601: [Rework] Kill surbl C module
Vsevolod Stakhov
vsevolod at highsecure.ru
Wed Aug 28 07:42:08 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-08-28 08:37:34 +0100
URL: https://github.com/rspamd/rspamd/commit/61f8601c3b987f7b24f9a61a2f622ee797368e03 (HEAD -> master)
[Rework] Kill surbl C module
---
src/CMakeLists.txt | 5 +-
src/libserver/protocol.c | 6 +-
src/libserver/url.h | 2 -
src/plugins/lua/reputation.lua | 1 -
src/plugins/surbl.c | 2286 ----------------------------------
test/functional/configs/plugins.conf | 2 +-
6 files changed, 6 insertions(+), 2296 deletions(-)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 1147eba9f..c5871b665 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -93,15 +93,14 @@ SET(RSPAMDSRC controller.c
worker.c
rspamd_proxy.c)
-SET(PLUGINSSRC plugins/surbl.c
- plugins/regexp.c
+SET(PLUGINSSRC plugins/regexp.c
plugins/chartable.c
plugins/fuzzy_check.c
plugins/spf.c
plugins/dkim_check.c
libserver/rspamd_control.c)
-SET(MODULES_LIST surbl regexp chartable fuzzy_check spf dkim)
+SET(MODULES_LIST regexp chartable fuzzy_check spf dkim)
SET(WORKERS_LIST normal controller fuzzy rspamd_proxy)
IF (ENABLE_HYPERSCAN MATCHES "ON")
LIST(APPEND WORKERS_LIST "hs_helper")
diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c
index a7abc5947..f09784c48 100644
--- a/src/libserver/protocol.c
+++ b/src/libserver/protocol.c
@@ -875,9 +875,9 @@ rspamd_protocol_extended_url (struct rspamd_task *task,
elt = ucl_object_fromlstring (encoded, enclen);
ucl_object_insert_key (obj, elt, "url", 0, false);
- if (url->surbllen > 0) {
- elt = ucl_object_fromlstring (url->surbl, url->surbllen);
- ucl_object_insert_key (obj, elt, "surbl", 0, false);
+ if (url->tldlen > 0) {
+ elt = ucl_object_fromlstring (url->tld, url->tldlen);
+ ucl_object_insert_key (obj, elt, "tld", 0, false);
}
if (url->hostlen > 0) {
elt = ucl_object_fromlstring (url->host, url->hostlen);
diff --git a/src/libserver/url.h b/src/libserver/url.h
index d9e15e212..ae21b6ab3 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -51,7 +51,6 @@ struct rspamd_url {
gchar *data;
gchar *query;
gchar *fragment;
- gchar *surbl;
gchar *tld;
gchar *visible_part;
@@ -63,7 +62,6 @@ struct rspamd_url {
guint datalen;
guint querylen;
guint fragmentlen;
- guint surbllen;
guint tldlen;
guint urllen;
guint rawlen;
diff --git a/src/plugins/lua/reputation.lua b/src/plugins/lua/reputation.lua
index 9e28e876b..4c92eab94 100644
--- a/src/plugins/lua/reputation.lua
+++ b/src/plugins/lua/reputation.lua
@@ -346,7 +346,6 @@ local url_selector = {
outbound = true,
inbound = true,
},
- dependencies = {"SURBL_REDIRECTOR_CALLBACK"},
filter = url_reputation_filter, -- used to get scores
idempotent = url_reputation_idempotent -- used to set scores
}
diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c
deleted file mode 100644
index 4113130db..000000000
--- a/src/plugins/surbl.c
+++ /dev/null
@@ -1,2286 +0,0 @@
-/*-
- * Copyright 2016 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/***MODULE:surbl
- * rspamd module that implements SURBL url checking
- *
- * Allowed options:
- * - weight (integer): weight of symbol
- * Redirecotor options:
- * - redirector (string): address of http redirector utility in format "host:port"
- * - redirector_connect_timeout (seconds): redirector connect timeout (default: 1s)
- * - redirector_read_timeout (seconds): timeout for reading data (default: 5s)
- * - redirector_hosts_map (map string): map that contains domains to check with redirector
- * Surbl options:
- * - exceptions (map string): map of domains that should be checked via surbl using 3 (e.g. somehost.domain.com)
- * components of domain name instead of normal 2 (e.g. domain.com)
- * - whitelist (map string): map of domains that should be whitelisted for surbl checks
- * - max_urls (integer): maximum allowed number of urls in message to be checked
- * - suffix (string): surbl address (for example insecure-bl.rambler.ru), may contain %b if bits are used (read documentation about it)
- * - bit (string): describes a prefix for a single bit
- */
-
-#include "config.h"
-#include "libmime/message.h"
-#include "libutil/map.h"
-#include "libutil/map_helpers.h"
-#include "rspamd.h"
-#include "utlist.h"
-#include "multipattern.h"
-#include "monitored.h"
-#include "libserver/html.h"
-#include "libutil/http_private.h"
-#include "unix-std.h"
-#include "lua/lua_common.h"
-
-#define msg_err_surbl(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
- "surbl", task->task_pool->tag.uid, \
- G_STRFUNC, \
- __VA_ARGS__)
-#define msg_warn_surbl(...) rspamd_default_log_function (G_LOG_LEVEL_WARNING, \
- "surbl", task->task_pool->tag.uid, \
- G_STRFUNC, \
- __VA_ARGS__)
-#define msg_info_surbl(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \
- "surbl", task->task_pool->tag.uid, \
- G_STRFUNC, \
- __VA_ARGS__)
-#define msg_debug_surbl(...) rspamd_conditional_debug_fast (NULL, task->from_addr, \
- rspamd_surbl_log_id, "surbl", task->task_pool->tag.uid, \
- G_STRFUNC, \
- __VA_ARGS__)
-
-INIT_LOG_MODULE(surbl)
-
-static const gchar *M = "surbl";
-
-#define DEFAULT_SURBL_WEIGHT 10
-#define DEFAULT_REDIRECTOR_READ_TIMEOUT 5.0
-#define DEFAULT_SURBL_SYMBOL "SURBL_DNS"
-#define SURBL_OPTION_NOIP (1u << 0u)
-#define SURBL_OPTION_RESOLVEIP (1u << 1u)
-#define SURBL_OPTION_CHECKIMAGES (1u << 2u)
-#define SURBL_OPTION_CHECKDKIM (1u << 3u)
-#define SURBL_OPTION_FULLDOMAIN (1u << 4u)
-#define SURBL_OPTION_CHECKEMAILS (1u << 5u)
-#define MAX_LEVELS 10
-
-struct surbl_ctx {
- struct module_ctx ctx;
- guint16 weight;
- gdouble read_timeout;
- gboolean use_tags;
- GList *suffixes;
- const gchar *redirector_symbol;
- GHashTable **exceptions;
- struct rspamd_hash_map_helper *whitelist;
- GHashTable *redirector_tlds;
- guint use_redirector;
- guint max_redirected_urls;
- gint redirector_cbid;
- struct upstream_list *redirectors;
-};
-
-struct suffix_item {
- guint64 magic;
- const gchar *monitored_domain;
- const gchar *suffix;
- const gchar *symbol;
- GArray *bits;
- GHashTable *ips;
- struct rspamd_monitored *m;
- guint32 options;
- gboolean reported_offline;
- gint callback_id;
- gint url_process_cbref;
-};
-
-struct dns_param {
- struct rspamd_url *url;
- struct rspamd_task *task;
- gchar *host_resolve;
- gchar *host_orig; /* Name with no uribl suffix */
- struct suffix_item *suffix;
- struct rspamd_symcache_item *item;
- struct surbl_module_ctx *ctx;
-};
-
-struct redirector_param {
- struct rspamd_url *url;
- struct rspamd_task *task;
- struct upstream *redirector;
- struct surbl_ctx *ctx;
- struct rspamd_http_connection *conn;
- GHashTable *tree;
- struct suffix_item *suffix;
- struct rspamd_symcache_item *item;
- guint redirector_requests;
-};
-
-struct surbl_bit_item {
- guint32 bit;
- gchar *symbol;
-};
-
-#define SURBL_REDIRECTOR_CALLBACK "SURBL_REDIRECTOR_CALLBACK"
-
-static const guint64 rspamd_surbl_cb_magic = 0xe09b8536f80de0d1ULL;
-static const gchar *rspamd_surbl_default_monitored = "facebook.com";
-static const guint default_max_redirected_urls = 10;
-
-static void surbl_test_url (struct rspamd_task *task,
- struct rspamd_symcache_item *item,
- void *user_data);
-static void surbl_test_redirector (struct rspamd_task *task,
- struct rspamd_symcache_item *item,
- void *user_data);
-static void surbl_dns_callback (struct rdns_reply *reply, gpointer arg);
-static void surbl_dns_ip_callback (struct rdns_reply *reply, gpointer arg);
-static void process_dns_results (struct rspamd_task *task,
- struct suffix_item *suffix, gchar *resolved_name,
- guint32 addr, struct rspamd_url *url);
-static gint surbl_register_redirect_handler (lua_State *L);
-static gint surbl_continue_process_handler (lua_State *L);
-static gint surbl_is_redirector_handler (lua_State *L);
-
-#define NO_REGEXP (gpointer) - 1
-
-#define SURBL_ERROR surbl_error_quark ()
-#define WHITELIST_ERROR 0
-#define CONVERSION_ERROR 1
-#define DUPLICATE_ERROR 1
-
-GQuark
-surbl_error_quark (void)
-{
- return g_quark_from_static_string ("surbl-error-quark");
-}
-
-/* Initialization */
-gint surbl_module_init (struct rspamd_config *cfg, struct module_ctx **ctx);
-gint surbl_module_config (struct rspamd_config *cfg);
-gint surbl_module_reconfig (struct rspamd_config *cfg);
-
-module_t surbl_module = {
- "surbl",
- surbl_module_init,
- surbl_module_config,
- surbl_module_reconfig,
- NULL,
- RSPAMD_MODULE_VER,
- (guint)-1,
-};
-
-static inline struct surbl_ctx *
-surbl_get_context (struct rspamd_config *cfg)
-{
- return (struct surbl_ctx *)g_ptr_array_index (cfg->c_modules,
- surbl_module.ctx_offset);
-}
-
-static void
-exceptions_free_value (gpointer v)
-{
- rspamd_ftok_t *val = v;
-
- g_free ((gpointer)val->begin);
- g_free (val);
-}
-
-static void
-exception_insert (gpointer st, gconstpointer key, gconstpointer value)
-{
- GHashTable **t = st;
- gint level = 0;
- const gchar *p = key;
- rspamd_ftok_t *val;
-
- while (*p) {
- if (*p == '.') {
- level++;
- }
- p++;
- }
- if (level >= MAX_LEVELS) {
- msg_err ("invalid domain in exceptions list: %s, levels: %d",
- (gchar *)key,
- level);
- return;
- }
-
- val = g_malloc (sizeof (rspamd_ftok_t));
- val->begin = g_strdup (key);
- val->len = strlen (key);
-
- if (t[level] == NULL) {
- t[level] = g_hash_table_new_full (rspamd_ftok_icase_hash,
- rspamd_ftok_icase_equal,
- exceptions_free_value,
- g_free);
- }
-
- g_hash_table_replace (t[level], val, g_strdup (value));
-}
-
-static gchar *
-read_exceptions_list (gchar * chunk,
- gint len,
- struct map_cb_data *data,
- gboolean final)
-{
- GHashTable **t;
- guint i;
-
- if (data->cur_data == NULL) {
- t = data->prev_data;
-
- if (t) {
- for (i = 0; i < MAX_LEVELS; i++) {
- if (t[i] != NULL) {
- g_hash_table_destroy (t[i]);
- }
- t[i] = NULL;
- }
-
- g_free (t);
- }
-
- data->prev_data = NULL;
- data->cur_data = g_malloc0 (MAX_LEVELS * sizeof (GHashTable *));
- }
-
- return rspamd_parse_kv_list (
- chunk,
- len,
- data,
- exception_insert,
- "",
- final);
-}
-
-static void
-fin_exceptions_list (struct map_cb_data *data, void **target)
-{
- GHashTable **t;
- gint i;
-
- if (target) {
- *target = data->cur_data;
- }
-
- if (data->prev_data) {
- t = data->prev_data;
- for (i = 0; i < MAX_LEVELS; i++) {
- if (t[i] != NULL) {
- rspamd_default_log_function (G_LOG_LEVEL_DEBUG,
- "surbl", "",
- G_STRFUNC,
- "exceptions level %d: %d elements",
- i, g_hash_table_size (t[i]));
- }
- }
- }
-}
-
-static void
-dtor_exceptions_list (struct map_cb_data *data)
-{
- GHashTable **t;
- gint i;
-
- if (data->cur_data) {
- t = data->cur_data;
- for (i = 0; i < MAX_LEVELS; i++) {
- if (t[i] != NULL) {
- g_hash_table_destroy (t[i]);
- }
- t[i] = NULL;
- }
-
- g_free (t);
- }
-}
-
-static void
-redirector_insert (gpointer st, gconstpointer key, gconstpointer value)
-{
- GHashTable *tld_hash = st;
- const gchar *p = key, *begin = key;
- rspamd_fstring_t *pat;
- rspamd_ftok_t *tok;
- rspamd_regexp_t *re = NO_REGEXP;
- GError *err = NULL;
-
- while (*p && !g_ascii_isspace (*p)) {
- p++;
- }
-
- pat = rspamd_fstring_new_init (begin, p - begin);
- tok = g_malloc0 (sizeof (*tok));
- tok->begin = pat->str;
- tok->len = pat->len;
-
- if (g_ascii_isspace (*p)) {
- while (g_ascii_isspace (*p) && *p) {
- p++;
- }
- if (*p) {
- re = rspamd_regexp_new (p,
- "ir",
- &err);
- if (re == NULL) {
- msg_warn ("could not read regexp: %e while reading regexp %s",
- err,
- p);
- g_error_free (err);
- re = NO_REGEXP;
- }
- }
- }
-
- g_hash_table_replace (tld_hash, tok, re);
-}
-
-static void
-redirector_item_free (gpointer p)
-{
- rspamd_regexp_t *re;
-
- if (p != NULL && p != NO_REGEXP) {
- re = (rspamd_regexp_t *)p;
- rspamd_regexp_unref (re);
- }
-}
-
-static gchar *
-read_redirectors_list (gchar * chunk,
- gint len,
- struct map_cb_data *data,
- gboolean final)
-{
- GHashTable *tld_hash;
-
- if (data->cur_data == NULL) {
- tld_hash = g_hash_table_new_full (rspamd_ftok_icase_hash,
- rspamd_ftok_icase_equal,
- rspamd_fstring_mapped_ftok_free,
- redirector_item_free);
-
- data->cur_data = tld_hash;
- }
-
- return rspamd_parse_kv_list (
- chunk,
- len,
- data,
- redirector_insert,
- "",
- final);
-}
-
-static void
-fin_redirectors_list (struct map_cb_data *data, void **target)
-{
- GHashTable *tld_hash;
-
- if (target) {
- *target = data->cur_data;
- }
-
- if (data->prev_data) {
- tld_hash = data->prev_data;
-
- g_hash_table_unref (tld_hash);
- }
-}
-
-static void
-dtor_redirectors_list (struct map_cb_data *data)
-{
- GHashTable *tld_hash;
-
- if (data->cur_data) {
- tld_hash = data->cur_data;
-
- g_hash_table_unref (tld_hash);
- }
-}
-
-gint
-surbl_module_init (struct rspamd_config *cfg, struct module_ctx **ctx)
-{
- struct surbl_ctx *surbl_module_ctx;
-
- surbl_module_ctx = rspamd_mempool_alloc0 (cfg->cfg_pool,
- sizeof (struct surbl_ctx));
-
- surbl_module_ctx->use_redirector = 0;
- surbl_module_ctx->suffixes = NULL;
-
- surbl_module_ctx->redirectors = NULL;
- surbl_module_ctx->whitelist = NULL;
- surbl_module_ctx->exceptions = NULL;
- surbl_module_ctx->redirector_cbid = -1;
-
-
- *ctx = (struct module_ctx *)surbl_module_ctx;
-
- rspamd_rcl_add_doc_by_path (cfg,
- NULL,
- "URL blacklist plugin",
- "surbl",
- UCL_OBJECT,
- NULL,
- 0,
- NULL,
- 0);
- rspamd_rcl_add_doc_by_path (cfg,
- "surbl",
- "List of redirector servers",
- "redirector",
- UCL_STRING,
- NULL,
- 0,
- NULL,
- 0);
- rspamd_rcl_add_doc_by_path (cfg,
- "surbl",
- "Map of domains that should be checked with redirector",
- "redirector_hosts_map",
- UCL_STRING,
- NULL,
- 0,
- NULL,
- 0);
- rspamd_rcl_add_doc_by_path (cfg,
- "surbl",
- "Connect timeout for redirector",
- "redirector_connect_timeout",
- UCL_TIME,
- NULL,
- 0,
- NULL,
- 0);
- rspamd_rcl_add_doc_by_path (cfg,
- "surbl",
- "Read timeout for redirector",
- "redirector_read_timeout",
- UCL_TIME,
- NULL,
- 0,
- NULL,
- 0);
- rspamd_rcl_add_doc_by_path (cfg,
- "surbl",
- "Maximum number of URLs to process per message",
- "max_urls",
- UCL_INT,
- NULL,
- 0,
- NULL,
- 0);
- rspamd_rcl_add_doc_by_path (cfg,
- "surbl",
- "Rules for TLD composition",
- "exceptions",
- UCL_STRING,
- NULL,
- 0,
- NULL,
- 0);
- rspamd_rcl_add_doc_by_path (cfg,
- "surbl",
- "Map of whitelisted domains",
- "whitelist",
- UCL_STRING,
- NULL,
- 0,
- NULL,
- 0);
- rspamd_rcl_add_doc_by_path (cfg,
- "surbl",
- "URL blacklist rule",
- "rule",
- UCL_OBJECT,
- NULL,
- 0,
- NULL,
- 0);
- /* Rules doc strings */
- rspamd_rcl_add_doc_by_path (cfg,
- "surbl.rule",
- "Name of DNS black list (e.g. `multi.surbl.com`)",
- "suffix",
- UCL_STRING,
- NULL,
- 0,
- NULL,
- 0);
- rspamd_rcl_add_doc_by_path (cfg,
- "surbl.rule",
- "Symbol to insert (if no bits or suffixes are defined)",
- "symbol",
- UCL_STRING,
- NULL,
- 0,
- NULL,
- 0);
- rspamd_rcl_add_doc_by_path (cfg,
- "surbl.rule",
- "Whether the defined rule should be used",
- "enabled",
- UCL_BOOLEAN,
- NULL,
- 0,
- NULL,
- 0);
- rspamd_rcl_add_doc_by_path (cfg,
- "surbl.rule",
- "Do not try to check URLs with IP address instead of hostname",
- "no_ip",
- UCL_BOOLEAN,
- NULL,
- 0,
- NULL,
- 0);
- rspamd_rcl_add_doc_by_path (cfg,
- "surbl.rule",
- "Resolve URL host and then check against the specified suffix with reversed IP octets",
- "resolve_ip",
- UCL_BOOLEAN,
- NULL,
- 0,
- NULL,
- 0);
- rspamd_rcl_add_doc_by_path (cfg,
- "surbl.rule",
- "Check images URLs with this URL list",
- "images",
- UCL_BOOLEAN,
- NULL,
- 0,
- NULL,
- 0);
- rspamd_rcl_add_doc_by_path (cfg,
- "surbl.rule",
- "Parse IP bits in DNS reply, the content is 'symbol = <bit>'",
- "bits",
- UCL_OBJECT,
- NULL,
- 0,
- NULL,
- 0);
- rspamd_rcl_add_doc_by_path (cfg,
- "surbl.rule",
- "Parse IP addresses in DNS reply, the content is 'symbol = address'",
- "ips",
- UCL_OBJECT,
- NULL,
- 0,
- NULL,
- 0);
- rspamd_rcl_add_doc_by_path (cfg,
- "surbl.rule",
- "Check domains in valid DKIM signatures",
- "check_dkim",
- UCL_BOOLEAN,
- NULL,
- 0,
- NULL,
- 0);
- rspamd_rcl_add_doc_by_path (cfg,
- "surbl.rule",
- "Check full domain name instead of eSLD",
- "full_domain",
- UCL_BOOLEAN,
- NULL,
*** OUTPUT TRUNCATED, 1689 LINES SKIPPED ***
More information about the Commits
mailing list