commit 34cc551: [Project] Preliminary support of HTTP proxies

Vsevolod Stakhov vsevolod at highsecure.ru
Mon Mar 18 16:21:05 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-03-18 14:07:32 +0000
URL: https://github.com/rspamd/rspamd/commit/34cc551018df1c174685493a64b496d594cc8eb6

[Project] Preliminary support of HTTP proxies
Issue: #572

---
 src/client/rspamc.c           |  2 +-
 src/controller.c              |  3 +-
 src/fuzzy_storage.c           |  2 +-
 src/libutil/http_connection.c |  2 +
 src/libutil/http_context.c    | 94 ++++++++++++++++++++++++++++++++++++++++---
 src/libutil/http_context.h    |  8 +++-
 src/libutil/http_private.h    |  4 ++
 src/libutil/upstream.c        | 52 +++++++++++++-----------
 src/libutil/upstream.h        |  4 ++
 src/plugins/surbl.c           |  2 +-
 src/rspamadm/rspamadm.c       |  3 +-
 src/rspamd.c                  |  2 +-
 src/rspamd_proxy.c            |  3 +-
 src/worker.c                  |  3 +-
 14 files changed, 146 insertions(+), 38 deletions(-)

diff --git a/src/client/rspamc.c b/src/client/rspamc.c
index 08a267f1b..2f572c449 100644
--- a/src/client/rspamc.c
+++ b/src/client/rspamc.c
@@ -1911,7 +1911,7 @@ main (gint argc, gchar **argv, gchar **env)
 	http_config.kp_cache_size_server = 0;
 	http_config.user_agent = user_agent;
 	http_ctx = rspamd_http_context_create_config (&http_config,
-			ev_base);
+			ev_base, NULL);
 
 	/* Ignore sigpipe */
 	sigemptyset (&sigpipe_act.sa_mask);
diff --git a/src/controller.c b/src/controller.c
index ac1acae81..7b48462ee 100644
--- a/src/controller.c
+++ b/src/controller.c
@@ -3776,7 +3776,8 @@ start_controller_worker (struct rspamd_worker *worker)
 			"password");
 
 	/* Accept event */
-	ctx->http_ctx = rspamd_http_context_create (ctx->cfg, ctx->ev_base);
+	ctx->http_ctx = rspamd_http_context_create (ctx->cfg, ctx->ev_base,
+			ctx->cfg->ups_ctx);
 	ctx->http = rspamd_http_router_new (rspamd_controller_error_handler,
 			rspamd_controller_finish_handler, &ctx->io_tv,
 			ctx->static_files_dir, ctx->http_ctx);
diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c
index 96fb09c2b..e82e9062a 100644
--- a/src/fuzzy_storage.c
+++ b/src/fuzzy_storage.c
@@ -2999,7 +2999,7 @@ start_fuzzy (struct rspamd_worker *worker)
 		ctx->keypair_cache = rspamd_keypair_cache_new (ctx->keypair_cache_size);
 	}
 
-	ctx->http_ctx = rspamd_http_context_create (cfg, ctx->ev_base);
+	ctx->http_ctx = rspamd_http_context_create (cfg, ctx->ev_base, ctx->cfg->ups_ctx);
 
 	if (!ctx->collection_mode) {
 		/*
diff --git a/src/libutil/http_connection.c b/src/libutil/http_connection.c
index 417784789..d782c2d13 100644
--- a/src/libutil/http_connection.c
+++ b/src/libutil/http_connection.c
@@ -49,6 +49,8 @@ enum rspamd_http_priv_flags {
 	RSPAMD_HTTP_CONN_FLAG_RESETED = 1 << 2,
 	RSPAMD_HTTP_CONN_FLAG_TOO_LARGE = 1 << 3,
 	RSPAMD_HTTP_CONN_FLAG_ENCRYPTION_NEEDED = 1 << 4,
+	RSPAMD_HTTP_CONN_FLAG_PROXY = 1 << 5,
+	RSPAMD_HTTP_CONN_FLAG_PROXY_REQUEST = 1 << 6,
 };
 
 #define IS_CONN_ENCRYPTED(c) ((c)->flags & RSPAMD_HTTP_CONN_FLAG_ENCRYPTED)
diff --git a/src/libutil/http_context.c b/src/libutil/http_context.c
index 9182285a3..18c89c6bc 100644
--- a/src/libutil/http_context.c
+++ b/src/libutil/http_context.c
@@ -14,12 +14,14 @@
  * limitations under the License.
  */
 
+#include <contrib/http-parser/http_parser.h>
 #include "http_context.h"
 #include "http_private.h"
 #include "keypair.h"
 #include "keypairs_cache.h"
 #include "cfg_file.h"
 #include "contrib/libottery/ottery.h"
+#include "contrib/http-parser/http_parser.h"
 #include "rspamd.h"
 
 INIT_LOG_MODULE(http_context)
@@ -85,7 +87,8 @@ rspamd_http_context_client_rotate_ev (gint fd, short what, void *arg)
 
 static struct rspamd_http_context*
 rspamd_http_context_new_default (struct rspamd_config *cfg,
-								 struct event_base *ev_base)
+								 struct event_base *ev_base,
+								 struct upstream_ctx *ups_ctx)
 {
 	struct rspamd_http_context *ctx;
 
@@ -100,6 +103,7 @@ rspamd_http_context_new_default (struct rspamd_config *cfg,
 	ctx->config.client_key_rotate_time = default_rotate_time;
 	ctx->config.user_agent = default_user_agent;
 	ctx->config.keepalive_interval = default_keepalive_interval;
+	ctx->ups_ctx = ups_ctx;
 
 	if (cfg) {
 		ctx->ssl_ctx = cfg->libs_ctx->ssl_ctx;
@@ -117,9 +121,63 @@ rspamd_http_context_new_default (struct rspamd_config *cfg,
 	return ctx;
 }
 
+/**
+ * Parses a proxy specification and, on success, stores a new upstream list
+ * in *pls. The string is first tried as `host:port` (http_parser is called
+ * with is_connect = 1); if that fails, the whole string is handed to the
+ * upstreams parser with port 3128. On any failure *pls is left untouched.
+ * @param ctx HTTP context; its ups_ctx must be set to create upstreams
+ * @param name proxy definition taken from the HTTP context configuration
+ * @param pls output location for the resulting upstream list
+ */
+static void
+rspamd_http_context_parse_proxy (struct rspamd_http_context *ctx,
+								 const gchar *name,
+								 struct upstream_list **pls)
+{
+	struct http_parser_url u;
+	struct upstream_list *uls;
+	const gchar *spec = name;
+	gsize spec_len;
+	guint16 port = 3128; /* Fallback port when `name` is not host:port */
+
+	if (!ctx->ups_ctx) {
+		msg_err ("cannot parse http_proxy %s - upstreams context is undefined", name);
+		return;
+	}
+
+	memset (&u, 0, sizeof (u));
+	spec_len = strlen (name);
+
+	if (http_parser_parse_url (name, spec_len, 1, &u) == 0) {
+		if (!(u.field_set & (1u << UF_HOST)) || u.port == 0) {
+			msg_err ("cannot parse http(s) proxy %s - invalid host or port", name);
+			return;
+		}
+
+		/* Feed only the host part to the upstreams parser */
+		spec = name + u.field_data[UF_HOST].off;
+		spec_len = u.field_data[UF_HOST].len;
+		port = u.port;
+	}
+
+	uls = rspamd_upstreams_create (ctx->ups_ctx);
+
+	if (!rspamd_upstreams_parse_line_len (uls, spec, spec_len, port, NULL)) {
+		msg_err ("cannot parse http(s) proxy %s - invalid data", name);
+		rspamd_upstreams_destroy (uls);
+		return;
+	}
+
+	*pls = uls;
+	msg_info ("set http(s) proxy to %s", name);
+}
+
 static void
 rspamd_http_context_init (struct rspamd_http_context *ctx)
 {
+
+
 	if (ctx->config.kp_cache_size_client > 0) {
 		ctx->client_kp_cache = rspamd_keypair_cache_new (ctx->config.kp_cache_size_client);
 	}
@@ -140,17 +198,28 @@ rspamd_http_context_init (struct rspamd_http_context *ctx)
 		event_add (&ctx->client_rotate_ev, &tv);
 	}
 
+	if (ctx->config.http_proxy) {
+		rspamd_http_context_parse_proxy (ctx, ctx->config.http_proxy,
+				&ctx->http_proxies);
+	}
+
+	if (ctx->config.https_proxy) {
+		rspamd_http_context_parse_proxy (ctx, ctx->config.https_proxy,
+				&ctx->https_proxies);
+	}
+
 	default_ctx = ctx;
 }
 
 struct rspamd_http_context*
 rspamd_http_context_create (struct rspamd_config *cfg,
-							struct event_base *ev_base)
+							struct event_base *ev_base,
+							struct upstream_ctx *ups_ctx)
 {
 	struct rspamd_http_context *ctx;
 	const ucl_object_t *http_obj;
 
-	ctx = rspamd_http_context_new_default (cfg, ev_base);
+	ctx = rspamd_http_context_new_default (cfg, ev_base, ups_ctx);
 	http_obj = ucl_object_lookup (cfg->rcl_obj, "http");
 
 	if (http_obj) {
@@ -194,6 +263,20 @@ rspamd_http_context_create (struct rspamd_config *cfg,
 			if (keepalive_interval) {
 				ctx->config.keepalive_interval = ucl_object_todouble (keepalive_interval);
 			}
+
+			const ucl_object_t *http_proxy;
+			http_proxy = ucl_object_lookup (client_obj, "http_proxy");
+
+			if (http_proxy) {
+				ctx->config.http_proxy = ucl_object_tostring (http_proxy);
+			}
+
+			const ucl_object_t *https_proxy;
+			https_proxy = ucl_object_lookup (client_obj, "https_proxy");
+
+			if (https_proxy) {
+				ctx->config.https_proxy = ucl_object_tostring (https_proxy);
+			}
 		}
 
 		server_obj = ucl_object_lookup (http_obj, "server");
@@ -262,11 +345,12 @@ rspamd_http_context_free (struct rspamd_http_context *ctx)
 
 struct rspamd_http_context*
 rspamd_http_context_create_config (struct rspamd_http_context_cfg *cfg,
-		struct event_base *ev_base)
+								   struct event_base *ev_base,
+								   struct upstream_ctx *ups_ctx)
 {
 	struct rspamd_http_context *ctx;
 
-	ctx = rspamd_http_context_new_default (NULL, ev_base);
+	ctx = rspamd_http_context_new_default (NULL, ev_base, ups_ctx);
 	memcpy (&ctx->config, cfg, sizeof (*cfg));
 	rspamd_http_context_init (ctx);
 
diff --git a/src/libutil/http_context.h b/src/libutil/http_context.h
index 74e5c69a6..6abd66651 100644
--- a/src/libutil/http_context.h
+++ b/src/libutil/http_context.h
@@ -26,6 +26,7 @@
 struct rspamd_http_context;
 struct rspamd_config;
 struct rspamd_http_message;
+struct upstream_ctx;
 
 struct rspamd_http_context_cfg {
 	guint kp_cache_size_client;
@@ -34,6 +35,8 @@ struct rspamd_http_context_cfg {
 	gdouble keepalive_interval;
 	gdouble client_key_rotate_time;
 	const gchar *user_agent;
+	const gchar *http_proxy;
+	const gchar *https_proxy;
 };
 
 /**
@@ -43,11 +46,12 @@ struct rspamd_http_context_cfg {
  * @return new context used for both client and server HTTP connections
  */
 struct rspamd_http_context* rspamd_http_context_create (struct rspamd_config *cfg,
-		struct event_base *ev_base);
+		struct event_base *ev_base, struct upstream_ctx *ctx);
 
 struct rspamd_http_context* rspamd_http_context_create_config (
 		struct rspamd_http_context_cfg *cfg,
-		struct event_base *ev_base);
+		struct event_base *ev_base,
+		struct upstream_ctx *ctx);
 /**
  * Destroys context
  * @param ctx
diff --git a/src/libutil/http_private.h b/src/libutil/http_private.h
index dd3d0c6a9..dd4ca3435 100644
--- a/src/libutil/http_private.h
+++ b/src/libutil/http_private.h
@@ -22,6 +22,7 @@
 #include "keypair.h"
 #include "keypairs_cache.h"
 #include "ref.h"
+#include "upstream.h"
 #include "khash.h"
 #define HASH_CASELESS
 #include "uthash_strcase.h"
@@ -95,6 +96,9 @@ struct rspamd_http_context {
 	struct rspamd_keypair_cache *client_kp_cache;
 	struct rspamd_cryptobox_keypair *client_kp;
 	struct rspamd_keypair_cache *server_kp_cache;
+	struct upstream_ctx *ups_ctx;
+	struct upstream_list *http_proxies;
+	struct upstream_list *https_proxies;
 	gpointer ssl_ctx;
 	gpointer ssl_ctx_noverify;
 	struct event_base *ev_base;
diff --git a/src/libutil/upstream.c b/src/libutil/upstream.c
index 64d5291fa..3a2b803b4 100644
--- a/src/libutil/upstream.c
+++ b/src/libutil/upstream.c
@@ -788,51 +788,45 @@ rspamd_upstream_add_addr (struct upstream *up, rspamd_inet_addr_t *addr)
 	return TRUE;
 }
 
+#define LEN_CHECK_STARTS_WITH(s, len, lit) \
+	((len) >= sizeof(lit) - 1 && g_ascii_strncasecmp ((s), (lit), sizeof(lit) - 1) == 0)
 gboolean
-rspamd_upstreams_parse_line (struct upstream_list *ups,
-		const gchar *str, guint16 def_port, void *data)
+rspamd_upstreams_parse_line_len (struct upstream_list *ups,
+		const gchar *str, gsize len, guint16 def_port, void *data)
 {
-	const gchar *end = str + strlen (str), *p = str;
+	const gchar *end = str + len, *p = str;
 	const gchar *separators = ";, \n\r\t";
 	gchar *tmp;
-	guint len;
+	guint span_len;
 	gboolean ret = FALSE;
 
-	if (g_ascii_strncasecmp (p, "random:", sizeof ("random:") - 1) == 0) {
+	if (LEN_CHECK_STARTS_WITH(p, len, "random:")) {
 		ups->rot_alg = RSPAMD_UPSTREAM_RANDOM;
 		p += sizeof ("random:") - 1;
 	}
-	else if (g_ascii_strncasecmp (p,
-			"master-slave:",
-			sizeof ("master-slave:") - 1) == 0) {
+	else if (LEN_CHECK_STARTS_WITH(p, len, "master-slave:")) {
 		ups->rot_alg = RSPAMD_UPSTREAM_MASTER_SLAVE;
 		p += sizeof ("master-slave:") - 1;
 	}
-	else if (g_ascii_strncasecmp (p,
-			"round-robin:",
-			sizeof ("round-robin:") - 1) == 0) {
+	else if (LEN_CHECK_STARTS_WITH(p, len, "round-robin:")) {
 		ups->rot_alg = RSPAMD_UPSTREAM_ROUND_ROBIN;
 		p += sizeof ("round-robin:") - 1;
 	}
-	else if (g_ascii_strncasecmp (p,
-			"hash:",
-			sizeof ("hash:") - 1) == 0) {
+	else if (LEN_CHECK_STARTS_WITH(p, len, "hash:")) {
 		ups->rot_alg = RSPAMD_UPSTREAM_HASHED;
 		p += sizeof ("hash:") - 1;
 	}
-	else if (g_ascii_strncasecmp (p,
-			"sequential:",
-			sizeof ("sequential:") - 1) == 0) {
+	else if (LEN_CHECK_STARTS_WITH(p, len, "sequential:")) {
 		ups->rot_alg = RSPAMD_UPSTREAM_SEQUENTIAL;
 		p += sizeof ("sequential:") - 1;
 	}
 
 	while (p < end) {
-		len = strcspn (p, separators);
+		span_len = rspamd_memcspn (p, separators, end - p);
 
-		if (len > 0) {
-			tmp = g_malloc (len + 1);
-			rspamd_strlcpy (tmp, p, len + 1);
+		if (span_len > 0) {
+			tmp = g_malloc (span_len + 1);
+			rspamd_strlcpy (tmp, p, span_len + 1);
 
 			if (rspamd_upstreams_add_upstream (ups, tmp, def_port,
 					RSPAMD_UPSTREAM_PARSE_DEFAULT,
@@ -843,14 +837,26 @@ rspamd_upstreams_parse_line (struct upstream_list *ups,
 			g_free (tmp);
 		}
 
-		p += len;
+		p += span_len;
 		/* Skip separators */
-		p += strspn (p, separators);
+		if (p < end) {
+			p += rspamd_memspn (p, separators, end - p);
+		}
 	}
 
 	return ret;
 }
 
+#undef LEN_CHECK_STARTS_WITH
+
+/* Parses a NUL-terminated upstreams line by delegating to the sized variant */
+gboolean
+rspamd_upstreams_parse_line (struct upstream_list *ups, const gchar *str,
+		guint16 def_port, void *data)
+{
+	return rspamd_upstreams_parse_line_len (ups, str, strlen (str), def_port, data);
+}
+
 gboolean
 rspamd_upstreams_from_ucl (struct upstream_list *ups,
 		const ucl_object_t *in, guint16 def_port, void *data)
diff --git a/src/libutil/upstream.h b/src/libutil/upstream.h
index 4db962765..75d840ce2 100644
--- a/src/libutil/upstream.h
+++ b/src/libutil/upstream.h
@@ -157,6 +157,10 @@ gboolean rspamd_upstreams_parse_line (struct upstream_list *ups,
 		const gchar *str, guint16 def_port, void *data);
 
 
+/* Same as rspamd_upstreams_parse_line, but `str` is bounded by `len` bytes */
+gboolean rspamd_upstreams_parse_line_len (struct upstream_list *ups,
+		const gchar *str, gsize len,
+		guint16 def_port, void *data);
 /**
  * Parse upstreams list from the UCL object
  * @param ups
diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c
index 63a7dd544..3f1990b7b 100644
--- a/src/plugins/surbl.c
+++ b/src/plugins/surbl.c
@@ -2187,9 +2187,9 @@ surbl_is_redirector_handler (lua_State *L)
 
 	task = lua_check_task (L, 1);
 	url = luaL_checklstring (L, 2, &len);
-	surbl_module_ctx = surbl_get_context (task->cfg);
 
 	if (task && url) {
+		surbl_module_ctx = surbl_get_context (task->cfg);
 		url_cpy = rspamd_mempool_alloc (task->task_pool, len);
 		memcpy (url_cpy, url, len);
 
diff --git a/src/rspamadm/rspamadm.c b/src/rspamadm/rspamadm.c
index c0bb4bc72..c49853ef7 100644
--- a/src/rspamadm/rspamadm.c
+++ b/src/rspamadm/rspamadm.c
@@ -436,7 +436,8 @@ main (gint argc, gchar **argv, gchar **env)
 	(void) dns_resolver_init (rspamd_main->logger,
 			rspamd_main->ev_base,
 			cfg);
-	rspamd_main->http_ctx = rspamd_http_context_create (cfg, rspamd_main->ev_base);
+	rspamd_main->http_ctx = rspamd_http_context_create (cfg, rspamd_main->ev_base,
+			NULL);
 
 	g_log_set_default_handler (rspamd_glib_log_function, rspamd_main->logger);
 	g_set_printerr_handler (rspamd_glib_printerr_function);
diff --git a/src/rspamd.c b/src/rspamd.c
index 27ba5e032..142915df9 100644
--- a/src/rspamd.c
+++ b/src/rspamd.c
@@ -1490,7 +1490,7 @@ main (gint argc, gchar **argv, gchar **env)
 	rspamd_mempool_unlock_mutex (rspamd_main->start_mtx);
 
 	rspamd_main->http_ctx = rspamd_http_context_create (rspamd_main->cfg,
-			ev_base);
+			ev_base, rspamd_main->cfg->ups_ctx);
 
 	if (control_fd != -1) {
 		msg_info_main ("listening for control commands on %s",
diff --git a/src/rspamd_proxy.c b/src/rspamd_proxy.c
index e83426673..446552b81 100644
--- a/src/rspamd_proxy.c
+++ b/src/rspamd_proxy.c
@@ -2177,7 +2177,8 @@ start_rspamd_proxy (struct rspamd_worker *worker)
 	rspamd_upstreams_library_config (worker->srv->cfg, ctx->cfg->ups_ctx,
 			ctx->ev_base, ctx->resolver->r);
 
-	ctx->http_ctx = rspamd_http_context_create (ctx->cfg, ctx->ev_base);
+	ctx->http_ctx = rspamd_http_context_create (ctx->cfg, ctx->ev_base,
+			ctx->cfg->ups_ctx);
 
 	if (ctx->has_self_scan) {
 		/* Additional initialisation needed */
diff --git a/src/worker.c b/src/worker.c
index 77614ec13..40e3d07f9 100644
--- a/src/worker.c
+++ b/src/worker.c
@@ -692,7 +692,8 @@ start_worker (struct rspamd_worker *worker)
 	rspamd_upstreams_library_config (worker->srv->cfg, ctx->cfg->ups_ctx,
 			ctx->ev_base, ctx->resolver->r);
 
-	ctx->http_ctx = rspamd_http_context_create (ctx->cfg, ctx->ev_base);
+	ctx->http_ctx = rspamd_http_context_create (ctx->cfg, ctx->ev_base,
+			ctx->cfg->ups_ctx);
 	rspamd_worker_init_scanner (worker, ctx->ev_base, ctx->resolver,
 			&ctx->lang_det);
 	rspamd_lua_run_postloads (ctx->cfg->lua_state, ctx->cfg, ctx->ev_base,


More information about the Commits mailing list