commit 261c549: [Project] Allow to kill workers that hang up
Vsevolod Stakhov
vsevolod at highsecure.ru
Sat Sep 21 16:21:06 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-09-21 16:27:25 +0100
URL: https://github.com/rspamd/rspamd/commit/261c54963d3e48834100180125a2a17e1759cb61 (HEAD -> master)
[Project] Allow to kill workers that hang up
---
src/libserver/cfg_file.h | 1 +
src/libserver/cfg_rcl.c | 7 +++++++
src/libserver/worker_util.c | 25 +++++++++++++++++++++++++
3 files changed, 33 insertions(+)
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index 7186a73ec..d42fbfba9 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -380,6 +380,7 @@ struct rspamd_config {
gsize images_cache_size; /**< size of LRU cache for DCT data from images */
gdouble task_timeout; /**< maximum message processing time */
gint default_max_shots; /**< default maximum count of symbols hits permitted (-1 for unlimited) */
+ gint32 heartbeats_loss_max; /**< number of heartbeats lost to consider worker's termination */
gdouble heartbeat_interval; /**< interval for heartbeats for workers */
enum rspamd_log_type log_type; /**< log type */
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
index 5a1d3a639..11c378d5d 100644
--- a/src/libserver/cfg_rcl.c
+++ b/src/libserver/cfg_rcl.c
@@ -2188,6 +2188,13 @@ rspamd_rcl_config_init (struct rspamd_config *cfg, GHashTable *skip_sections)
G_STRUCT_OFFSET (struct rspamd_config, heartbeat_interval),
RSPAMD_CL_FLAG_TIME_FLOAT,
"Time between workers heartbeats");
+ rspamd_rcl_add_default_handler (sub,
+ "heartbeats_loss_max",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET (struct rspamd_config, heartbeats_loss_max), /* gint32 field, matches the INT_32 flag below */
+ RSPAMD_CL_FLAG_INT_32,
+ "Maximum count of heartbeats to be lost before trying to "
+ "terminate a worker (default: 0 - disabled)");
/* Neighbours configuration */
rspamd_rcl_add_section_doc (&sub->subsections, "neighbours", "name",
diff --git a/src/libserver/worker_util.c b/src/libserver/worker_util.c
index d2e52d5a1..883e7e8a9 100644
--- a/src/libserver/worker_util.c
+++ b/src/libserver/worker_util.c
@@ -756,6 +756,31 @@ rspamd_main_heartbeat_cb (EV_P_ ev_timer *w, int revents)
g_quark_to_string (wrk->type),
wrk->pid,
timebuf);
+
+ if (rspamd_main->cfg->heartbeats_loss_max > 0 &&
+ -(wrk->hb.nbeats) >= rspamd_main->cfg->heartbeats_loss_max) {
+ /* Escalate: send SIGTERM when the configured loss limit is first reached, */
+ /* and SIGKILL only if the worker has missed at least one more beat after that. */
+ if (-(wrk->hb.nbeats) >= rspamd_main->cfg->heartbeats_loss_max + 1) {
+ msg_err_main ("force kill worker type %s with pid %P, "
+ "last beat on: %s; %L heartbeat lost",
+ g_quark_to_string (wrk->type),
+ wrk->pid,
+ timebuf,
+ -(wrk->hb.nbeats));
+ kill (wrk->pid, SIGKILL);
+ }
+ else {
+ msg_err_main ("terminate worker type %s with pid %P, "
+ "last beat on: %s; %L heartbeat lost",
+ g_quark_to_string (wrk->type),
+ wrk->pid,
+ timebuf,
+ -(wrk->hb.nbeats));
+ kill (wrk->pid, SIGTERM);
+ }
+
+ }
}
}
else if (wrk->hb.nbeats < 0) {
More information about the Commits
mailing list