commit df5d064: [Minor] Improve symbols execution profiling logic
Vsevolod Stakhov
vsevolod at highsecure.ru
Thu Jul 18 11:28:04 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-07-18 12:24:31 +0100
URL: https://github.com/rspamd/rspamd/commit/df5d064b0ca52b81154e6a959e9ae05792b4f706 (HEAD -> master)
[Minor] Improve symbols execution profiling logic
---
src/libserver/rspamd_symcache.c | 72 ++++++++++++++++++++++++++++-------------
src/libserver/task.c | 10 +-----
src/libserver/task.h | 1 -
3 files changed, 50 insertions(+), 33 deletions(-)
diff --git a/src/libserver/rspamd_symcache.c b/src/libserver/rspamd_symcache.c
index 2acff7e6d..bba4db617 100644
--- a/src/libserver/rspamd_symcache.c
+++ b/src/libserver/rspamd_symcache.c
@@ -177,6 +177,7 @@ struct rspamd_symcache {
guint id;
struct rspamd_config *cfg;
gdouble reload_time;
+ gdouble last_profile;
gint peak_cb;
};
@@ -221,6 +222,8 @@ struct cache_savepoint {
enum rspamd_cache_savepoint_stage pass;
guint version;
guint items_inflight;
+ gboolean profile;
+ gdouble profile_start;
struct rspamd_metric_result *rs;
gdouble lim;
@@ -238,6 +241,13 @@ struct rspamd_cache_refresh_cbdata {
struct ev_loop *event_loop;
};
+/* At least once per minute */
+#define PROFILE_MAX_TIME (60.0)
+/* For messages larger than 2Mb enable profiling */
+#define PROFILE_MESSAGE_SIZE_THRESHOLD (1024 * 1024 * 2)
+/* Enable profile at least once per this amount of messages processed */
+#define PROFILE_PROBABILITY (0.01)
+
/* weight, frequency, time */
#define TIME_ALPHA (1.0)
#define WEIGHT_ALPHA (0.1)
@@ -764,17 +774,17 @@ rspamd_symcache_load_items (struct rspamd_symcache *cache, const gchar *name)
elt = ucl_object_lookup (cur, "frequency");
if (elt && ucl_object_type (elt) == UCL_OBJECT) {
- const ucl_object_t *cur;
+ const ucl_object_t *freq_elt;
- cur = ucl_object_lookup (elt, "avg");
+ freq_elt = ucl_object_lookup (elt, "avg");
- if (cur) {
- item->st->avg_frequency = ucl_object_todouble (cur);
+ if (freq_elt) {
+ item->st->avg_frequency = ucl_object_todouble (freq_elt);
}
- cur = ucl_object_lookup (elt, "stddev");
+ freq_elt = ucl_object_lookup (elt, "stddev");
- if (cur) {
- item->st->stddev_frequency = ucl_object_todouble (cur);
+ if (freq_elt) {
+ item->st->stddev_frequency = ucl_object_todouble (freq_elt);
}
}
@@ -1576,7 +1586,6 @@ rspamd_symcache_check_symbol (struct rspamd_task *task,
struct rspamd_symcache_item *item,
struct cache_savepoint *checkpoint)
{
- double t1 = 0;
struct rspamd_task **ptask;
lua_State *L;
gboolean check = TRUE;
@@ -1637,8 +1646,12 @@ rspamd_symcache_check_symbol (struct rspamd_task *task,
if (check) {
msg_debug_cache_task ("execute %s, %d", item->symbol, item->id);
- t1 = ev_now (task->event_loop);
- dyn_item->start_msec = (t1 - task->time_virtual) * 1e3;
+
+ if (checkpoint->profile) {
+ dyn_item->start_msec = (rspamd_get_virtual_ticks () -
+ checkpoint->profile_start) * 1e3;
+ }
+
dyn_item->async_events = 0;
checkpoint->cur_item = item;
checkpoint->items_inflight ++;
@@ -1787,6 +1800,18 @@ rspamd_symcache_make_checkpoint (struct rspamd_task *task,
rspamd_mempool_add_destructor (task->task_pool,
rspamd_symcache_order_unref, checkpoint->order);
+ /* Calculate profile probability */
+ ev_tstamp now = ev_now (task->event_loop);
+
+ if ((cache->last_profile == 0.0 || now > cache->last_profile + PROFILE_MAX_TIME) ||
+ (task->msg.len >= PROFILE_MESSAGE_SIZE_THRESHOLD) ||
+ (rspamd_random_double_fast () >= (1 - PROFILE_PROBABILITY))) {
+ msg_debug_cache_task ("enable profiling of symbols for task");
+ checkpoint->profile = TRUE;
+ checkpoint->profile_start = rspamd_get_virtual_ticks ();
+ cache->last_profile = now;
+ }
+
checkpoint->pass = RSPAMD_CACHE_PASS_INIT;
task->checkpoint = checkpoint;
@@ -2880,7 +2905,7 @@ rspamd_symcache_finalize_item (struct rspamd_task *task,
struct cache_savepoint *checkpoint = task->checkpoint;
struct cache_dependency *rdep;
struct rspamd_symcache_dynamic_item *dyn_item;
- gdouble t2, diff;
+ gdouble diff;
guint i;
const gdouble slow_diff_limit = 300;
@@ -2909,20 +2934,21 @@ rspamd_symcache_finalize_item (struct rspamd_task *task,
checkpoint->items_inflight --;
checkpoint->cur_item = NULL;
- t2 = ev_now (task->event_loop);
- diff = ((t2 - task->time_virtual) * 1e3 - dyn_item->start_msec);
-
- if (G_UNLIKELY (RSPAMD_TASK_IS_PROFILING (task))) {
- rspamd_task_profile_set (task, item->symbol, diff);
- }
+ if (checkpoint->profile) {
+ diff = ((rspamd_get_virtual_ticks () - checkpoint->profile_start) * 1e3 -
+ dyn_item->start_msec);
+ if (diff > slow_diff_limit) {
+ msg_info_task ("slow rule: %s(%d): %.2f ms", item->symbol, item->id,
+ diff);
+ }
- if (diff > slow_diff_limit) {
- msg_info_task ("slow rule: %s(%d): %.2f ms", item->symbol, item->id,
- diff);
- }
+ if (G_UNLIKELY (RSPAMD_TASK_IS_PROFILING (task))) {
+ rspamd_task_profile_set (task, item->symbol, diff);
+ }
- if (rspamd_worker_is_scanner (task->worker)) {
- rspamd_set_counter (item->cd, diff);
+ if (rspamd_worker_is_scanner (task->worker)) {
+ rspamd_set_counter (item->cd, diff);
+ }
}
/* Process all reverse dependencies */
diff --git a/src/libserver/task.c b/src/libserver/task.c
index 402f39987..fd14a68f2 100644
--- a/src/libserver/task.c
+++ b/src/libserver/task.c
@@ -90,15 +90,7 @@ rspamd_task_new (struct rspamd_worker *worker, struct rspamd_config *cfg,
}
new_task->event_loop = event_loop;
- if (event_loop) {
- new_task->task_timestamp = ev_time ();
- new_task->time_virtual = ev_now (event_loop);
- }
- else {
- new_task->task_timestamp = ev_time ();
- new_task->time_virtual = rspamd_get_virtual_ticks ();
- }
-
+ new_task->task_timestamp = ev_time ();
new_task->time_real_finish = NAN;
if (pool == NULL) {
diff --git a/src/libserver/task.h b/src/libserver/task.h
index e6805eb57..39d6d9b76 100644
--- a/src/libserver/task.h
+++ b/src/libserver/task.h
@@ -193,7 +193,6 @@ struct rspamd_task {
struct rspamd_config *cfg; /**< pointer to config object */
GError *err;
rspamd_mempool_t *task_pool; /**< memory pool for task */
- double time_virtual;
double time_real_finish;
ev_tstamp task_timestamp;
More information about the Commits
mailing list