commit 8139bbc: [Rework] Rework expressions processing
Vsevolod Stakhov
vsevolod at highsecure.ru
Wed Jun 17 13:35:10 UTC 2020
Author: Vsevolod Stakhov
Date: 2020-06-16 21:30:34 +0100
URL: https://github.com/rspamd/rspamd/commit/8139bbca9cf81108c17b607acad8971e9a7d404b
[Rework] Rework expressions processing
---
src/libutil/expression.c | 257 +++++++++++++++++++++++++----------------------
1 file changed, 138 insertions(+), 119 deletions(-)
diff --git a/src/libutil/expression.c b/src/libutil/expression.c
index b7dc0cc96..4c07643ae 100644
--- a/src/libutil/expression.c
+++ b/src/libutil/expression.c
@@ -40,19 +40,23 @@ enum rspamd_expression_elt_type {
enum rspamd_expression_op_flag {
RSPAMD_EXPRESSION_UNARY = 1u << 0u,
RSPAMD_EXPRESSION_BINARY = 1u << 1u,
- RSPAMD_EXPRESSION_NARY = 1u << 2u
+ RSPAMD_EXPRESSION_NARY = 1u << 2u,
+ RSPAMD_EXPRESSION_ARITHMETIC = 1u << 3u,
+ RSPAMD_EXPRESSION_LOGICAL = 1u << 4u,
+ RSPAMD_EXPRESSION_COMPARISON = 1u << 5u,
};
struct rspamd_expression_operation {
enum rspamd_expression_op op;
guint logical_priority;
+ guint op_flags;
};
struct rspamd_expression_elt {
enum rspamd_expression_elt_type type;
union {
rspamd_expression_atom_t *atom;
- enum rspamd_expression_op op;
+ struct rspamd_expression_operation op;
gdouble lim;
} p;
@@ -241,6 +245,48 @@ rspamd_expr_logic_priority (enum rspamd_expression_op op)
return ret;
}
+static guint
+rspamd_expr_op_flags (enum rspamd_expression_op op)
+{
+ guint ret = 0;
+
+ switch (op) {
+ case OP_NOT:
+ ret |= RSPAMD_EXPRESSION_UNARY|RSPAMD_EXPRESSION_LOGICAL;
+ break;
+ case OP_MULT:
+ ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_ARITHMETIC;
+ break;
+ case OP_DIVIDE:
+ ret |= RSPAMD_EXPRESSION_BINARY|RSPAMD_EXPRESSION_ARITHMETIC;
+ break;
+ case OP_PLUS:
+ ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_ARITHMETIC;
+ break;
+ case OP_MINUS:
+ ret |= RSPAMD_EXPRESSION_BINARY|RSPAMD_EXPRESSION_ARITHMETIC;
+ break;
+ case OP_GE:
+ case OP_GT:
+ case OP_LE:
+ case OP_LT:
+ ret |= RSPAMD_EXPRESSION_BINARY|RSPAMD_EXPRESSION_COMPARISON;
+ break;
+ case OP_AND:
+ ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_LOGICAL;
+ break;
+ case OP_OR:
+ ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_LOGICAL;
+ break;
+ case OP_OBRACE:
+ case OP_CBRACE:
+ case OP_INVALID:
+ break;
+ }
+
+ return ret;
+}
+
/*
* Return FALSE if symbol is not operation symbol (operand)
* Return TRUE if symbol is operation symbol
@@ -428,14 +474,14 @@ rspamd_ast_add_node (GPtrArray *operands, struct rspamd_expression_elt *op,
g_assert (op->type == ELT_OP);
- if (op->p.op == OP_NOT) {
+ if (op->p.op.op_flags & RSPAMD_EXPRESSION_UNARY) {
/* Unary operator */
res = g_node_new (op);
a1 = rspamd_expr_stack_elt_pop (operands);
if (a1 == NULL) {
g_set_error (err, rspamd_expr_quark(), EINVAL, "no operand to "
- "unary '%s' operation", rspamd_expr_op_to_str (op->p.op));
+ "unary '%s' operation", rspamd_expr_op_to_str (op->p.op.op));
g_node_destroy (res);
return FALSE;
@@ -449,44 +495,54 @@ rspamd_ast_add_node (GPtrArray *operands, struct rspamd_expression_elt *op,
}
}
else {
- /* For binary operators we might want to examine chains */
+ /* For binary/nary operators we might want to examine chains */
a2 = rspamd_expr_stack_elt_pop (operands);
a1 = rspamd_expr_stack_elt_pop (operands);
if (a2 == NULL) {
g_set_error (err, rspamd_expr_quark(), EINVAL, "no left operand to "
- "'%s' operation", rspamd_expr_op_to_str (op->p.op));
+ "'%s' operation", rspamd_expr_op_to_str (op->p.op.op));
return FALSE;
}
+
if (a1 == NULL) {
g_set_error (err, rspamd_expr_quark(), EINVAL, "no right operand to "
- "'%s' operation", rspamd_expr_op_to_str (op->p.op));
+ "'%s' operation", rspamd_expr_op_to_str (op->p.op.op));
return FALSE;
}
- /* First try with a1 */
- test = a1;
- test_elt = test->data;
+ /* Nary stuff */
+ if (op->p.op.op_flags & RSPAMD_EXPRESSION_NARY) {
+ /*
+ * We convert a set of ops like X + Y + Z to a nary tree like
+ * X Y Z +
+ * for the longest possible prefix of atoms/limits
+ */
- if (test_elt->type == ELT_OP && test_elt->p.op == op->p.op) {
- /* Add children */
- g_node_append (test, a2);
- rspamd_expr_stack_elt_push (operands, a1);
- return TRUE;
- }
+ /* First try with a1 */
+ test = a1;
+ test_elt = test->data;
- /* Now test a2 */
- test = a2;
- test_elt = test->data;
+ if (test_elt->type == ELT_OP && test_elt->p.op.op == op->p.op.op) {
+ /* Add children */
+ g_node_append (test, a2);
+ rspamd_expr_stack_elt_push (operands, a1);
+ return TRUE;
+ }
+
+ /* Now test a2 */
+ test = a2;
+ test_elt = test->data;
- if (test_elt->type == ELT_OP && test_elt->p.op == op->p.op) {
- /* Add children */
- g_node_prepend (test, a1);
- rspamd_expr_stack_elt_push (operands, a2);
- return TRUE;
+ if (test_elt->type == ELT_OP && test_elt->p.op.op == op->p.op.op) {
+ /* Add children */
+ g_node_prepend (test, a1);
+ rspamd_expr_stack_elt_push (operands, a2);
+ return TRUE;
+ }
}
- /* No optimizations possible, so create new level */
+ /* No optimizations possible, so create a new level */
res = g_node_new (op);
g_node_append (res, a1);
g_node_append (res, a2);
@@ -558,10 +614,10 @@ rspamd_ast_priority_cmp (GNode *a, GNode *b)
gdouble w1, w2;
if (ea->type == ELT_LIMIT) {
- return -1;
+ return 1;
}
else if (eb->type == ELT_LIMIT) {
- return 1;
+ return -1;
}
/* Special logic for atoms */
@@ -584,17 +640,26 @@ static gboolean
rspamd_ast_resort_traverse (GNode *node, gpointer unused)
{
GNode *children, *last;
+ struct rspamd_expression_elt *elt;
- if (node->children) {
+ elt = (struct rspamd_expression_elt *)node->data;
- children = node->children;
- last = g_node_last_sibling (children);
- /* Needed for utlist compatibility */
- children->prev = last;
- DL_SORT (node->children, rspamd_ast_priority_cmp);
- /* Restore GLIB compatibility */
- children = node->children;
- children->prev = NULL;
+ /*
+ * We sort merely logical operations, everything else is dangerous
+ */
+ if (elt->type == ELT_OP && elt->p.op.op_flags & RSPAMD_EXPRESSION_LOGICAL) {
+
+ if (node->children) {
+
+ children = node->children;
+ last = g_node_last_sibling (children);
+ /* Needed for utlist compatibility */
+ children->prev = last;
+ DL_SORT (node->children, rspamd_ast_priority_cmp);
+ /* Restore GLIB compatibility */
+ children = node->children;
+ children->prev = NULL;
+ }
}
return FALSE;
@@ -854,10 +919,15 @@ rspamd_parse_expression (const gchar *line, gsize len,
goto err;
}
+ guint op_priority = rspamd_expr_logic_priority (op);
+
if (op != OP_OBRACE) {
elt.type = ELT_OP;
- elt.p.op = op;
+ elt.p.op.op = op;
+ elt.p.op.op_flags = rspamd_expr_op_flags (op);
+ elt.p.op.logical_priority = op_priority;
g_array_append_val (e->expressions, elt);
+
if (!rspamd_ast_add_node (operand_stack,
rspamd_expr_dup_elt (pool, &elt), err)) {
goto err;
@@ -890,12 +960,17 @@ rspamd_parse_expression (const gchar *line, gsize len,
}
/* We ignore associativity for now */
+ guint op_priority = rspamd_expr_logic_priority (op);
+
if (op_stack != OP_OBRACE &&
- rspamd_expr_logic_priority (op) <
- rspamd_expr_logic_priority (op_stack)) {
+ op_priority < rspamd_expr_logic_priority (op_stack)) {
elt.type = ELT_OP;
- elt.p.op = op_stack;
+ elt.p.op.op = op_stack;
+ elt.p.op.op_flags = rspamd_expr_op_flags (op);
+ elt.p.op.logical_priority = op_priority;
+
g_array_append_val (e->expressions, elt);
+
if (!rspamd_ast_add_node (operand_stack,
rspamd_expr_dup_elt (pool, &elt), err)) {
goto err;
@@ -933,7 +1008,10 @@ rspamd_parse_expression (const gchar *line, gsize len,
!= OP_INVALID) {
if (op_stack != OP_OBRACE) {
elt.type = ELT_OP;
- elt.p.op = op_stack;
+ elt.p.op.op = op_stack;
+ elt.p.op.op_flags = rspamd_expr_op_flags (op_stack);
+ elt.p.op.logical_priority = rspamd_expr_logic_priority (op_stack);
+
g_array_append_val (e->expressions, elt);
if (!rspamd_ast_add_node (operand_stack,
rspamd_expr_dup_elt (pool, &elt), err)) {
@@ -991,52 +1069,16 @@ err:
*/
static gboolean
rspamd_ast_node_done (struct rspamd_expression_elt *elt,
- struct rspamd_expression_elt *parelt, gdouble acc, gdouble lim)
+ struct rspamd_expression_elt *parelt, gdouble acc)
{
gboolean ret = FALSE;
g_assert (elt->type == ELT_OP);
- switch (elt->p.op) {
+ switch (elt->p.op.op) {
case OP_NOT:
ret = TRUE;
break;
- case OP_PLUS:
- if (parelt && lim > 0) {
- g_assert (parelt->type == ELT_OP);
-
- switch (parelt->p.op) {
- case OP_GE:
- ret = acc >= lim;
- break;
- case OP_GT:
- ret = acc > lim;
- break;
- case OP_LE:
- ret = acc <= lim;
- break;
- case OP_LT:
- ret = acc < lim;
- break;
- default:
- ret = FALSE;
- break;
- }
- }
- break;
- case OP_GE:
- ret = acc >= lim;
- break;
- case OP_GT:
- ret = acc > lim;
- break;
- case OP_LE:
- ret = acc <= lim;
- break;
- case OP_LT:
- ret = acc < lim;
- break;
- case OP_MULT:
case OP_AND:
ret = acc == 0;
break;
@@ -1044,7 +1086,6 @@ rspamd_ast_node_done (struct rspamd_expression_elt *elt,
ret = acc != 0;
break;
default:
- g_assert (0);
break;
}
@@ -1052,36 +1093,28 @@ rspamd_ast_node_done (struct rspamd_expression_elt *elt,
}
static gdouble
-rspamd_ast_do_op (struct rspamd_expression_elt *elt, gdouble val,
- gdouble acc, gdouble lim)
+rspamd_ast_do_op (struct rspamd_expression_elt *elt, gdouble val, gdouble acc)
{
gdouble ret = val;
g_assert (elt->type == ELT_OP);
if (isnan (acc)) {
- switch (elt->p.op) {
+ switch (elt->p.op.op) {
case OP_PLUS:
case OP_MINUS:
case OP_OR:
case OP_AND:
case OP_MULT:
case OP_DIVIDE:
- /* Hack for arithmetics */
- if (!isnan (lim)) {
- acc = lim;
- }
- else {
- return val;
- }
- break;
+ return val;
default:
acc = val;
break;
}
}
- switch (elt->p.op) {
+ switch (elt->p.op.op) {
case OP_NOT:
ret = fabs (val) > DOUBLE_EPSILON ? 0.0 : 1.0;
break;
@@ -1098,16 +1131,16 @@ rspamd_ast_do_op (struct rspamd_expression_elt *elt, gdouble val,
ret = acc / val;
break;
case OP_GE:
- ret = acc >= lim;
+ ret = acc >= val;
break;
case OP_GT:
- ret = acc > lim;
+ ret = acc > val;
break;
case OP_LE:
- ret = acc <= lim;
+ ret = acc <= val;
break;
case OP_LT:
- ret = acc < lim;
+ ret = acc < val;
break;
case OP_AND:
ret = (acc * val);
@@ -1129,7 +1162,7 @@ rspamd_ast_process_node (struct rspamd_expression *e, GNode *node,
{
struct rspamd_expression_elt *elt, *celt, *parelt = NULL;
GNode *cld;
- gdouble acc = NAN, lim = NAN;
+ gdouble acc = NAN;
gdouble t1, t2, val;
gboolean calc_ticks = FALSE;
@@ -1181,33 +1214,19 @@ rspamd_ast_process_node (struct rspamd_expression *e, GNode *node,
case ELT_OP:
g_assert (node->children != NULL);
- /* Try to find limit at the parent node */
- if (node->parent) {
- parelt = node->parent->data;
- celt = node->parent->children->data;
-
- if (celt->type == ELT_LIMIT) {
- lim = celt->p.lim;
- }
- }
-
DL_FOREACH (node->children, cld) {
celt = cld->data;
- /* Save limit if we've found it */
- if (celt->type == ELT_LIMIT) {
- lim = celt->p.lim;
- continue;
- }
-
val = rspamd_ast_process_node (e, cld, process_data);
- msg_debug_expression ("before op: op=%s; lim=%.1f; acc=%.1f; val = %.2f", rspamd_expr_op_to_str(elt->p.op), lim, acc, val);
- acc = rspamd_ast_do_op (elt, val, acc, lim);
- msg_debug_expression ("after op: op=%s; lim=%.1f; acc=%.1f; val = %.2f", rspamd_expr_op_to_str(elt->p.op), lim, acc, val);
+ msg_debug_expression ("before op: op=%s; acc=%.1f; val = %.2f", rspamd_expr_op_to_str(elt->p.op.op),
+ acc, val);
+ acc = rspamd_ast_do_op (elt, val, acc);
+ msg_debug_expression ("after op: op=%s; acc=%.1f; val = %.2f", rspamd_expr_op_to_str(elt->p.op.op),
+ acc, val);
/* Check if we need to process further */
if (!(process_data->flags & RSPAMD_EXPRESSION_FLAG_NOOPT)) {
- if (rspamd_ast_node_done (elt, parelt, acc, lim)) {
+ if (rspamd_ast_node_done (elt, parelt, acc)) {
msg_debug_expression ("optimizer: done");
return acc;
}
@@ -1319,7 +1338,7 @@ rspamd_ast_string_traverse (GNode *n, gpointer d)
}
}
else {
- op_str = rspamd_expr_op_to_str (elt->p.op);
+ op_str = rspamd_expr_op_to_str (elt->p.op.op);
g_string_append (res, op_str);
if (n->children) {
@@ -1401,7 +1420,7 @@ rspamd_expression_node_is_op (GNode *node, enum rspamd_expression_op op)
elt = node->data;
- if (elt->type == ELT_OP && elt->p.op == op) {
+ if (elt->type == ELT_OP && elt->p.op.op == op) {
return TRUE;
}
More information about the Commits
mailing list