commit 8139bbc: [Rework] Rework expressions processing

Vsevolod Stakhov vsevolod at highsecure.ru
Wed Jun 17 13:35:10 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-06-16 21:30:34 +0100
URL: https://github.com/rspamd/rspamd/commit/8139bbca9cf81108c17b607acad8971e9a7d404b

[Rework] Rework expressions processing

---
 src/libutil/expression.c | 257 +++++++++++++++++++++++++----------------------
 1 file changed, 138 insertions(+), 119 deletions(-)

diff --git a/src/libutil/expression.c b/src/libutil/expression.c
index b7dc0cc96..4c07643ae 100644
--- a/src/libutil/expression.c
+++ b/src/libutil/expression.c
@@ -40,19 +40,23 @@ enum rspamd_expression_elt_type {
 enum rspamd_expression_op_flag {
 	RSPAMD_EXPRESSION_UNARY = 1u << 0u,
 	RSPAMD_EXPRESSION_BINARY = 1u << 1u,
-	RSPAMD_EXPRESSION_NARY = 1u << 2u
+	RSPAMD_EXPRESSION_NARY = 1u << 2u,
+	RSPAMD_EXPRESSION_ARITHMETIC = 1u << 3u,
+	RSPAMD_EXPRESSION_LOGICAL = 1u << 4u,
+	RSPAMD_EXPRESSION_COMPARISON = 1u << 5u,
 };
 
 struct rspamd_expression_operation {
 	enum rspamd_expression_op op;
 	guint logical_priority;
+	guint op_flags;
 };
 
 struct rspamd_expression_elt {
 	enum rspamd_expression_elt_type type;
 	union {
 		rspamd_expression_atom_t *atom;
-		enum rspamd_expression_op op;
+		struct rspamd_expression_operation op;
 		gdouble lim;
 	} p;
 
@@ -241,6 +245,48 @@ rspamd_expr_logic_priority (enum rspamd_expression_op op)
 	return ret;
 }
 
+static guint
+rspamd_expr_op_flags (enum rspamd_expression_op op)
+{
+	guint ret = 0;
+
+	switch (op) {
+	case OP_NOT:
+		ret |= RSPAMD_EXPRESSION_UNARY|RSPAMD_EXPRESSION_LOGICAL;
+		break;
+	case OP_MULT:
+		ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_ARITHMETIC;
+		break;
+	case OP_DIVIDE:
+		ret |= RSPAMD_EXPRESSION_BINARY|RSPAMD_EXPRESSION_ARITHMETIC;
+		break;
+	case OP_PLUS:
+		ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_ARITHMETIC;
+		break;
+	case OP_MINUS:
+		ret |= RSPAMD_EXPRESSION_BINARY|RSPAMD_EXPRESSION_ARITHMETIC;
+		break;
+	case OP_GE:
+	case OP_GT:
+	case OP_LE:
+	case OP_LT:
+		ret |= RSPAMD_EXPRESSION_BINARY|RSPAMD_EXPRESSION_COMPARISON;
+		break;
+	case OP_AND:
+		ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_LOGICAL;
+		break;
+	case OP_OR:
+		ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_LOGICAL;
+		break;
+	case OP_OBRACE:
+	case OP_CBRACE:
+	case OP_INVALID:
+		break;
+	}
+
+	return ret;
+}
+
 /*
  * Return FALSE if symbol is not operation symbol (operand)
  * Return TRUE if symbol is operation symbol
@@ -428,14 +474,14 @@ rspamd_ast_add_node (GPtrArray *operands, struct rspamd_expression_elt *op,
 
 	g_assert (op->type == ELT_OP);
 
-	if (op->p.op == OP_NOT) {
+	if (op->p.op.op_flags & RSPAMD_EXPRESSION_UNARY) {
 		/* Unary operator */
 		res = g_node_new (op);
 		a1 = rspamd_expr_stack_elt_pop (operands);
 
 		if (a1 == NULL) {
 			g_set_error (err, rspamd_expr_quark(), EINVAL, "no operand to "
-					"unary '%s' operation", rspamd_expr_op_to_str (op->p.op));
+					"unary '%s' operation", rspamd_expr_op_to_str (op->p.op.op));
 			g_node_destroy (res);
 
 			return FALSE;
@@ -449,44 +495,54 @@ rspamd_ast_add_node (GPtrArray *operands, struct rspamd_expression_elt *op,
 		}
 	}
 	else {
-		/* For binary operators we might want to examine chains */
+		/* For binary/nary operators we might want to examine chains */
 		a2 = rspamd_expr_stack_elt_pop (operands);
 		a1 = rspamd_expr_stack_elt_pop (operands);
 
 		if (a2 == NULL) {
 			g_set_error (err, rspamd_expr_quark(), EINVAL, "no left operand to "
-					"'%s' operation", rspamd_expr_op_to_str (op->p.op));
+					"'%s' operation", rspamd_expr_op_to_str (op->p.op.op));
 			return FALSE;
 		}
+
 		if (a1 == NULL) {
 			g_set_error (err, rspamd_expr_quark(), EINVAL, "no right operand to "
-					"'%s' operation", rspamd_expr_op_to_str (op->p.op));
+					"'%s' operation", rspamd_expr_op_to_str (op->p.op.op));
 			return FALSE;
 		}
 
-		/* First try with a1 */
-		test = a1;
-		test_elt = test->data;
+		/* Nary stuff */
+		if (op->p.op.op_flags & RSPAMD_EXPRESSION_NARY) {
+			/*
+			 * We convert a set of ops like X + Y + Z to a nary tree like
+			 * X Y Z +
+			 * for the longest possible prefix of atoms/limits
+			 */
 
-		if (test_elt->type == ELT_OP && test_elt->p.op == op->p.op) {
-			/* Add children */
-			g_node_append (test, a2);
-			rspamd_expr_stack_elt_push (operands, a1);
-			return TRUE;
-		}
+			/* First try with a1 */
+			test = a1;
+			test_elt = test->data;
 
-		/* Now test a2 */
-		test = a2;
-		test_elt = test->data;
+			if (test_elt->type == ELT_OP && test_elt->p.op.op == op->p.op.op) {
+				/* Add children */
+				g_node_append (test, a2);
+				rspamd_expr_stack_elt_push (operands, a1);
+				return TRUE;
+			}
+
+			/* Now test a2 */
+			test = a2;
+			test_elt = test->data;
 
-		if (test_elt->type == ELT_OP && test_elt->p.op == op->p.op) {
-			/* Add children */
-			g_node_prepend (test, a1);
-			rspamd_expr_stack_elt_push (operands, a2);
-			return TRUE;
+			if (test_elt->type == ELT_OP && test_elt->p.op.op == op->p.op.op) {
+				/* Add children */
+				g_node_prepend (test, a1);
+				rspamd_expr_stack_elt_push (operands, a2);
+				return TRUE;
+			}
 		}
 
-		/* No optimizations possible, so create new level */
+		/* No optimizations possible, so create a new level */
 		res = g_node_new (op);
 		g_node_append (res, a1);
 		g_node_append (res, a2);
@@ -558,10 +614,10 @@ rspamd_ast_priority_cmp (GNode *a, GNode *b)
 	gdouble w1, w2;
 
 	if (ea->type == ELT_LIMIT) {
-		return -1;
+		return 1;
 	}
 	else if (eb->type == ELT_LIMIT) {
-		return 1;
+		return -1;
 	}
 
 	/* Special logic for atoms */
@@ -584,17 +640,26 @@ static gboolean
 rspamd_ast_resort_traverse (GNode *node, gpointer unused)
 {
 	GNode *children, *last;
+	struct rspamd_expression_elt *elt;
 
-	if (node->children) {
+	elt = (struct rspamd_expression_elt *)node->data;
 
-		children = node->children;
-		last = g_node_last_sibling (children);
-		/* Needed for utlist compatibility */
-		children->prev = last;
-		DL_SORT (node->children, rspamd_ast_priority_cmp);
-		/* Restore GLIB compatibility */
-		children = node->children;
-		children->prev = NULL;
+	/*
+	 * We sort merely logical operations, everything else is dangerous
+	 */
+	if (elt->type == ELT_OP && elt->p.op.op_flags & RSPAMD_EXPRESSION_LOGICAL) {
+
+		if (node->children) {
+
+			children = node->children;
+			last = g_node_last_sibling (children);
+			/* Needed for utlist compatibility */
+			children->prev = last;
+			DL_SORT (node->children, rspamd_ast_priority_cmp);
+			/* Restore GLIB compatibility */
+			children = node->children;
+			children->prev = NULL;
+		}
 	}
 
 	return FALSE;
@@ -854,10 +919,15 @@ rspamd_parse_expression (const gchar *line, gsize len,
 						goto err;
 					}
 
+					guint op_priority = rspamd_expr_logic_priority (op);
+
 					if (op != OP_OBRACE) {
 						elt.type = ELT_OP;
-						elt.p.op = op;
+						elt.p.op.op = op;
+						elt.p.op.op_flags = rspamd_expr_op_flags (op);
+						elt.p.op.logical_priority = op_priority;
 						g_array_append_val (e->expressions, elt);
+
 						if (!rspamd_ast_add_node (operand_stack,
 								rspamd_expr_dup_elt (pool, &elt), err)) {
 							goto err;
@@ -890,12 +960,17 @@ rspamd_parse_expression (const gchar *line, gsize len,
 					}
 
 					/* We ignore associativity for now */
+					guint op_priority = rspamd_expr_logic_priority (op);
+
 					if (op_stack != OP_OBRACE &&
-							rspamd_expr_logic_priority (op) <
-							rspamd_expr_logic_priority (op_stack)) {
+							op_priority < rspamd_expr_logic_priority (op_stack)) {
 						elt.type = ELT_OP;
-						elt.p.op = op_stack;
+						elt.p.op.op = op_stack;
+						elt.p.op.op_flags = rspamd_expr_op_flags (op);
+						elt.p.op.logical_priority = op_priority;
+
 						g_array_append_val (e->expressions, elt);
+
 						if (!rspamd_ast_add_node (operand_stack,
 								rspamd_expr_dup_elt (pool, &elt), err)) {
 							goto err;
@@ -933,7 +1008,10 @@ rspamd_parse_expression (const gchar *line, gsize len,
 			!= OP_INVALID) {
 		if (op_stack != OP_OBRACE) {
 			elt.type = ELT_OP;
-			elt.p.op = op_stack;
+			elt.p.op.op = op_stack;
+			elt.p.op.op_flags = rspamd_expr_op_flags (op_stack);
+			elt.p.op.logical_priority = rspamd_expr_logic_priority (op_stack);
+
 			g_array_append_val (e->expressions, elt);
 			if (!rspamd_ast_add_node (operand_stack,
 					rspamd_expr_dup_elt (pool, &elt), err)) {
@@ -991,52 +1069,16 @@ err:
  */
 static gboolean
 rspamd_ast_node_done (struct rspamd_expression_elt *elt,
-		struct rspamd_expression_elt *parelt, gdouble acc, gdouble lim)
+		struct rspamd_expression_elt *parelt, gdouble acc)
 {
 	gboolean ret = FALSE;
 
 	g_assert (elt->type == ELT_OP);
 
-	switch (elt->p.op) {
+	switch (elt->p.op.op) {
 	case OP_NOT:
 		ret = TRUE;
 		break;
-	case OP_PLUS:
-		if (parelt && lim > 0) {
-			g_assert (parelt->type == ELT_OP);
-
-			switch (parelt->p.op) {
-			case OP_GE:
-				ret = acc >= lim;
-				break;
-			case OP_GT:
-				ret = acc > lim;
-				break;
-			case OP_LE:
-				ret = acc <= lim;
-				break;
-			case OP_LT:
-				ret = acc < lim;
-				break;
-			default:
-				ret = FALSE;
-				break;
-			}
-		}
-		break;
-	case OP_GE:
-		ret = acc >= lim;
-		break;
-	case OP_GT:
-		ret = acc > lim;
-		break;
-	case OP_LE:
-		ret = acc <= lim;
-		break;
-	case OP_LT:
-		ret = acc < lim;
-		break;
-	case OP_MULT:
 	case OP_AND:
 		ret = acc == 0;
 		break;
@@ -1044,7 +1086,6 @@ rspamd_ast_node_done (struct rspamd_expression_elt *elt,
 		ret = acc != 0;
 		break;
 	default:
-		g_assert (0);
 		break;
 	}
 
@@ -1052,36 +1093,28 @@ rspamd_ast_node_done (struct rspamd_expression_elt *elt,
 }
 
 static gdouble
-rspamd_ast_do_op (struct rspamd_expression_elt *elt, gdouble val,
-		gdouble acc, gdouble lim)
+rspamd_ast_do_op (struct rspamd_expression_elt *elt, gdouble val, gdouble acc)
 {
 	gdouble ret = val;
 
 	g_assert (elt->type == ELT_OP);
 
 	if (isnan (acc)) {
-		switch (elt->p.op) {
+		switch (elt->p.op.op) {
 		case OP_PLUS:
 		case OP_MINUS:
 		case OP_OR:
 		case OP_AND:
 		case OP_MULT:
 		case OP_DIVIDE:
-			/* Hack for arithmetics */
-			if (!isnan (lim)) {
-				acc = lim;
-			}
-			else {
-				return val;
-			}
-			break;
+			return val;
 		default:
 			acc = val;
 			break;
 		}
 	}
 
-	switch (elt->p.op) {
+	switch (elt->p.op.op) {
 	case OP_NOT:
 		ret = fabs (val) > DOUBLE_EPSILON ? 0.0 : 1.0;
 		break;
@@ -1098,16 +1131,16 @@ rspamd_ast_do_op (struct rspamd_expression_elt *elt, gdouble val,
 		ret = acc / val;
 		break;
 	case OP_GE:
-		ret = acc >= lim;
+		ret = acc >= val;
 		break;
 	case OP_GT:
-		ret = acc > lim;
+		ret = acc > val;
 		break;
 	case OP_LE:
-		ret = acc <= lim;
+		ret = acc <= val;
 		break;
 	case OP_LT:
-		ret = acc < lim;
+		ret = acc < val;
 		break;
 	case OP_AND:
 		ret = (acc * val);
@@ -1129,7 +1162,7 @@ rspamd_ast_process_node (struct rspamd_expression *e, GNode *node,
 {
 	struct rspamd_expression_elt *elt, *celt, *parelt = NULL;
 	GNode *cld;
-	gdouble acc = NAN, lim = NAN;
+	gdouble acc = NAN;
 	gdouble t1, t2, val;
 	gboolean calc_ticks = FALSE;
 
@@ -1181,33 +1214,19 @@ rspamd_ast_process_node (struct rspamd_expression *e, GNode *node,
 	case ELT_OP:
 		g_assert (node->children != NULL);
 
-		/* Try to find limit at the parent node */
-		if (node->parent) {
-			parelt = node->parent->data;
-			celt = node->parent->children->data;
-
-			if (celt->type == ELT_LIMIT) {
-				lim = celt->p.lim;
-			}
-		}
-
 		DL_FOREACH (node->children, cld) {
 			celt = cld->data;
 
-			/* Save limit if we've found it */
-			if (celt->type == ELT_LIMIT) {
-				lim = celt->p.lim;
-				continue;
-			}
-
 			val = rspamd_ast_process_node (e, cld, process_data);
-			msg_debug_expression ("before op: op=%s; lim=%.1f; acc=%.1f; val = %.2f", rspamd_expr_op_to_str(elt->p.op), lim, acc, val);
-			acc = rspamd_ast_do_op (elt, val, acc, lim);
-			msg_debug_expression ("after op: op=%s; lim=%.1f; acc=%.1f; val = %.2f", rspamd_expr_op_to_str(elt->p.op), lim, acc, val);
+			msg_debug_expression ("before op: op=%s; acc=%.1f; val = %.2f", rspamd_expr_op_to_str(elt->p.op.op),
+					acc, val);
+			acc = rspamd_ast_do_op (elt, val, acc);
+			msg_debug_expression ("after op: op=%s; acc=%.1f; val = %.2f", rspamd_expr_op_to_str(elt->p.op.op),
+					acc, val);
 
 			/* Check if we need to process further */
 			if (!(process_data->flags & RSPAMD_EXPRESSION_FLAG_NOOPT)) {
-				if (rspamd_ast_node_done (elt, parelt, acc, lim)) {
+				if (rspamd_ast_node_done (elt, parelt, acc)) {
 					msg_debug_expression ("optimizer: done");
 					return acc;
 				}
@@ -1319,7 +1338,7 @@ rspamd_ast_string_traverse (GNode *n, gpointer d)
 		}
 	}
 	else {
-		op_str = rspamd_expr_op_to_str (elt->p.op);
+		op_str = rspamd_expr_op_to_str (elt->p.op.op);
 		g_string_append (res, op_str);
 
 		if (n->children) {
@@ -1401,7 +1420,7 @@ rspamd_expression_node_is_op (GNode *node, enum rspamd_expression_op op)
 
 	elt = node->data;
 
-	if (elt->type == ELT_OP && elt->p.op == op) {
+	if (elt->type == ELT_OP && elt->p.op.op == op) {
 		return TRUE;
 	}
 


More information about the Commits mailing list