commit 52c535a: [Feature] Escape JSON when needed

Vsevolod Stakhov vsevolod at rspamd.com
Mon Oct 30 17:21:05 UTC 2023


Author: Vsevolod Stakhov
Date: 2023-10-30 16:35:36 +0000
URL: https://github.com/rspamd/rspamd/commit/52c535ad6a54176dc10d35b859f62f8e1ac7a7b2

[Feature] Escape JSON when needed

---
 src/libserver/logger/logger.c         | 395 +++++++++++++++++++++-------------
 src/libserver/logger/logger_console.c |  14 +-
 src/libserver/logger/logger_file.c    |  12 +-
 src/libserver/logger/logger_private.h |  34 ++-
 4 files changed, 286 insertions(+), 169 deletions(-)

diff --git a/src/libserver/logger/logger.c b/src/libserver/logger/logger.c
index 7a6f62ddd..33117f72f 100644
--- a/src/libserver/logger/logger.c
+++ b/src/libserver/logger/logger.c
@@ -1028,15 +1028,15 @@ log_time(gdouble now, rspamd_logger_t *rspamd_log, gchar *timebuf,
 	}
 }
 
-gsize rspamd_log_fill_iov(struct iovec *iov,
-						  double ts,
-						  const gchar *module,
-						  const gchar *id,
-						  const gchar *function,
-						  gint level_flags,
-						  const gchar *message,
-						  gsize mlen,
-						  rspamd_logger_t *logger)
+void rspamd_log_fill_iov(struct rspamd_logger_iov_ctx *iov_ctx,
+						 double ts,
+						 const gchar *module,
+						 const gchar *id,
+						 const gchar *function,
+						 gint level_flags,
+						 const gchar *message,
+						 gsize mlen,
+						 rspamd_logger_t *logger)
 {
 	bool log_color = (logger->flags & RSPAMD_LOG_FLAG_COLOR);
 	bool log_severity = (logger->flags & RSPAMD_LOG_FLAG_SEVERITY);
@@ -1051,192 +1051,289 @@ gsize rspamd_log_fill_iov(struct iovec *iov,
 		log_systemd = false;
 	}
 
-	gint r;
+	glong r;
 
-	if (iov == NULL) {
+	if (iov_ctx->niov == 0) {
 		/* This is the case when we just return a number of IOV required for the logging */
 		if (log_json) {
-			return 3; /* Preamble, message, "}\n" */
+			iov_ctx->niov = 3; /* Preamble, message, "}\n" */
 		}
 		else {
 			if (log_rspamadm) {
 				if (logger->log_level == G_LOG_LEVEL_DEBUG) {
-					return 4;
+					iov_ctx->niov = 4;
 				}
 				else {
-					return 3; /* No time component */
+					iov_ctx->niov = 3; /* No time component */
 				}
 			}
 			else if (log_systemd) {
-				return 3;
+				iov_ctx->niov = 3;
 			}
 			else {
 				if (log_color) {
-					return 5;
+					iov_ctx->niov = 5;
 				}
 				else {
-					return 4;
+					iov_ctx->niov = 4;
 				}
 			}
 		}
+
+		g_assert(iov_ctx->niov <= G_N_ELEMENTS(iov_ctx->iov));
 	}
-	else {
-		static gchar timebuf[64], modulebuf[64];
-		static gchar tmpbuf[256];
 
-		if (!log_json && !log_systemd) {
-			log_time(ts, logger, timebuf, sizeof(timebuf));
-		}
+	static gchar timebuf[64], modulebuf[64];
+	static gchar tmpbuf[256];
 
-		if (G_UNLIKELY(log_json)) {
-			/* Perform JSON logging */
-			guint slen = id ? strlen(id) : strlen("(NULL)");
-			slen = MIN(RSPAMD_LOG_ID_LEN, slen);
-			r = rspamd_snprintf(tmpbuf, sizeof(tmpbuf), "{\"ts\": %L, "
-														"\"pid\": %P, "
-														"\"severity\": \"%s\", "
-														"\"worker_type\": \"%s\", "
-														"\"id\": \"%*.s\", "
-														"\"module\": \"%s\", "
-														"\"function\": \"%s\", "
-														"\"message\": \"",
-								(gint64) (ts * 1e6),
-								logger->pid,
-								rspamd_get_log_severity_string(level_flags),
-								logger->process_type,
-								slen, id,
-								module,
-								function);
-			iov[0].iov_base = tmpbuf;
-			iov[0].iov_len = r;
-			iov[1].iov_base = (void *) message;
-			iov[1].iov_len = mlen;
-			iov[2].iov_base = (void *) "\"}\n";
-			iov[2].iov_len = sizeof("\"}\n") - 1;
-
-			return 3;
+	if (!log_json && !log_systemd) {
+		log_time(ts, logger, timebuf, sizeof(timebuf));
+	}
+
+	if (G_UNLIKELY(log_json)) {
+		/* Perform JSON logging */
+		guint slen = id ? strlen(id) : strlen("(NULL)");
+		slen = MIN(RSPAMD_LOG_ID_LEN, slen);
+		r = rspamd_snprintf(tmpbuf, sizeof(tmpbuf), "{\"ts\": %L, "
+													"\"pid\": %P, "
+													"\"severity\": \"%s\", "
+													"\"worker_type\": \"%s\", "
+													"\"id\": \"%*.s\", "
+													"\"module\": \"%s\", "
+													"\"function\": \"%s\", "
+													"\"message\": \"",
+							(gint64) (ts * 1e6),
+							logger->pid,
+							rspamd_get_log_severity_string(level_flags),
+							logger->process_type,
+							slen, id,
+							module,
+							function);
+		iov_ctx->iov[0].iov_base = tmpbuf;
+		iov_ctx->iov[0].iov_len = r;
+		/* TODO: is it possible to have other 'bad' symbols here? */
+		if (rspamd_memcspn(message, "\"\\\r\n\b\t\v", mlen) == mlen) {
+			iov_ctx->iov[1].iov_base = (void *) message;
+			iov_ctx->iov[1].iov_len = mlen;
 		}
-		else if (G_LIKELY(!log_rspamadm)) {
-			if (!log_systemd) {
-				if (log_color) {
-					if (level_flags & (G_LOG_LEVEL_INFO | G_LOG_LEVEL_MESSAGE)) {
-						/* White */
-						r = rspamd_snprintf(tmpbuf, sizeof(tmpbuf), "\033[0;37m");
-					}
-					else if (level_flags & G_LOG_LEVEL_WARNING) {
-						/* Magenta */
-						r = rspamd_snprintf(tmpbuf, sizeof(tmpbuf), "\033[0;32m");
-					}
-					else if (level_flags & G_LOG_LEVEL_CRITICAL) {
-						/* Red */
-						r = rspamd_snprintf(tmpbuf, sizeof(tmpbuf), "\033[1;31m");
-					}
-					else {
-						r = 0;
-					}
+		else {
+			/* We need to do JSON escaping of the quotes */
+			const char *p, *end = message + mlen;
+			long escaped_len;
+
+			for (p = message, escaped_len = 0; p < end; p++, escaped_len++) {
+				switch (*p) {
+				case '\v':
+				case '\0':
+					escaped_len += 5;
+					break;
+				case '\\':
+				case '"':
+				case '\n':
+				case '\r':
+				case '\b':
+				case '\t':
+					escaped_len++;
+					break;
+				default:
+					break;
 				}
-				else {
-					r = 0;
+			}
+
+
+			struct rspamd_logger_iov_thrash_stack *thrash_stack_elt = g_malloc(
+				sizeof(struct rspamd_logger_iov_thrash_stack) +
+				escaped_len);
+
+			char *dst = ((char *) thrash_stack_elt) + sizeof(struct rspamd_logger_iov_thrash_stack);
+			char *d;
+
+			thrash_stack_elt->prev = iov_ctx->thrash_stack;
+			iov_ctx->thrash_stack = thrash_stack_elt;
+
+			for (p = message, d = dst; p < end; p++, d++) {
+				switch (*p) {
+				case '\n':
+					*d++ = '\\';
+					*d = 'n';
+					break;
+				case '\r':
+					*d++ = '\\';
+					*d = 'r';
+					break;
+				case '\b':
+					*d++ = '\\';
+					*d = 'b';
+					break;
+				case '\t':
+					*d++ = '\\';
+					*d = 't';
+					break;
+				case '\f':
+					*d++ = '\\';
+					*d = 'f';
+					break;
+				case '\0':
+					*d++ = '\\';
+					*d++ = 'u';
+					*d++ = '0';
+					*d++ = '0';
+					*d++ = '0';
+					*d = '0';
+					break;
+				case '\v':
+					*d++ = '\\';
+					*d++ = 'u';
+					*d++ = '0';
+					*d++ = '0';
+					*d++ = '0';
+					*d = 'B';
+					break;
+				case '\\':
+					*d++ = '\\';
+					*d = '\\';
+					break;
+				case '"':
+					*d++ = '\\';
+					*d = '"';
+					break;
+				default:
+					*d = *p;
+					break;
 				}
+			}
 
-				if (log_severity) {
-					r += rspamd_snprintf(tmpbuf + r,
-										 sizeof(tmpbuf) - r,
-										 "%s [%s] #%P(%s) ",
-										 timebuf,
-										 rspamd_get_log_severity_string(level_flags),
-										 logger->pid,
-										 logger->process_type);
+			iov_ctx->iov[1].iov_base = dst;
+			iov_ctx->iov[1].iov_len = d - dst;
+		}
+		iov_ctx->iov[2].iov_base = (void *) "\"}\n";
+		iov_ctx->iov[2].iov_len = sizeof("\"}\n") - 1;
+	}
+	else if (G_LIKELY(!log_rspamadm)) {
+		if (!log_systemd) {
+			if (log_color) {
+				if (level_flags & (G_LOG_LEVEL_INFO | G_LOG_LEVEL_MESSAGE)) {
+					/* White */
+					r = rspamd_snprintf(tmpbuf, sizeof(tmpbuf), "\033[0;37m");
+				}
+				else if (level_flags & G_LOG_LEVEL_WARNING) {
+					/* Magenta */
+					r = rspamd_snprintf(tmpbuf, sizeof(tmpbuf), "\033[0;32m");
+				}
+				else if (level_flags & G_LOG_LEVEL_CRITICAL) {
+					/* Red */
+					r = rspamd_snprintf(tmpbuf, sizeof(tmpbuf), "\033[1;31m");
 				}
 				else {
-					r += rspamd_snprintf(tmpbuf + r,
-										 sizeof(tmpbuf) - r,
-										 "%s #%P(%s) ",
-										 timebuf,
-										 logger->pid,
-										 logger->process_type);
+					r = 0;
 				}
 			}
 			else {
 				r = 0;
+			}
+
+			if (log_severity) {
 				r += rspamd_snprintf(tmpbuf + r,
 									 sizeof(tmpbuf) - r,
-									 "(%s) ",
+									 "%s [%s] #%P(%s) ",
+									 timebuf,
+									 rspamd_get_log_severity_string(level_flags),
+									 logger->pid,
 									 logger->process_type);
 			}
-
-			int mremain, mr;
-			char *m;
-
-			modulebuf[0] = '\0';
-			mremain = sizeof(modulebuf);
-			m = modulebuf;
-
-			if (id != NULL) {
-				guint slen = strlen(id);
-				slen = MIN(RSPAMD_LOG_ID_LEN, slen);
-				mr = rspamd_snprintf(m, mremain, "<%*.s>; ", slen,
-									 id);
-				m += mr;
-				mremain -= mr;
-			}
-			if (module != NULL) {
-				mr = rspamd_snprintf(m, mremain, "%s; ", module);
-				m += mr;
-				mremain -= mr;
-			}
-			if (function != NULL) {
-				mr = rspamd_snprintf(m, mremain, "%s: ", function);
-				m += mr;
-				mremain -= mr;
-			}
 			else {
-				mr = rspamd_snprintf(m, mremain, ": ");
-				m += mr;
-				mremain -= mr;
-			}
-
-			/* Ensure that we have a space at the end */
-			if (m > modulebuf && *(m - 1) != ' ') {
-				*(m - 1) = ' ';
+				r += rspamd_snprintf(tmpbuf + r,
+									 sizeof(tmpbuf) - r,
+									 "%s #%P(%s) ",
+									 timebuf,
+									 logger->pid,
+									 logger->process_type);
 			}
+		}
+		else {
+			r = 0;
+			r += rspamd_snprintf(tmpbuf + r,
+								 sizeof(tmpbuf) - r,
+								 "(%s) ",
+								 logger->process_type);
+		}
 
-			/* Construct IOV for log line */
-			iov[0].iov_base = tmpbuf;
-			iov[0].iov_len = r;
-			iov[1].iov_base = modulebuf;
-			iov[1].iov_len = m - modulebuf;
-			iov[2].iov_base = (void *) message;
-			iov[2].iov_len = mlen;
-			iov[3].iov_base = (void *) &lf_chr;
-			iov[3].iov_len = 1;
-
-			if (log_color) {
-				iov[4].iov_base = "\033[0m";
-				iov[4].iov_len = sizeof("\033[0m") - 1;
+		glong mremain, mr;
+		char *m;
 
-				return 5;
-			}
+		modulebuf[0] = '\0';
+		mremain = sizeof(modulebuf);
+		m = modulebuf;
 
-			return 4;
+		if (id != NULL) {
+			guint slen = strlen(id);
+			slen = MIN(RSPAMD_LOG_ID_LEN, slen);
+			mr = rspamd_snprintf(m, mremain, "<%*.s>; ", slen,
+								 id);
+			m += mr;
+			mremain -= mr;
+		}
+		if (module != NULL) {
+			mr = rspamd_snprintf(m, mremain, "%s; ", module);
+			m += mr;
+			mremain -= mr;
+		}
+		if (function != NULL) {
+			mr = rspamd_snprintf(m, mremain, "%s: ", function);
+			m += mr;
+			mremain -= mr;
 		}
 		else {
-			/* Rspamadm case */
-			int niov = 0;
-
-			if (logger->log_level == G_LOG_LEVEL_DEBUG) {
-				iov[niov].iov_base = (void *) timebuf;
-				iov[niov++].iov_len = strlen(timebuf);
-				iov[niov].iov_base = (void *) " ";
-				iov[niov++].iov_len = 1;
-			}
+			mr = rspamd_snprintf(m, mremain, ": ");
+			m += mr;
+			mremain -= mr;
+		}
 
-			iov[niov].iov_base = (void *) message;
-			iov[niov++].iov_len = mlen;
-			iov[niov].iov_base = (void *) &lf_chr;
-			iov[niov++].iov_len = 1;
+		/* Ensure that we have a space at the end */
+		if (m > modulebuf && *(m - 1) != ' ') {
+			*(m - 1) = ' ';
+		}
 
-			return niov;
+		/* Construct IOV for log line */
+		iov_ctx->iov[0].iov_base = tmpbuf;
+		iov_ctx->iov[0].iov_len = r;
+		iov_ctx->iov[1].iov_base = modulebuf;
+		iov_ctx->iov[1].iov_len = m - modulebuf;
+		iov_ctx->iov[2].iov_base = (void *) message;
+		iov_ctx->iov[2].iov_len = mlen;
+		iov_ctx->iov[3].iov_base = (void *) &lf_chr;
+		iov_ctx->iov[3].iov_len = 1;
+
+		if (log_color) {
+			iov_ctx->iov[4].iov_base = "\033[0m";
+			iov_ctx->iov[4].iov_len = sizeof("\033[0m") - 1;
+		}
+	}
+	else {
+		/* Rspamadm case */
+		int niov = 0;
+
+		if (logger->log_level == G_LOG_LEVEL_DEBUG) {
+			iov_ctx->iov[niov].iov_base = (void *) timebuf;
+			iov_ctx->iov[niov++].iov_len = strlen(timebuf);
+			iov_ctx->iov[niov].iov_base = (void *) " ";
+			iov_ctx->iov[niov++].iov_len = 1;
 		}
+
+		iov_ctx->iov[niov].iov_base = (void *) message;
+		iov_ctx->iov[niov++].iov_len = mlen;
+		iov_ctx->iov[niov].iov_base = (void *) &lf_chr;
+		iov_ctx->iov[niov++].iov_len = 1;
+	}
+}
+
+void rspamd_log_iov_free(struct rspamd_logger_iov_ctx *iov_ctx)
+{
+	struct rspamd_logger_iov_thrash_stack *st = iov_ctx->thrash_stack;
+
+	while (st) {
+		struct rspamd_logger_iov_thrash_stack *nst = st->prev;
+		g_free(st);
+		st = nst;
 	}
 }
\ No newline at end of file
diff --git a/src/libserver/logger/logger_console.c b/src/libserver/logger/logger_console.c
index 9af931faf..7f3c7708f 100644
--- a/src/libserver/logger/logger_console.c
+++ b/src/libserver/logger/logger_console.c
@@ -173,15 +173,15 @@ bool rspamd_log_console_log(const gchar *module, const gchar *id,
 #endif
 
 	now = rspamd_get_calendar_ticks();
-	gsize niov = rspamd_log_fill_iov(NULL, now, module, id,
-									 function, level_flags, message,
-									 mlen, rspamd_log);
-	struct iovec *iov = g_alloca(sizeof(struct iovec) * niov);
-	rspamd_log_fill_iov(iov, now, module, id, function, level_flags, message,
+
+	struct rspamd_logger_iov_ctx iov_ctx;
+	memset(&iov_ctx, 0, sizeof(iov_ctx));
+	rspamd_log_fill_iov(&iov_ctx, now, module, id,
+						function, level_flags, message,
 						mlen, rspamd_log);
 
 again:
-	r = writev(fd, iov, niov);
+	r = writev(fd, iov_ctx.iov, iov_ctx.niov);
 
 	if (r == -1) {
 		if (errno == EAGAIN || errno == EINTR) {
@@ -195,6 +195,7 @@ again:
 			rspamd_file_unlock(fd, FALSE);
 		}
 
+		rspamd_log_iov_free(&iov_ctx);
 		return false;
 	}
 
@@ -205,5 +206,6 @@ again:
 		rspamd_file_unlock(fd, FALSE);
 	}
 
+	rspamd_log_iov_free(&iov_ctx);
 	return true;
 }
\ No newline at end of file
diff --git a/src/libserver/logger/logger_file.c b/src/libserver/logger/logger_file.c
index 490cf0b65..20b04b852 100644
--- a/src/libserver/logger/logger_file.c
+++ b/src/libserver/logger/logger_file.c
@@ -465,13 +465,15 @@ bool rspamd_log_file_log(const gchar *module, const gchar *id,
 		now = rspamd_get_calendar_ticks();
 	}
 
-	gsize niov = rspamd_log_fill_iov(NULL, now, module, id, function, level_flags, message,
-									 mlen, rspamd_log);
-	struct iovec *iov = g_alloca(sizeof(struct iovec) * niov);
-	rspamd_log_fill_iov(iov, now, module, id, function, level_flags, message,
+	struct rspamd_logger_iov_ctx iov_ctx;
+	memset(&iov_ctx, 0, sizeof(iov_ctx));
+	rspamd_log_fill_iov(&iov_ctx, now, module, id, function, level_flags, message,
 						mlen, rspamd_log);
 
-	return file_log_helper(rspamd_log, priv, iov, niov, level_flags);
+	bool ret = file_log_helper(rspamd_log, priv, iov_ctx.iov, iov_ctx.niov, level_flags);
+	rspamd_log_iov_free(&iov_ctx);
+
+	return ret;
 }
 
 void *
diff --git a/src/libserver/logger/logger_private.h b/src/libserver/logger/logger_private.h
index 138f8df91..234a207e5 100644
--- a/src/libserver/logger/logger_private.h
+++ b/src/libserver/logger/logger_private.h
@@ -103,10 +103,20 @@ bool rspamd_log_file_log(const gchar *module, const gchar *id,
 bool rspamd_log_file_on_fork(rspamd_logger_t *logger, struct rspamd_config *cfg,
 							 gpointer arg, GError **err);
 
+struct rspamd_logger_iov_thrash_stack {
+	struct rspamd_logger_iov_thrash_stack *prev;
+	char data[0];
+};
+#define RSPAMD_LOGGER_MAX_IOV 8
+struct rspamd_logger_iov_ctx {
+	struct iovec iov[RSPAMD_LOGGER_MAX_IOV];
+	int niov;
+	struct rspamd_logger_iov_thrash_stack *thrash_stack;
+};
 /**
  * Fills IOV of logger (usable for file/console logging)
  * Warning: this function is NOT reentrant, do not call it twice from a single moment of execution
- * @param iov if NULL just returns the number of elements required in IOV array
+ * @param iov filled by this function
  * @param module
  * @param id
  * @param function
@@ -116,14 +126,20 @@ bool rspamd_log_file_on_fork(rspamd_logger_t *logger, struct rspamd_config *cfg,
  * @param rspamd_log
  * @return number of iov elements being filled
  */
-gsize rspamd_log_fill_iov(struct iovec *iov,
-						  double ts,
-						  const gchar *module, const gchar *id,
-						  const gchar *function,
-						  gint level_flags,
-						  const gchar *message,
-						  gsize mlen,
-						  rspamd_logger_t *rspamd_log);
+void rspamd_log_fill_iov(struct rspamd_logger_iov_ctx *iov_ctx,
+						 double ts,
+						 const gchar *module, const gchar *id,
+						 const gchar *function,
+						 gint level_flags,
+						 const gchar *message,
+						 gsize mlen,
+						 rspamd_logger_t *rspamd_log);
+
+/**
+ * Frees IOV context
+ * @param iov_ctx
+ */
+void rspamd_log_iov_free(struct rspamd_logger_iov_ctx *iov_ctx);
 /**
  * Escape log line by replacing unprintable characters to hex escapes like \xNN
  * @param src


More information about the Commits mailing list