commit 8cc97e3: [Project] Implement syntax highlighting for Lua

Vsevolod Stakhov vsevolod at highsecure.ru
Wed Sep 4 10:56:04 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-09-04 11:47:22 +0100
URL: https://github.com/rspamd/rspamd/commit/8cc97e36d499665fb1b876901a50b6b0bf62ead8 (HEAD -> master)

[Project] Implement syntax highlighting for Lua

---
 lualib/lua_lexer.lua    | 162 ++++++++++++++++++++++++++++++++++
 src/rspamadm/lua_repl.c | 229 +++++++++++++++++++++++++++++-------------------
 2 files changed, 302 insertions(+), 89 deletions(-)

diff --git a/lualib/lua_lexer.lua b/lualib/lua_lexer.lua
new file mode 100644
index 000000000..8c751d82f
--- /dev/null
+++ b/lualib/lua_lexer.lua
@@ -0,0 +1,162 @@
+--[[
+Copyright (c) 2019, Vsevolod Stakhov <vsevolod at highsecure.ru>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+--[[ Lua LPEG grammar based on https://github.com/xolox/lua-lxsh/ ]]
+
+
+local lpeg = require "lpeg"
+
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+-- Building blocks shared by the token patterns below
+local D = R('09') -- one decimal digit
+-- One identifier character: ASCII letter, byte in \127..\255, or underscore
+local I = R('AZ', 'az', '\127\255') + P('_')
+local B = -(I + D) -- word boundary: no identifier character or digit follows
+local SOS = P(function(_, pos) return pos == 1 end) -- start of string
+local EOS = -P(1) -- end of string
+
+-- Long bracket literal `[[...]]` / `[=[...]=]`, shared by strings and comments.
+-- The lookahead rejects quickly; the function then finds the matching closer.
+local longstring = #(P'[' * P'='^0 * P'[') * P(function(subject, pos)
+  local eqs = subject:match('^%[(=*)%[', pos)
+  if not eqs then return nil end
+  local _, stop = subject:find(']' .. eqs .. ']', pos, true)
+  return stop and stop + 1 or nil
+end)
+
+-- Quoted strings: any char except the quote, a line break or a backslash,
+-- or a backslash followed by any single char (escape sequence).
+local escape = P'\\' * P(1)
+local singlequoted = P"'" * ((P(1) - S"'\r\n\f\\") + escape)^0 * P"'"
+local doublequoted = P'"' * ((P(1) - S'"\r\n\f\\') + escape)^0 * P'"'
+
+-- Comments: `--` followed by a long bracket, or running to the end of line.
+local rest_of_line = (P(1) - S'\r\n\f')^0 * (P'\r\n' + P'\n')^-1
+local singleline = P'--' * rest_of_line
+local multiline = P'--' * longstring
+
+-- Numbers: hex integers, or decimal integers/floats with optional exponent.
+local digits = D^1
+local hexadecimal = P'0' * S'xX' * R('09', 'AF', 'af')^1
+local mantissa = digits * P'.' * D^0 + P'.' * digits + digits
+local maybeexp = mantissa * (S'eE' * S'+-'^-1 * digits)^-1
+
+-- Turn a whitespace-separated word list into a single LPeg pattern that
+-- matches any of the words when followed by a word boundary.
+local function compile_keywords(keywords)
+  local words = {}
+  for kw in keywords:gmatch('%S+') do
+    words[#words + 1] = kw
+  end
+  -- Longest words first, so `elseif` is attempted before its prefix `else`
+  table.sort(words, function(lhs, rhs) return #lhs > #rhs end)
+
+  local choice
+  for idx = 1, #words do
+    local alt = lpeg.P(words[idx])
+    if choice then choice = choice + alt else choice = alt end
+  end
+
+  -- Ending boundary: no identifier character follows, or the input ends
+  return choice * (B + EOS)
+end
+
+-- Identifiers, optionally followed by a chain of `.field` accesses
+local ident = I * (I + D)^0
+local expr = ('.' * ident)^0
+
+-- Token kinds in priority order: the first pattern that matches wins.
+local patterns = {
+  {'whitespace',  S'\r\n\f\t\v '^1},
+  {'constant', (P'true' + 'false' + 'nil') * B},
+  {'string', singlequoted + doublequoted + longstring},
+  {'comment', multiline + singleline},
+  {'number', hexadecimal + maybeexp},
+  -- Word operators need a boundary: otherwise `order` or `notify` would be
+  -- split into an operator followed by the rest of the identifier.
+  {'operator', (P'not' + 'and' + 'or') * B + '...' + '..' + '~=' + '==' + '>='
+      + '<=' + S']{=>^[<;)*(%}+-:,/.#'},
+  {'keyword', compile_keywords([[
+      break do else elseif end for function if in local repeat return then until while
+      ]])},
+  -- `ident * expr` consumes a dotted name such as `string.format` as one token
+  {'identifier', ident * expr},
+  {'error', 1}, -- any other single character
+}
+
+local compiled -- memoised combined pattern, built on first use
+
+-- Combine all token patterns into one ordered choice. A successful match
+-- yields the token kind (constant capture) and the position after the token.
+local function compile_patterns()
+  if compiled then return compiled end
+
+  local combined
+  for _, entry in ipairs(patterns) do
+    local kind, patt = entry[1], entry[2]
+    local alt = lpeg.Cc(kind) * lpeg.P(patt) * lpeg.Cp()
+    if combined then combined = combined + alt else combined = alt end
+  end
+
+  compiled = combined
+  return compiled
+end
+
+-- Advance a (line, column) position past `token`. Columns are counted in
+-- bytes and restart at 1 after every newline contained in the token.
+local function sync(token, lnum, cnum)
+  local line, col = lnum or 1, cnum or 1
+  local last_nl -- byte offset of the last newline seen in the token
+  for nl_pos in token:gmatch('()\n') do
+    line = line + 1
+    last_nl = nl_pos
+  end
+  if last_nl then
+    return line, #token - last_nl + 1
+  end
+  return line, col + #token
+end
+
+local exports = {}
+
+-- Iterator over tokens of `input`: each step yields kind, text, line, column.
+exports.gmatch = function(input)
+  local lexer = compile_patterns()
+  local pos, line, col = 1, 1, 1
+
+  return function()
+    local kind, next_pos = lexer:match(input, pos)
+    if not (kind and next_pos) then return end
+    local text = input:sub(pos, next_pos - 1)
+    local tok_line, tok_col = line, col
+    pos = next_pos
+    line, col = sync(text, line, col)
+    return kind, text, tok_line, tok_col
+  end
+end
+
+-- Run the lexer over `input` and collect {kind, text, line, column} tuples.
+exports.lex_to_table = function(input)
+  local tokens, count = {}, 0
+  for kind, text, lnum, cnum in exports.gmatch(input) do
+    count = count + 1
+    tokens[count] = {kind, text, lnum, cnum}
+  end
+  return tokens
+end
+
+return exports
+
diff --git a/src/rspamadm/lua_repl.c b/src/rspamadm/lua_repl.c
index 997fd4e9d..54a2ed01e 100644
--- a/src/rspamadm/lua_repl.c
+++ b/src/rspamadm/lua_repl.c
@@ -40,7 +40,6 @@ static guint max_history = 2000;
 static gchar *serve = NULL;
 static gchar *exec_line = NULL;
 static gint batch = -1;
-static gboolean per_line = FALSE;
 extern struct rspamd_async_session *rspamadm_session;
 
 static const char *default_history_file = ".rspamd_repl.hist";
@@ -125,8 +124,6 @@ static GOptionEntry entries[] = {
 				"Serve http lua server", NULL},
 		{"batch", 'b', 0, G_OPTION_ARG_NONE, &batch,
 				"Batch execution mode", NULL},
-		{"per-line", 'p', 0, G_OPTION_ARG_NONE, &per_line,
-				"Pass each line of input to the specified lua script", NULL},
 		{"exec", 'e', 0, G_OPTION_ARG_STRING, &exec_line,
 				"Execute specified script", NULL},
 		{"args", 'a', 0, G_OPTION_ARG_STRING_ARRAY, &lua_args,
@@ -228,15 +225,12 @@ rspamadm_lua_load_script (lua_State *L, const gchar *path)
 		return FALSE;
 	}
 
-	if (!per_line) {
-
-		if (lua_repl_thread_call (thread, 0, (void *)path, lua_thread_str_error_cb) != 0) {
-			return FALSE;
-		}
-
-		lua_settop (L, 0);
+	if (lua_repl_thread_call (thread, 0, (void *)path, lua_thread_str_error_cb) != 0) {
+		return FALSE;
 	}
 
+	lua_settop (L, 0);
+
 	return TRUE;
 }
 
@@ -274,24 +268,22 @@ rspamadm_exec_input (lua_State *L, const gchar *input)
 
 	g_string_free (tb, TRUE);
 
-	if (!per_line) {
 
-		top = lua_gettop (L);
+	top = lua_gettop (L);
 
-		if (lua_repl_thread_call (thread, 0, NULL, NULL) == 0) {
-			/* Print output */
-			for (i = top; i <= lua_gettop (L); i++) {
-				if (lua_isfunction (L, i)) {
-					lua_pushvalue (L, i);
-					cbref = luaL_ref (L, LUA_REGISTRYINDEX);
+	if (lua_repl_thread_call (thread, 0, NULL, NULL) == 0) {
+		/* Print output */
+		for (i = top; i <= lua_gettop (L); i++) {
+			if (lua_isfunction (L, i)) {
+				lua_pushvalue (L, i);
+				cbref = luaL_ref (L, LUA_REGISTRYINDEX);
 
-					rspamd_printf ("local function: %d\n", cbref);
-				} else {
-					memset (&tr, 0, sizeof (tr));
-					lua_logger_out_type (L, i, outbuf, sizeof (outbuf), &tr,
-							LUA_ESCAPE_UNPRINTABLE);
-					rspamd_printf ("%s\n", outbuf);
-				}
+				rspamd_printf ("local function: %d\n", cbref);
+			} else {
+				memset (&tr, 0, sizeof (tr));
+				lua_logger_out_type (L, i, outbuf, sizeof (outbuf), &tr,
+						LUA_ESCAPE_UNPRINTABLE);
+				rspamd_printf ("%s\n", outbuf);
 			}
 		}
 	}
@@ -516,8 +508,106 @@ rspamadm_lua_try_dot_command (lua_State *L, const gchar *input)
 	return FALSE;
 }
 
+#ifdef WITH_LUA_REPL
+static gint lex_ref_idx = -1;
+
 static void
-rspamadm_lua_run_repl (lua_State *L)
+lua_syntax_highlighter (const char *str, ReplxxColor *colours, int size, void *ud)
+{
+	lua_State *L = (lua_State *)ud;
+	/* Fills `colours` for `str` using tokens returned by lua_lexer.lex_to_table */
+	if (lex_ref_idx == -1) {
+		if (!rspamd_lua_require_function (L, "lua_lexer", "lex_to_table")) {
+			fprintf (stderr, "cannot require lua_lexer!\n");
+
+			exit (EXIT_FAILURE);
+		}
+
+		lex_ref_idx = luaL_ref (L, LUA_REGISTRYINDEX); /* resolved once, reused on later calls */
+	}
+
+	lua_rawgeti (L, LUA_REGISTRYINDEX, lex_ref_idx);
+	lua_pushstring (L, str);
+
+	if (lua_pcall (L, 1, 1, 0) != 0) { /* one argument (str), one result */
+		fprintf (stderr, "cannot lex a string!\n");
+	}
+	else {
+		/* The call result on top of the stack is walked as an array of tokens */
+		gsize nelts = rspamd_lua_table_size (L, -1);
+
+		for (gsize i = 0; i < nelts; i ++) {
+			/*
+			 * Fields of each token table:
+			 * 1 - type of element (string)
+			 * 2 - text (string); only its length is used below
+			 * 3 - line number (int), assumed to always be 1; not read here
+			 * 4 - 1-based column (int); column + length must fit in size
+			 */
+			const gchar *what, *text;
+			gsize column, tlen, cur_top, elt_pos;
+			ReplxxColor elt_color = REPLXX_COLOR_DEFAULT;
+
+			cur_top = lua_gettop (L); /* restored after each token */
+			lua_rawgeti (L, -1, i + 1); /* Lua arrays are 1-based */
+			elt_pos = lua_gettop (L);
+			lua_rawgeti (L, elt_pos, 1);
+			what = lua_tostring (L, -1);
+			lua_rawgeti (L, elt_pos, 2);
+			text = lua_tolstring (L, -1, &tlen);
+			lua_rawgeti (L, elt_pos, 4);
+			column = lua_tointeger (L, -1);
+
+			g_assert (column > 0);
+			column --; /* Start from 0 */
+
+			if (column + tlen > size) {
+				/* Likely utf8 case, too complicated to match */
+				lua_settop (L, cur_top);
+				continue;
+			}
+
+			/* Map the token kind to a colour; unknown kinds keep REPLXX_COLOR_DEFAULT */
+			if (strcmp (what, "identifier") == 0) {
+				elt_color = REPLXX_COLOR_NORMAL;
+			}
+			else if (strcmp (what, "number") == 0) {
+				elt_color = REPLXX_COLOR_BLUE;
+			}
+			else if (strcmp (what, "string") == 0) {
+				elt_color = REPLXX_COLOR_GREEN;
+			}
+			else if (strcmp (what, "keyword") == 0) {
+				elt_color = REPLXX_COLOR_WHITE;
+			}
+			else if (strcmp (what, "constant") == 0) {
+				elt_color = REPLXX_COLOR_WHITE;
+			}
+			else if (strcmp (what, "operator") == 0) {
+				elt_color = REPLXX_COLOR_CYAN;
+			}
+			else if (strcmp (what, "comment") == 0) {
+				elt_color = REPLXX_COLOR_BRIGHTGREEN;
+			}
+			else if (strcmp (what, "error") == 0) {
+				elt_color = REPLXX_COLOR_ERROR;
+			}
+
+			for (gsize j = column; j < column + tlen; j ++) {
+				colours[j] = elt_color;
+			}
+
+			/* Drop the token table and the three fields fetched from it */
+			lua_settop (L, cur_top);
+		}
+	}
+
+	lua_settop (L, 0); /* empties the whole stack, including the call result or error value */
+}
+#endif
+
+static void
+rspamadm_lua_run_repl (lua_State *L, bool is_batch)
 {
 	gchar *input;
 	gboolean is_multiline = FALSE;
@@ -546,6 +636,11 @@ rspamadm_lua_run_repl (lua_State *L)
 
 		lua_settop (L, 0);
 #else
+		if (!is_batch) {
+			replxx_set_highlighter_callback (rx_instance, lua_syntax_highlighter,
+					L);
+		}
+
 		if (!is_multiline) {
 			input = (gchar *)replxx_input (rx_instance, MAIN_PROMPT);
 
@@ -555,7 +650,9 @@ rspamadm_lua_run_repl (lua_State *L)
 
 			if (input[0] == '.') {
 				if (rspamadm_lua_try_dot_command (L, input)) {
-					replxx_history_add (rx_instance, input);
+					if (!is_batch) {
+						replxx_history_add (rx_instance, input);
+					}
 					continue;
 				}
 			}
@@ -567,7 +664,9 @@ rspamadm_lua_run_repl (lua_State *L)
 			}
 
 			rspamadm_exec_input (L, input);
-			replxx_history_add (rx_instance, input);
+			if (!is_batch) {
+				replxx_history_add (rx_instance, input);
+			}
 			lua_settop (L, 0);
 		}
 		else {
@@ -589,7 +688,9 @@ rspamadm_lua_run_repl (lua_State *L)
 					}
 				}
 
-				replxx_history_add (rx_instance, tb->str);
+				if (!is_batch) {
+					replxx_history_add (rx_instance, tb->str);
+				}
 				g_string_free (tb, TRUE);
 			}
 			else {
@@ -899,69 +1000,19 @@ rspamadm_lua (gint argc, gchar **argv, const struct rspamadm_command *cmd)
 		g_hash_table_insert (cmds_hash, (gpointer)cmds[i].name, &cmds[i]);
 	}
 
-	if (per_line) {
-		GIOChannel *in;
-		GString *buf;
-		gsize end_pos;
-		GIOStatus ret;
-		gint old_top;
-		GError *err = NULL;
-
-		in = g_io_channel_unix_new (STDIN_FILENO);
-		buf = g_string_sized_new (BUFSIZ);
-
-again:
-		while ((ret = g_io_channel_read_line_string (in, buf, &end_pos, &err)) ==
-				G_IO_STATUS_NORMAL) {
-			old_top = lua_gettop (L);
-			lua_pushvalue (L, -1);
-			lua_pushlstring (L, buf->str, MIN (buf->len, end_pos));
-			lua_setglobal (L, "input");
-
-			struct thread_entry *thread = lua_thread_pool_get_for_config (rspamd_main->cfg);
-			L = thread->lua_state;
 
-			lua_repl_thread_call (thread, 0, NULL, NULL);
-
-			lua_settop (L, old_top);
-		}
-
-		if (ret == G_IO_STATUS_AGAIN) {
-			goto again;
-		}
-
-		g_string_free (buf, TRUE);
-		g_io_channel_shutdown (in, FALSE, NULL);
-
-		if (ret == G_IO_STATUS_EOF) {
-			if (err) {
-				g_error_free (err);
-			}
-		}
-		else {
-			rspamd_fprintf (stderr, "IO error: %e\n", err);
-
-			if (err) {
-				g_error_free (err);
-			}
-
-			exit (-errno);
-		}
-	}
-	else {
 #ifdef WITH_LUA_REPL
-		rx_instance = replxx_init ();
+	rx_instance = replxx_init ();
 #endif
-		if (!batch) {
-			replxx_set_max_history_size (rx_instance, max_history);
-			replxx_history_load (rx_instance, histfile);
-			rspamadm_lua_run_repl (L);
-			replxx_history_save (rx_instance, histfile);
-		} else {
-			rspamadm_lua_run_repl (L);
-		}
+	if (!batch) {
+		replxx_set_max_history_size (rx_instance, max_history);
+		replxx_history_load (rx_instance, histfile);
+		rspamadm_lua_run_repl (L, false);
+		replxx_history_save (rx_instance, histfile);
+	} else {
+		rspamadm_lua_run_repl (L, true);
+	}
 #ifdef WITH_LUA_REPL
-		 replxx_end (rx_instance);
+	replxx_end (rx_instance);
 #endif
-	}
 }


More information about the Commits mailing list