commit 2c5aaa7: [Minor] Add lpeg re module
Vsevolod Stakhov
vsevolod at highsecure.ru
Mon Apr 8 12:28:03 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-04-08 11:51:01 +0100
URL: https://github.com/rspamd/rspamd/commit/2c5aaa7e85739d513143e825ed98a64c2e69785b
[Minor] Add lpeg re module
---
CMakeLists.txt | 1 +
contrib/lua-lpeg/lpegre.lua | 267 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 268 insertions(+)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index cf4223840..dc4a37ca6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1379,6 +1379,7 @@ INSTALL(FILES "contrib/lua-fun/fun.lua" DESTINATION ${LUALIBDIR})
INSTALL(FILES "contrib/lua-argparse/argparse.lua" DESTINATION ${LUALIBDIR})
INSTALL(FILES "contrib/lua-tableshape/tableshape.lua" DESTINATION ${LUALIBDIR})
INSTALL(FILES "contrib/lua-lupa/lupa.lua" DESTINATION ${LUALIBDIR})
+INSTALL(FILES "contrib/lua-lpeg/lpegre.lua" DESTINATION ${LUALIBDIR})
IF(ENABLE_TORCH MATCHES "ON")
INSTALL(FILES "contrib/lua-moses/moses.lua" DESTINATION ${LUALIBDIR})
diff --git a/contrib/lua-lpeg/lpegre.lua b/contrib/lua-lpeg/lpegre.lua
new file mode 100644
index 000000000..3bb8af7d4
--- /dev/null
+++ b/contrib/lua-lpeg/lpegre.lua
@@ -0,0 +1,267 @@
+-- $Id: re.lua $
+
+-- imported functions and modules
+local tonumber, type, print, error = tonumber, type, print, error
+local setmetatable = setmetatable
+local m = require"lpeg"
+
+-- 'm' will be used to parse expressions, and 'mm' will be used to
+-- create expressions; that is, 're' runs on 'm', creating patterns
+-- on 'mm'
+local mm = m
+
+-- pattern's metatable
+local mt = getmetatable(mm.P(0))
+
+
+
+-- No more global accesses after this point
+local version = _VERSION
+if version == "Lua 5.2" then _ENV = nil end
+
+
+local any = m.P(1)
+
+
+-- Pre-defined names
+local Predef = { nl = m.P"\n" }
+
+
+local mem
+local fmem
+local gmem
+
+
+local function updatelocale ()
+ mm.locale(Predef)
+ Predef.a = Predef.alpha
+ Predef.c = Predef.cntrl
+ Predef.d = Predef.digit
+ Predef.g = Predef.graph
+ Predef.l = Predef.lower
+ Predef.p = Predef.punct
+ Predef.s = Predef.space
+ Predef.u = Predef.upper
+ Predef.w = Predef.alnum
+ Predef.x = Predef.xdigit
+ Predef.A = any - Predef.a
+ Predef.C = any - Predef.c
+ Predef.D = any - Predef.d
+ Predef.G = any - Predef.g
+ Predef.L = any - Predef.l
+ Predef.P = any - Predef.p
+ Predef.S = any - Predef.s
+ Predef.U = any - Predef.u
+ Predef.W = any - Predef.w
+ Predef.X = any - Predef.x
+ mem = {} -- restart memoization
+ fmem = {}
+ gmem = {}
+ local mt = {__mode = "v"}
+ setmetatable(mem, mt)
+ setmetatable(fmem, mt)
+ setmetatable(gmem, mt)
+end
+
+
+updatelocale()
+
+
+
+local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end)
+
+
+local function patt_error (s, i)
+ local msg = (#s < i + 20) and s:sub(i)
+ or s:sub(i,i+20) .. "..."
+ msg = ("pattern error near '%s'"):format(msg)
+ error(msg, 2)
+end
+
+local function mult (p, n)
+ local np = mm.P(true)
+ while n >= 1 do
+ if n%2 >= 1 then np = np * p end
+ p = p * p
+ n = n/2
+ end
+ return np
+end
+
+local function equalcap (s, i, c)
+ if type(c) ~= "string" then return nil end
+ local e = #c + i
+ if s:sub(i, e - 1) == c then return e else return nil end
+end
+
+
+local S = (Predef.space + "--" * (any - Predef.nl)^0)^0
+
+local name = m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0
+
+local arrow = S * "<-"
+
+local seq_follow = m.P"/" + ")" + "}" + ":}" + "~}" + "|}" + (name * arrow) + -1
+
+name = m.C(name)
+
+
+-- a defined name only have meaning in a given environment
+local Def = name * m.Carg(1)
+
+
+local function getdef (id, defs)
+ local c = defs and defs[id]
+ if not c then error("undefined name: " .. id) end
+ return c
+end
+
+-- match a name and return a group of its corresponding definition
+-- and 'f' (to be folded in 'Suffix')
+local function defwithfunc (f)
+ return m.Cg(Def / getdef * m.Cc(f))
+end
+
+
+local num = m.C(m.R"09"^1) * S / tonumber
+
+local String = "'" * m.C((any - "'")^0) * "'" +
+ '"' * m.C((any - '"')^0) * '"'
+
+
+local defined = "%" * Def / function (c,Defs)
+ local cat = Defs and Defs[c] or Predef[c]
+ if not cat then error ("name '" .. c .. "' undefined") end
+ return cat
+end
+
+local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R
+
+local item = (defined + Range + m.C(any)) / m.P
+
+local Class =
+ "["
+ * (m.C(m.P"^"^-1)) -- optional complement symbol
+ * m.Cf(item * (item - "]")^0, mt.__add) /
+ function (c, p) return c == "^" and any - p or p end
+ * "]"
+
+local function adddef (t, k, exp)
+ if t[k] then
+ error("'"..k.."' already defined as a rule")
+ else
+ t[k] = exp
+ end
+ return t
+end
+
+local function firstdef (n, r) return adddef({n}, n, r) end
+
+
+local function NT (n, b)
+ if not b then
+ error("rule '"..n.."' used outside a grammar")
+ else return mm.V(n)
+ end
+end
+
+
+local exp = m.P{ "Exp",
+ Exp = S * ( m.V"Grammar"
+ + m.Cf(m.V"Seq" * ("/" * S * m.V"Seq")^0, mt.__add) );
+ Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix"^0 , mt.__mul)
+ * (#seq_follow + patt_error);
+ Prefix = "&" * S * m.V"Prefix" / mt.__len
+ + "!" * S * m.V"Prefix" / mt.__unm
+ + m.V"Suffix";
+ Suffix = m.Cf(m.V"Primary" * S *
+ ( ( m.P"+" * m.Cc(1, mt.__pow)
+ + m.P"*" * m.Cc(0, mt.__pow)
+ + m.P"?" * m.Cc(-1, mt.__pow)
+ + "^" * ( m.Cg(num * m.Cc(mult))
+ + m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow))
+ )
+ + "->" * S * ( m.Cg((String + num) * m.Cc(mt.__div))
+ + m.P"{}" * m.Cc(nil, m.Ct)
+ + defwithfunc(mt.__div)
+ )
+ + "=>" * S * defwithfunc(m.Cmt)
+ + "~>" * S * defwithfunc(m.Cf)
+ ) * S
+ )^0, function (a,b,f) return f(a,b) end );
+ Primary = "(" * m.V"Exp" * ")"
+ + String / mm.P
+ + Class
+ + defined
+ + "{:" * (name * ":" + m.Cc(nil)) * m.V"Exp" * ":}" /
+ function (n, p) return mm.Cg(p, n) end
+ + "=" * name / function (n) return mm.Cmt(mm.Cb(n), equalcap) end
+ + m.P"{}" / mm.Cp
+ + "{~" * m.V"Exp" * "~}" / mm.Cs
+ + "{|" * m.V"Exp" * "|}" / mm.Ct
+ + "{" * m.V"Exp" * "}" / mm.C
+ + m.P"." * m.Cc(any)
+ + (name * -arrow + "<" * name * ">") * m.Cb("G") / NT;
+ Definition = name * arrow * m.V"Exp";
+ Grammar = m.Cg(m.Cc(true), "G") *
+ m.Cf(m.V"Definition" / firstdef * m.Cg(m.V"Definition")^0,
+ adddef) / mm.P
+}
+
+local pattern = S * m.Cg(m.Cc(false), "G") * exp / mm.P * (-any + patt_error)
+
+
+local function compile (p, defs)
+ if mm.type(p) == "pattern" then return p end -- already compiled
+ local cp = pattern:match(p, 1, defs)
+ if not cp then error("incorrect pattern", 3) end
+ return cp
+end
+
+local function match (s, p, i)
+ local cp = mem[p]
+ if not cp then
+ cp = compile(p)
+ mem[p] = cp
+ end
+ return cp:match(s, i or 1)
+end
+
+local function find (s, p, i)
+ local cp = fmem[p]
+ if not cp then
+ cp = compile(p) / 0
+ cp = mm.P{ mm.Cp() * cp * mm.Cp() + 1 * mm.V(1) }
+ fmem[p] = cp
+ end
+ local i, e = cp:match(s, i or 1)
+ if i then return i, e - 1
+ else return i
+ end
+end
+
+local function gsub (s, p, rep)
+ local g = gmem[p] or {} -- ensure gmem[p] is not collected while here
+ gmem[p] = g
+ local cp = g[rep]
+ if not cp then
+ cp = compile(p)
+ cp = mm.Cs((cp / rep + 1)^0)
+ g[rep] = cp
+ end
+ return cp:match(s)
+end
+
+
+-- exported names
+local re = {
+ compile = compile,
+ match = match,
+ find = find,
+ gsub = gsub,
+ updatelocale = updatelocale,
+}
+
+if version == "Lua 5.1" then _G.re = re end
+
+return re
More information about the Commits
mailing list