commit 175d7ac: [Minor] Update lpeg to the upstream

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Jul 9 12:21:09 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-07-09 13:08:39 +0100
URL: https://github.com/rspamd/rspamd/commit/175d7ac25a9d638e859d677d662ced1cc746c6ce (HEAD -> master)

[Minor] Update lpeg to the upstream

---
 contrib/lua-lpeg/lpcap.c  | 64 +++++++++++++++++++++------------
 contrib/lua-lpeg/lpcap.h  | 20 +++++++++--
 contrib/lua-lpeg/lpcode.c | 84 ++++++++++++++++++++++++++++---------------
 contrib/lua-lpeg/lpcode.h |  6 ++--
 contrib/lua-lpeg/lpvm.c   | 92 +++++++++++++++++++++++++++--------------------
 5 files changed, 169 insertions(+), 97 deletions(-)

diff --git a/contrib/lua-lpeg/lpcap.c b/contrib/lua-lpeg/lpcap.c
index c9085de06..b332fde49 100644
--- a/contrib/lua-lpeg/lpcap.c
+++ b/contrib/lua-lpeg/lpcap.c
@@ -1,5 +1,5 @@
 /*
-** $Id: lpcap.c,v 1.6 2015/06/15 16:09:57 roberto Exp $
+** $Id: lpcap.c $
 ** Copyright 2007, Lua.org & PUC-Rio  (see 'lpeg.html' for license)
 */
 
@@ -271,15 +271,15 @@ int finddyncap (Capture *cap, Capture *last) {
 
 
 /*
-** Calls a runtime capture. Returns number of captures removed by
-** the call, including the initial Cgroup. (Captures to be added are
-** on the Lua stack.)
+** Calls a runtime capture. Returns number of captures "removed" by the
+** call, that is, those inside the group capture. Captures to be added
+** are on the Lua stack.
 */
 int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) {
   int n, id;
   lua_State *L = cs->L;
   int otop = lua_gettop(L);
-  Capture *open = findopen(close);
+  Capture *open = findopen(close);  /* get open group capture */
   assert(captype(open) == Cgroup);
   id = finddyncap(open, close);  /* get first dynamic capture argument */
   close->kind = Cclose;  /* closes the group */
@@ -299,7 +299,7 @@ int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) {
   }
   else
     *rem = 0;  /* no dynamic captures removed */
-  return close - open;  /* number of captures of all kinds removed */
+  return close - open - 1;  /* number of captures to be removed */
 }
 
 
@@ -441,70 +441,88 @@ static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) {
 }
 
 
+#if !defined(MAXRECLEVEL)
+#define MAXRECLEVEL	200
+#endif
+
+
 /*
 ** Push all values of the current capture into the stack; returns
 ** number of values pushed
 */
 static int pushcapture (CapState *cs) {
   lua_State *L = cs->L;
+  int res;
   luaL_checkstack(L, 4, "too many captures");
+  if (cs->reclevel++ > MAXRECLEVEL)
+    return luaL_error(L, "subcapture nesting too deep");
   switch (captype(cs->cap)) {
     case Cposition: {
       lua_pushinteger(L, cs->cap->s - cs->s + 1);
       cs->cap++;
-      return 1;
+      res = 1;
+      break;
     }
     case Cconst: {
       pushluaval(cs);
       cs->cap++;
-      return 1;
+      res = 1;
+      break;
     }
     case Carg: {
       int arg = (cs->cap++)->idx;
       if (arg + FIXEDARGS > cs->ptop)
         return luaL_error(L, "reference to absent extra argument #%d", arg);
       lua_pushvalue(L, arg + FIXEDARGS);
-      return 1;
+      res = 1;
+      break;
     }
     case Csimple: {
       int k = pushnestedvalues(cs, 1);
       lua_insert(L, -k);  /* make whole match be first result */
-      return k;
+      res = k;
+      break;
     }
     case Cruntime: {
       lua_pushvalue(L, (cs->cap++)->idx);  /* value is in the stack */
-      return 1;
+      res = 1;
+      break;
     }
     case Cstring: {
       luaL_Buffer b;
       luaL_buffinit(L, &b);
       stringcap(&b, cs);
       luaL_pushresult(&b);
-      return 1;
+      res = 1;
+      break;
     }
     case Csubst: {
       luaL_Buffer b;
       luaL_buffinit(L, &b);
       substcap(&b, cs);
       luaL_pushresult(&b);
-      return 1;
+      res = 1;
+      break;
     }
     case Cgroup: {
       if (cs->cap->idx == 0)  /* anonymous group? */
-        return pushnestedvalues(cs, 0);  /* add all nested values */
+        res = pushnestedvalues(cs, 0);  /* add all nested values */
       else {  /* named group: add no values */
         nextcap(cs);  /* skip capture */
-        return 0;
+        res = 0;
       }
+      break;
     }
-    case Cbackref: return backrefcap(cs);
-    case Ctable: return tablecap(cs);
-    case Cfunction: return functioncap(cs);
-    case Cnum: return numcap(cs);
-    case Cquery: return querycap(cs);
-    case Cfold: return foldcap(cs);
-    default: assert(0); return 0;
+    case Cbackref: res = backrefcap(cs); break;
+    case Ctable: res = tablecap(cs); break;
+    case Cfunction: res = functioncap(cs); break;
+    case Cnum: res = numcap(cs); break;
+    case Cquery: res = querycap(cs); break;
+    case Cfold: res = foldcap(cs); break;
+    default: assert(0); res = 0;
   }
+  cs->reclevel--;
+  return res;
 }
 
 
@@ -521,7 +539,7 @@ int getcaptures (lua_State *L, const char *s, const char *r, int ptop) {
   int n = 0;
   if (!isclosecap(capture)) {  /* is there any capture? */
     CapState cs;
-    cs.ocap = cs.cap = capture; cs.L = L;
+    cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0;
     cs.s = s; cs.valuecached = 0; cs.ptop = ptop;
     do {  /* collect their values */
       n += pushcapture(&cs);
diff --git a/contrib/lua-lpeg/lpcap.h b/contrib/lua-lpeg/lpcap.h
index d762fdcfa..dc10d6969 100644
--- a/contrib/lua-lpeg/lpcap.h
+++ b/contrib/lua-lpeg/lpcap.h
@@ -1,5 +1,5 @@
 /*
-** $Id: lpcap.h,v 1.2 2015/02/27 17:13:17 roberto Exp $
+** $Id: lpcap.h $
 */
 
 #if !defined(lpcap_h)
@@ -11,8 +11,21 @@
 
 /* kinds of captures */
 typedef enum CapKind {
-  Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction,
-  Cquery, Cstring, Cnum, Csubst, Cfold, Cruntime, Cgroup
+  Cclose,  /* not used in trees */
+  Cposition,
+  Cconst,  /* ktable[key] is Lua constant */
+  Cbackref,  /* ktable[key] is "name" of group to get capture */
+  Carg,  /* 'key' is arg's number */
+  Csimple,  /* next node is pattern */
+  Ctable,  /* next node is pattern */
+  Cfunction,  /* ktable[key] is function; next node is pattern */
+  Cquery,  /* ktable[key] is table; next node is pattern */
+  Cstring,  /* ktable[key] is string; next node is pattern */
+  Cnum,  /* numbered capture; 'key' is number of value to return */
+  Csubst,  /* substitution capture; next node is pattern */
+  Cfold,  /* ktable[key] is function; next node is pattern */
+  Cruntime,  /* not used in trees (is uses another type for tree) */
+  Cgroup  /* ktable[key] is group's "name" */
 } CapKind;
 
 
@@ -31,6 +44,7 @@ typedef struct CapState {
   int ptop;  /* index of last argument to 'match' */
   const char *s;  /* original string */
   int valuecached;  /* value stored in cache slot */
+  int reclevel;  /* recursion level */
 } CapState;
 
 
diff --git a/contrib/lua-lpeg/lpcode.c b/contrib/lua-lpeg/lpcode.c
index 6feefeb43..392345972 100644
--- a/contrib/lua-lpeg/lpcode.c
+++ b/contrib/lua-lpeg/lpcode.c
@@ -1,5 +1,5 @@
 /*
-** $Id: lpcode.c,v 1.23 2015/06/12 18:36:47 roberto Exp $
+** $Id: lpcode.c $
 ** Copyright 2007, Lua.org & PUC-Rio  (see 'lpeg.html' for license)
 */
 
@@ -125,6 +125,27 @@ int tocharset (TTree *tree, Charset *cs) {
 }
 
 
+/*
+** Visit a TCall node taking care to stop recursion. If node not yet
+** visited, return 'f(sib2(tree))', otherwise return 'def' (default
+** value)
+*/
+static int callrecursive (TTree *tree, int f (TTree *t), int def) {
+  int key = tree->key;
+  assert(tree->tag == TCall);
+  assert(sib2(tree)->tag == TRule);
+  if (key == 0)  /* node already visited? */
+    return def;  /* return default value */
+  else {  /* first visit */
+    int result;
+    tree->key = 0;  /* mark call as already visited */
+    result = f(sib2(tree));  /* go to called rule */
+    tree->key = key;  /* restore tree */
+    return result;
+  }
+}
+
+
 /*
 ** Check whether a pattern tree has captures
 */
@@ -134,14 +155,17 @@ int hascaptures (TTree *tree) {
     case TCapture: case TRunTime:
       return 1;
     case TCall:
-      tree = sib2(tree); goto tailcall;  /* return hascaptures(sib2(tree)); */
+      return callrecursive(tree, hascaptures, 0);
+    case TRule:  /* do not follow siblings */
+      tree = sib1(tree); goto tailcall;
     case TOpenCall: assert(0);
     default: {
       switch (numsiblings[tree->tag]) {
         case 1:  /* return hascaptures(sib1(tree)); */
           tree = sib1(tree); goto tailcall;
         case 2:
-          if (hascaptures(sib1(tree))) return 1;
+          if (hascaptures(sib1(tree)))
+            return 1;
           /* else return hascaptures(sib2(tree)); */
           tree = sib2(tree); goto tailcall;
         default: assert(numsiblings[tree->tag] == 0); return 0;
@@ -208,9 +232,9 @@ int checkaux (TTree *tree, int pred) {
 
 /*
 ** number of characters to match a pattern (or -1 if variable)
-** ('count' avoids infinite loops for grammars)
 */
-int fixedlenx (TTree *tree, int count, int len) {
+int fixedlen (TTree *tree) {
+  int len = 0;  /* to accumulate in tail calls */
  tailcall:
   switch (tree->tag) {
     case TChar: case TSet: case TAny:
@@ -220,26 +244,29 @@ int fixedlenx (TTree *tree, int count, int len) {
     case TRep: case TRunTime: case TOpenCall:
       return -1;
     case TCapture: case TRule: case TGrammar:
-      /* return fixedlenx(sib1(tree), count); */
+      /* return fixedlen(sib1(tree)); */
       tree = sib1(tree); goto tailcall;
-    case TCall:
-      if (count++ >= MAXRULES)
-        return -1;  /* may be a loop */
-      /* else return fixedlenx(sib2(tree), count); */
-      tree = sib2(tree); goto tailcall;
+    case TCall: {
+      int n1 = callrecursive(tree, fixedlen, -1);
+      if (n1 < 0)
+        return -1;
+      else
+        return len + n1;
+    }
     case TSeq: {
-      len = fixedlenx(sib1(tree), count, len);
-      if (len < 0) return -1;
-      /* else return fixedlenx(sib2(tree), count, len); */
-      tree = sib2(tree); goto tailcall;
+      int n1 = fixedlen(sib1(tree));
+      if (n1 < 0)
+        return -1;
+      /* else return fixedlen(sib2(tree)) + len; */
+      len += n1; tree = sib2(tree); goto tailcall;
     }
     case TChoice: {
-      int n1, n2;
-      n1 = fixedlenx(sib1(tree), count, len);
-      if (n1 < 0) return -1;
-      n2 = fixedlenx(sib2(tree), count, len);
-      if (n1 == n2) return n1;
-      else return -1;
+      int n1 = fixedlen(sib1(tree));
+      int n2 = fixedlen(sib2(tree));
+      if (n1 != n2 || n1 < 0)
+        return -1;
+      else
+        return len + n1;
     }
     default: assert(0); return 0;
   };
@@ -248,7 +275,7 @@ int fixedlenx (TTree *tree, int count, int len) {
 
 /*
 ** Computes the 'first set' of a pattern.
-** The result is a conservative approximation:
+** The result is a conservative aproximation:
 **   match p ax -> x (for some x) ==> a belongs to first(p)
 ** or
 **   a not in first(p) ==> match p ax -> fail (for all x)
@@ -710,9 +737,10 @@ static void codeand (CompileState *compst, TTree *tree, int tt) {
 
 
 /*
-** Captures: if pattern has fixed (and not too big) length, use
-** a single IFullCapture instruction after the match; otherwise,
-** enclose the pattern with OpenCapture - CloseCapture.
+** Captures: if pattern has fixed (and not too big) length, and it
+** has no nested captures, use a single IFullCapture instruction
+** after the match; otherwise, enclose the pattern with OpenCapture -
+** CloseCapture.
 */
 static void codecapture (CompileState *compst, TTree *tree, int tt,
                          const Charset *fl) {
@@ -737,13 +765,13 @@ static void coderuntime (CompileState *compst, TTree *tree, int tt) {
 
 
 /*
-** Repetition; optimizations:
+** Repetion; optimizations:
 ** When pattern is a charset, can use special instruction ISpan.
 ** When pattern is head fail, or if it starts with characters that
 ** are disjoint from what follows the repetions, a simple test
 ** is enough (a fail inside the repetition would backtrack to fail
 ** again in the following pattern, so there is no need for a choice).
-** When 'opt' is true, the repetition can reuse the Choice already
+** When 'opt' is true, the repetion can reuse the Choice already
 ** active in the stack.
 */
 static void coderep (CompileState *compst, TTree *tree, int opt,
@@ -884,7 +912,7 @@ static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2,
 
 
 /*
-** Main code-generation function: dispatch to auxiliary functions
+** Main code-generation function: dispatch to auxiliar functions
 ** according to kind of tree. ('needfollow' should return true
 ** only for consructions that use 'fl'.)
 */
diff --git a/contrib/lua-lpeg/lpcode.h b/contrib/lua-lpeg/lpcode.h
index 896d3c79a..34ee27637 100644
--- a/contrib/lua-lpeg/lpcode.h
+++ b/contrib/lua-lpeg/lpcode.h
@@ -1,5 +1,5 @@
 /*
-** $Id: lpcode.h,v 1.7 2015/06/12 18:24:45 roberto Exp $
+** $Id: lpcode.h $
 */
 
 #if !defined(lpcode_h)
@@ -13,7 +13,7 @@
 
 int tocharset (TTree *tree, Charset *cs);
 int checkaux (TTree *tree, int pred);
-int fixedlenx (TTree *tree, int count, int len);
+int fixedlen (TTree *tree);
 int hascaptures (TTree *tree);
 int lp_gc (lua_State *L);
 Instruction *compile (lua_State *L, Pattern *p);
@@ -35,8 +35,6 @@ int sizei (const Instruction *i);
 */
 #define nullable(t)	checkaux(t, PEnullable)
 
-#define fixedlen(t)     fixedlenx(t, 0, 0)
-
 
 
 #endif
diff --git a/contrib/lua-lpeg/lpvm.c b/contrib/lua-lpeg/lpvm.c
index e107292e2..3e67fcaf0 100644
--- a/contrib/lua-lpeg/lpvm.c
+++ b/contrib/lua-lpeg/lpvm.c
@@ -151,16 +151,29 @@ typedef struct Stack {
 
 
 /*
-** Double the size of the array of captures
+** Ensures the size of array 'capture' (with size '*capsize' and
+** 'captop' elements being used) is enough to accomodate 'n' extra
+** elements plus one.  (Because several opcodes add stuff to the capture
+** array, it is simpler to ensure the array always has at least one free
+** slot upfront and check its size later.)
 */
-static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) {
-  Capture *newc;
-  if (captop >= INT_MAX/((int)sizeof(Capture) * 2))
-    luaL_error(L, "too many captures");
-  newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture));
-  memcpy(newc, cap, captop * sizeof(Capture));
-  lua_replace(L, caplistidx(ptop));
-  return newc;
+static Capture *growcap (lua_State *L, Capture *capture, int *capsize,
+                                       int captop, int n, int ptop) {
+  if (*capsize - captop > n)
+    return capture;  /* no need to grow array */
+  else {  /* must grow */
+    Capture *newc;
+    int newsize = captop + n + 1;  /* minimum size needed */
+    if (newsize < INT_MAX/((int)sizeof(Capture) * 2))
+      newsize *= 2;  /* twice that size, if not too big */
+    else if (newsize >= INT_MAX/((int)sizeof(Capture)))
+      luaL_error(L, "too many captures");
+    newc = (Capture *)lua_newuserdata(L, newsize * sizeof(Capture));
+    memcpy(newc, capture, captop * sizeof(Capture));
+    *capsize = newsize;
+    lua_replace(L, caplistidx(ptop));
+    return newc;
+  }
 }
 
 
@@ -213,24 +226,24 @@ static int resdyncaptures (lua_State *L, int fr, int curr, int limit) {
 
 
 /*
-** Add capture values returned by a dynamic capture to the capture list
-** 'base', nested inside a group capture. 'fd' indexes the first capture
-** value, 'n' is the number of values (at least 1).
+** Add capture values returned by a dynamic capture to the list
+** 'capture', nested inside a group. 'fd' indexes the first capture
+** value, 'n' is the number of values (at least 1). The open group
+** capture is already in 'capture', before the place for the new entries.
 */
-static void adddyncaptures (const char *s, Capture *base, int n, int fd) {
+static void adddyncaptures (const char *s, Capture *capture, int n, int fd) {
   int i;
-  /* Cgroup capture is already there */
-  assert(base[0].kind == Cgroup && base[0].siz == 0);
-  base[0].idx = 0;  /* make it an anonymous group */
-  for (i = 1; i <= n; i++) {  /* add runtime captures */
-    base[i].kind = Cruntime;
-    base[i].siz = 1;  /* mark it as closed */
-    base[i].idx = fd + i - 1;  /* stack index of capture value */
-    base[i].s = s;
+  assert(capture[-1].kind == Cgroup && capture[-1].siz == 0);
+  capture[-1].idx = 0;  /* make group capture an anonymous group */
+  for (i = 0; i < n; i++) {  /* add runtime captures */
+    capture[i].kind = Cruntime;
+    capture[i].siz = 1;  /* mark it as closed */
+    capture[i].idx = fd + i;  /* stack index of capture value */
+    capture[i].s = s;
   }
-  base[i].kind = Cclose;  /* close group */
-  base[i].siz = 1;
-  base[i].s = s;
+  capture[n].kind = Cclose;  /* close group */
+  capture[n].siz = 1;
+  capture[n].s = s;
 }
 
 
@@ -268,9 +281,8 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
   for (;;) {
 #if defined(DEBUG)
       printf("s: |%s| stck:%d, dyncaps:%d, caps:%d  ",
-             s, stack - getstackbase(L, ptop), ndyncap, captop);
+             s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop);
       printinst(op, p);
-      printcaplist(capture, capture + captop);
 #endif
     assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop);
     switch ((Opcode)p->i.code) {
@@ -418,23 +430,27 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
         CapState cs;
         int rem, res, n;
         int fr = lua_gettop(L) + 1;  /* stack index of first result */
-        cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop;
+        cs.reclevel = 0; cs.L = L;
+        cs.s = o; cs.ocap = capture; cs.ptop = ptop;
         n = runtimecap(&cs, capture + captop, s, &rem);  /* call function */
         captop -= n;  /* remove nested captures */
+        ndyncap -= rem;  /* update number of dynamic captures */
         fr -= rem;  /* 'rem' items were popped from Lua stack */
         res = resdyncaptures(L, fr, s - o, e - o);  /* get result */
         if (res == -1)  /* fail? */
           goto fail;
         s = o + res;  /* else update current position */
         n = lua_gettop(L) - fr + 1;  /* number of new captures */
-        ndyncap += n - rem;  /* update number of dynamic captures */
-        if (n > 0) {  /* any new capture? */
-          if ((captop += n + 2) >= capsize) {
-            capture = doublecap(L, capture, captop, ptop);
-            capsize = 2 * captop;
-          }
-          /* add new captures to 'capture' list */
-          adddyncaptures(s, capture + captop - n - 2, n, fr);
+        ndyncap += n;  /* update number of dynamic captures */
+        if (n == 0)  /* no new captures? */
+          captop--;  /* remove open group */
+        else {  /* new captures; keep original open group */
+          if (fr + n >= SHRT_MAX)
+            luaL_error(L, "too many results in match-time capture");
+          /* add new captures + close group to 'capture' list */
+          capture = growcap(L, capture, &capsize, captop, n + 1, ptop);
+          adddyncaptures(s, capture + captop, n, fr);
+          captop += n + 1;  /* new captures + close group */
         }
         p++;
         continue;
@@ -466,10 +482,8 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
       pushcapture: {
         capture[captop].idx = p->i.key;
         capture[captop].kind = getkind(p);
-        if (++captop >= capsize) {
-          capture = doublecap(L, capture, captop, ptop);
-          capsize = 2 * captop;
-        }
+        captop++;
+        capture = growcap(L, capture, &capsize, captop, 0, ptop);
         p++;
         continue;
       }


More information about the Commits mailing list