commit 5f80e2e: [Rework] Slightly improve old regexp API
Vsevolod Stakhov
vsevolod at highsecure.ru
Thu Jul 15 19:07:05 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-07-15 14:23:17 +0100
URL: https://github.com/rspamd/rspamd/commit/5f80e2e32e9e82c769b3f28be29cff4f62d9ecce
[Rework] Slightly improve old regexp API
---
src/libutil/regexp.c | 49 +++++++++++++++++++++++++++----------------------
src/libutil/regexp.h | 34 ++++++++++++++++++++++------------
2 files changed, 49 insertions(+), 34 deletions(-)
diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c
index 93b8125e4..6ec5b4d39 100644
--- a/src/libutil/regexp.c
+++ b/src/libutil/regexp.c
@@ -306,10 +306,10 @@ rspamd_regexp_post_process (rspamd_regexp_t *r)
}
rspamd_regexp_t*
-rspamd_regexp_new (const gchar *pattern, const gchar *flags,
+rspamd_regexp_new_len (const gchar *pattern, gsize len, const gchar *flags,
GError **err)
{
- const gchar *start = pattern, *end, *flags_str = NULL;
+ const gchar *start = pattern, *end = start + len, *flags_str = NULL;
gchar *err_str;
rspamd_regexp_t *res;
gboolean explicit_utf = FALSE;
@@ -331,7 +331,7 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags,
return NULL;
}
- if (flags == NULL) {
+ if (flags == NULL && start < end) {
/* We need to parse pattern and detect flags set */
if (*start == '/') {
sep = '/';
@@ -347,14 +347,13 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags,
rspamd_flags |= RSPAMD_REGEXP_FLAG_FULL_MATCH;
}
- if (sep == '\0' || g_ascii_isalnum (sep)) {
+ if (g_ascii_isalnum (sep)) {
/* We have no flags, no separators and just use all line as expr */
start = pattern;
- end = start + strlen (pattern);
rspamd_flags &= ~RSPAMD_REGEXP_FLAG_FULL_MATCH;
}
else {
- end = strrchr (pattern, sep);
+ end = rspamd_memrchr(pattern, sep, len);
if (end == NULL || end <= start) {
g_set_error (err, rspamd_regexp_quark(), EINVAL,
@@ -370,7 +369,6 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags,
/* Strictly check all flags */
strict_flags = TRUE;
start = pattern;
- end = pattern + strlen (pattern);
flags_str = flags;
}
@@ -384,7 +382,7 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags,
#endif
if (flags_str != NULL) {
- while (*flags_str) {
+ while (flags_str < end) {
switch (*flags_str) {
case 'i':
regexp_flags |= PCRE_FLAG(CASELESS);
@@ -535,9 +533,16 @@ fin:
return res;
}
+rspamd_regexp_t *
+rspamd_regexp_new (const gchar *pattern, const gchar *flags,
+ GError **err)
+{
+ return rspamd_regexp_new_len(pattern, strlen(pattern), flags, err);
+}
+
#ifndef WITH_PCRE2
gboolean
-rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len,
+rspamd_regexp_search (const rspamd_regexp_t *re, const gchar *text, gsize len,
const gchar **start, const gchar **end, gboolean raw,
GArray *captures)
{
@@ -672,7 +677,7 @@ rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len,
#else
/* PCRE 2 version */
gboolean
-rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len,
+rspamd_regexp_search (const rspamd_regexp_t *re, const gchar *text, gsize len,
const gchar **start, const gchar **end, gboolean raw,
GArray *captures)
{
@@ -788,7 +793,7 @@ rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len,
#endif
const char*
-rspamd_regexp_get_pattern (rspamd_regexp_t *re)
+rspamd_regexp_get_pattern (const rspamd_regexp_t *re)
{
g_assert (re != NULL);
@@ -808,7 +813,7 @@ rspamd_regexp_set_flags (rspamd_regexp_t *re, guint new_flags)
}
guint
-rspamd_regexp_get_flags (rspamd_regexp_t *re)
+rspamd_regexp_get_flags (const rspamd_regexp_t *re)
{
g_assert (re != NULL);
@@ -816,7 +821,7 @@ rspamd_regexp_get_flags (rspamd_regexp_t *re)
}
guint
-rspamd_regexp_get_pcre_flags (rspamd_regexp_t *re)
+rspamd_regexp_get_pcre_flags (const rspamd_regexp_t *re)
{
g_assert (re != NULL);
@@ -824,7 +829,7 @@ rspamd_regexp_get_pcre_flags (rspamd_regexp_t *re)
}
gint
-rspamd_regexp_get_nbackrefs (rspamd_regexp_t *re)
+rspamd_regexp_get_nbackrefs (const rspamd_regexp_t *re)
{
g_assert (re != NULL);
@@ -832,7 +837,7 @@ rspamd_regexp_get_nbackrefs (rspamd_regexp_t *re)
}
gint
-rspamd_regexp_get_ncaptures (rspamd_regexp_t *re)
+rspamd_regexp_get_ncaptures (const rspamd_regexp_t *re)
{
g_assert (re != NULL);
@@ -840,7 +845,7 @@ rspamd_regexp_get_ncaptures (rspamd_regexp_t *re)
}
guint
-rspamd_regexp_get_maxhits (rspamd_regexp_t *re)
+rspamd_regexp_get_maxhits (const rspamd_regexp_t *re)
{
g_assert (re != NULL);
@@ -860,7 +865,7 @@ rspamd_regexp_set_maxhits (rspamd_regexp_t *re, guint new_maxhits)
}
guint64
-rspamd_regexp_get_cache_id (rspamd_regexp_t *re)
+rspamd_regexp_get_cache_id (const rspamd_regexp_t *re)
{
g_assert (re != NULL);
@@ -880,7 +885,7 @@ rspamd_regexp_set_cache_id (rspamd_regexp_t *re, guint64 id)
}
gboolean
-rspamd_regexp_match (rspamd_regexp_t *re, const gchar *text, gsize len,
+rspamd_regexp_match (const rspamd_regexp_t *re, const gchar *text, gsize len,
gboolean raw)
{
const gchar *start = NULL, *end = NULL;
@@ -888,7 +893,7 @@ rspamd_regexp_match (rspamd_regexp_t *re, const gchar *text, gsize len,
g_assert (re != NULL);
g_assert (text != NULL);
- if (len == 0 && text != NULL) {
+ if (len == 0) {
len = strlen (text);
}
@@ -926,7 +931,7 @@ rspamd_regexp_set_ud (rspamd_regexp_t *re, gpointer ud)
}
gpointer
-rspamd_regexp_get_ud (rspamd_regexp_t *re)
+rspamd_regexp_get_ud (const rspamd_regexp_t *re)
{
g_assert (re != NULL);
@@ -1169,7 +1174,7 @@ rspamd_regexp_library_init (struct rspamd_config *cfg)
}
gpointer
-rspamd_regexp_get_id (rspamd_regexp_t *re)
+rspamd_regexp_get_id (const rspamd_regexp_t *re)
{
g_assert (re != NULL);
@@ -1177,7 +1182,7 @@ rspamd_regexp_get_id (rspamd_regexp_t *re)
}
gpointer
-rspamd_regexp_get_class (rspamd_regexp_t *re)
+rspamd_regexp_get_class (const rspamd_regexp_t *re)
{
g_assert (re != NULL);
diff --git a/src/libutil/regexp.h b/src/libutil/regexp.h
index 128edd761..1e98b7b3c 100644
--- a/src/libutil/regexp.h
+++ b/src/libutil/regexp.h
@@ -60,6 +60,16 @@ struct rspamd_re_capture {
rspamd_regexp_t *rspamd_regexp_new (const gchar *pattern, const gchar *flags,
GError **err);
+/**
+ * Create new rspamd regexp from a pattern of explicit length
+ * @param pattern regexp pattern
+ * @param len length of the pattern in bytes
+ * @param flags flags (may be enclosed inside pattern)
+ * @param err error pointer set if compilation failed
+ * @return new regexp object
+ */
+rspamd_regexp_t *rspamd_regexp_new_len (const gchar *pattern, gsize len, const gchar *flags,
+ GError **err);
+
/**
* Search the specified regexp in the text
* @param re
@@ -71,7 +81,7 @@ rspamd_regexp_t *rspamd_regexp_new (const gchar *pattern, const gchar *flags,
* @param captures array of captured strings of type rspamd_fstring_capture or NULL
* @return
*/
-gboolean rspamd_regexp_search (rspamd_regexp_t *re,
+gboolean rspamd_regexp_search (const rspamd_regexp_t *re,
const gchar *text, gsize len,
const gchar **start, const gchar **end, gboolean raw,
GArray *captures);
@@ -84,7 +94,7 @@ gboolean rspamd_regexp_search (rspamd_regexp_t *re,
* @param len
* @return
*/
-gboolean rspamd_regexp_match (rspamd_regexp_t *re,
+gboolean rspamd_regexp_match (const rspamd_regexp_t *re,
const gchar *text, gsize len, gboolean raw);
/**
@@ -110,31 +120,31 @@ void rspamd_regexp_set_ud (rspamd_regexp_t *re, gpointer ud);
* @param re regexp object
* @return opaque pointer
*/
-gpointer rspamd_regexp_get_ud (rspamd_regexp_t *re);
+gpointer rspamd_regexp_get_ud (const rspamd_regexp_t *re);
/**
* Get regexp ID suitable for hashing
* @param re
* @return
*/
-gpointer rspamd_regexp_get_id (rspamd_regexp_t *re);
+gpointer rspamd_regexp_get_id (const rspamd_regexp_t *re);
/**
* Get pattern for the specified regexp object
* @param re
* @return
*/
-const char *rspamd_regexp_get_pattern (rspamd_regexp_t *re);
+const char *rspamd_regexp_get_pattern (const rspamd_regexp_t *re);
/**
* Get PCRE flags for the regexp
*/
-guint rspamd_regexp_get_pcre_flags (rspamd_regexp_t *re);
+guint rspamd_regexp_get_pcre_flags (const rspamd_regexp_t *re);
/**
* Get rspamd flags for the regexp
*/
-guint rspamd_regexp_get_flags (rspamd_regexp_t *re);
+guint rspamd_regexp_get_flags (const rspamd_regexp_t *re);
/**
* Set rspamd flags for the regexp
@@ -144,7 +154,7 @@ guint rspamd_regexp_set_flags (rspamd_regexp_t *re, guint new_flags);
/**
* Get regexp maximum hits
*/
-guint rspamd_regexp_get_maxhits (rspamd_regexp_t *re);
+guint rspamd_regexp_get_maxhits (const rspamd_regexp_t *re);
/**
* Set regexp maximum hits
@@ -154,17 +164,17 @@ guint rspamd_regexp_set_maxhits (rspamd_regexp_t *re, guint new_maxhits);
/**
* Returns number of backreferences in a regexp
*/
-gint rspamd_regexp_get_nbackrefs (rspamd_regexp_t *re);
+gint rspamd_regexp_get_nbackrefs (const rspamd_regexp_t *re);
/**
* Returns number of capture groups in a regexp
*/
-gint rspamd_regexp_get_ncaptures (rspamd_regexp_t *re);
+gint rspamd_regexp_get_ncaptures (const rspamd_regexp_t *re);
/**
* Returns cache id for a regexp
*/
-guint64 rspamd_regexp_get_cache_id (rspamd_regexp_t *re);
+guint64 rspamd_regexp_get_cache_id (const rspamd_regexp_t *re);
/**
* Sets cache id for a regexp
@@ -174,7 +184,7 @@ guint64 rspamd_regexp_set_cache_id (rspamd_regexp_t *re, guint64 id);
/**
* Get regexp class for the re object
*/
-gpointer rspamd_regexp_get_class (rspamd_regexp_t *re);
+gpointer rspamd_regexp_get_class (const rspamd_regexp_t *re);
/**
* Set regexp class for the re object
More information about the Commits
mailing list