commit 96b9470: [Rework] Make http normalize path function a generic function
Vsevolod Stakhov
vsevolod at rspamd.com
Sun Oct 23 21:21:05 UTC 2022
Author: Vsevolod Stakhov
Date: 2022-10-23 21:41:18 +0100
URL: https://github.com/rspamd/rspamd/commit/96b94707c1c6fde1cc2aa06522587114c5c6c809
[Rework] Make http normalize path function a generic function
---
src/controller.c | 12 +-
src/libserver/http/http_router.c | 6 +-
src/libserver/http/http_util.c | 224 --------------------------------------
src/libserver/http/http_util.h | 9 --
src/libserver/hyperscan_tools.cxx | 2 +-
src/libserver/url.c | 4 +-
src/libutil/util.c | 224 ++++++++++++++++++++++++++++++++++++++
src/libutil/util.h | 9 ++
test/lua/unit/url.lua | 2 +-
9 files changed, 246 insertions(+), 246 deletions(-)
diff --git a/src/controller.c b/src/controller.c
index e695d86a4..0ff7d64c0 100644
--- a/src/controller.c
+++ b/src/controller.c
@@ -3287,9 +3287,9 @@ rspamd_controller_handle_custom (struct rspamd_http_connection_entry *conn_ent,
lookup.begin = msg->url->str + u.field_data[UF_PATH].off;
lookup.len = u.field_data[UF_PATH].len;
- rspamd_http_normalize_path_inplace ((gchar *)lookup.begin,
- lookup.len,
- &unnorm_len);
+ rspamd_normalize_path_inplace((gchar *) lookup.begin,
+ lookup.len,
+ &unnorm_len);
lookup.len = unnorm_len;
}
else {
@@ -3494,9 +3494,9 @@ rspamd_controller_handle_lua_plugin (struct rspamd_http_connection_entry *conn_e
lookup.begin = msg->url->str + u.field_data[UF_PATH].off;
lookup.len = u.field_data[UF_PATH].len;
- rspamd_http_normalize_path_inplace ((gchar *)lookup.begin,
- lookup.len,
- &unnorm_len);
+ rspamd_normalize_path_inplace((gchar *) lookup.begin,
+ lookup.len,
+ &unnorm_len);
lookup.len = unnorm_len;
}
else {
diff --git a/src/libserver/http/http_router.c b/src/libserver/http/http_router.c
index 5c4990ab6..a70ea223f 100644
--- a/src/libserver/http/http_router.c
+++ b/src/libserver/http/http_router.c
@@ -302,9 +302,9 @@ rspamd_http_router_finish_handler (struct rspamd_http_connection *conn,
lookup.begin = pathbuf;
lookup.len = u.field_data[UF_PATH].len;
- rspamd_http_normalize_path_inplace (pathbuf,
- lookup.len,
- &unnorm_len);
+ rspamd_normalize_path_inplace(pathbuf,
+ lookup.len,
+ &unnorm_len);
lookup.len = unnorm_len;
}
else {
diff --git a/src/libserver/http/http_util.c b/src/libserver/http/http_util.c
index fd5adb3c1..c9035375b 100644
--- a/src/libserver/http/http_util.c
+++ b/src/libserver/http/http_util.c
@@ -299,228 +299,4 @@ rspamd_http_date_format (gchar *buf, gsize len, time_t time)
http_week[tms.tm_wday], tms.tm_mday,
http_month[tms.tm_mon], tms.tm_year + 1900,
tms.tm_hour, tms.tm_min, tms.tm_sec);
-}
-
-void
-rspamd_http_normalize_path_inplace (gchar *path, guint len, gsize *nlen)
-{
- const gchar *p, *end, *slash = NULL, *dot = NULL;
- gchar *o;
- enum {
- st_normal = 0,
- st_got_dot,
- st_got_dot_dot,
- st_got_slash,
- st_got_slash_slash,
- } state = st_normal;
-
- p = path;
- end = path + len;
- o = path;
-
- while (p < end) {
- switch (state) {
- case st_normal:
- if (G_UNLIKELY (*p == '/')) {
- state = st_got_slash;
- slash = p;
- }
- else if (G_UNLIKELY (*p == '.')) {
- state = st_got_dot;
- dot = p;
- }
- else {
- *o++ = *p;
- }
- p ++;
- break;
- case st_got_slash:
- if (G_UNLIKELY (*p == '/')) {
- /* Ignore double slash */
- *o++ = *p;
- state = st_got_slash_slash;
- }
- else if (G_UNLIKELY (*p == '.')) {
- dot = p;
- state = st_got_dot;
- }
- else {
- *o++ = '/';
- *o++ = *p;
- slash = NULL;
- dot = NULL;
- state = st_normal;
- }
- p ++;
- break;
- case st_got_slash_slash:
- if (G_LIKELY (*p != '/')) {
- slash = p - 1;
- dot = NULL;
- state = st_normal;
- continue;
- }
- p ++;
- break;
- case st_got_dot:
- if (G_UNLIKELY (*p == '/')) {
- /* Remove any /./ or ./ paths */
- if (((o > path && *(o - 1) != '/') || (o == path)) && slash) {
- /* Preserve one slash */
- *o++ = '/';
- }
-
- slash = p;
- dot = NULL;
- /* Ignore last slash */
- state = st_normal;
- }
- else if (*p == '.') {
- /* Double dot character */
- state = st_got_dot_dot;
- }
- else {
- /* We have something like .some or /.some */
- if (dot && p > dot) {
- if (slash == dot - 1 && (o > path && *(o - 1) != '/')) {
- /* /.blah */
- memmove (o, slash, p - slash);
- o += p - slash;
- }
- else {
- memmove (o, dot, p - dot);
- o += p - dot;
- }
- }
-
- slash = NULL;
- dot = NULL;
- state = st_normal;
- continue;
- }
-
- p ++;
- break;
- case st_got_dot_dot:
- if (*p == '/') {
- /* We have something like /../ or ../ */
- if (slash) {
- /* We need to remove the last component from o if it is there */
- if (o > path + 2 && *(o - 1) == '/') {
- slash = rspamd_memrchr (path, '/', o - path - 2);
- }
- else if (o > path + 1) {
- slash = rspamd_memrchr (path, '/', o - path - 1);
- }
- else {
- slash = NULL;
- }
-
- if (slash) {
- o = (gchar *)slash;
- }
- /* Otherwise we keep these dots */
- slash = p;
- state = st_got_slash;
- }
- else {
- /* We have something like bla../, so we need to copy it as is */
- if (o > path && dot && p > dot) {
- memmove (o, dot, p - dot);
- o += p - dot;
- }
-
- slash = NULL;
- dot = NULL;
- state = st_normal;
- continue;
- }
- }
- else {
- /* We have something like ..bla or ... */
- if (slash) {
- *o ++ = '/';
- }
-
- if (dot && p > dot) {
- memmove (o, dot, p - dot);
- o += p - dot;
- }
-
- slash = NULL;
- dot = NULL;
- state = st_normal;
- continue;
- }
-
- p ++;
- break;
- }
- }
-
- /* Leftover */
- switch (state) {
- case st_got_dot_dot:
- /* Trailing .. */
- if (slash) {
- /* We need to remove the last component from o if it is there */
- if (o > path + 2 && *(o - 1) == '/') {
- slash = rspamd_memrchr (path, '/', o - path - 2);
- }
- else if (o > path + 1) {
- slash = rspamd_memrchr (path, '/', o - path - 1);
- }
- else {
- if (o == path) {
- /* Corner case */
- *o++ = '/';
- }
-
- slash = NULL;
- }
-
- if (slash) {
- /* Remove last / */
- o = (gchar *)slash;
- }
- }
- else {
- /* Corner case */
- if (o == path) {
- *o++ = '/';
- }
- else {
- if (dot && p > dot) {
- memmove (o, dot, p - dot);
- o += p - dot;
- }
- }
- }
- break;
- case st_got_dot:
- if (slash) {
- /* /. -> must be / */
- *o++ = '/';
- }
- else {
- if (o > path) {
- *o++ = '.';
- }
- }
- break;
- case st_got_slash:
- *o++ = '/';
- break;
- default:
-#if 0
- if (o > path + 1 && *(o - 1) == '/') {
- o --;
- }
-#endif
- break;
- }
-
- if (nlen) {
- *nlen = (o - path);
- }
}
\ No newline at end of file
diff --git a/src/libserver/http/http_util.h b/src/libserver/http/http_util.h
index 19b497f30..3d8356c6d 100644
--- a/src/libserver/http/http_util.h
+++ b/src/libserver/http/http_util.h
@@ -40,15 +40,6 @@ time_t rspamd_http_parse_date (const gchar *header, gsize len);
*/
glong rspamd_http_date_format (gchar *buf, gsize len, time_t time);
-/**
- * Normalize HTTP path removing dot sequences and repeating '/' symbols as
- * per rfc3986#section-5.2
- * @param path
- * @param len
- * @param nlen
- */
-void rspamd_http_normalize_path_inplace (gchar *path, guint len, gsize *nlen);
-
#ifdef __cplusplus
}
#endif
diff --git a/src/libserver/hyperscan_tools.cxx b/src/libserver/hyperscan_tools.cxx
index 6ec5f7c36..bb1c9ffbc 100644
--- a/src/libserver/hyperscan_tools.cxx
+++ b/src/libserver/hyperscan_tools.cxx
@@ -140,7 +140,7 @@ public:
auto mut_fname = std::string{fname};
std::size_t sz;
- rspamd_http_normalize_path_inplace(mut_fname.data(), mut_fname.size(), &sz);
+ rspamd_normalize_path_inplace(mut_fname.data(), mut_fname.size(), &sz);
mut_fname.resize(sz);
auto dir = hs_known_files_cache::get_dir(mut_fname);
auto ext = hs_known_files_cache::get_extension(mut_fname);
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 805e3d65d..7be9d020a 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -2439,8 +2439,8 @@ rspamd_url_parse (struct rspamd_url *uri,
rspamd_url_shift (uri, unquoted_len, UF_PATH);
/* We now normalize path */
- rspamd_http_normalize_path_inplace (rspamd_url_data_unsafe (uri),
- uri->datalen, &unquoted_len);
+ rspamd_normalize_path_inplace(rspamd_url_data_unsafe (uri),
+ uri->datalen, &unquoted_len);
rspamd_url_shift (uri, unquoted_len, UF_PATH);
}
diff --git a/src/libutil/util.c b/src/libutil/util.c
index 547669536..bc62bb919 100644
--- a/src/libutil/util.c
+++ b/src/libutil/util.c
@@ -2471,3 +2471,227 @@ rspamd_sum_floats (float *ar, gsize *nelts)
*nelts = cnt;
return sum;
}
+
+void
+rspamd_normalize_path_inplace (gchar *path, guint len, gsize *nlen)
+{
+ const gchar *p, *end, *slash = NULL, *dot = NULL;
+ gchar *o;
+ enum {
+ st_normal = 0,
+ st_got_dot,
+ st_got_dot_dot,
+ st_got_slash,
+ st_got_slash_slash,
+ } state = st_normal;
+
+ p = path;
+ end = path + len;
+ o = path;
+
+ while (p < end) {
+ switch (state) {
+ case st_normal:
+ if (G_UNLIKELY (*p == '/')) {
+ state = st_got_slash;
+ slash = p;
+ }
+ else if (G_UNLIKELY (*p == '.')) {
+ state = st_got_dot;
+ dot = p;
+ }
+ else {
+ *o++ = *p;
+ }
+ p ++;
+ break;
+ case st_got_slash:
+ if (G_UNLIKELY (*p == '/')) {
+ /* Ignore double slash */
+ *o++ = *p;
+ state = st_got_slash_slash;
+ }
+ else if (G_UNLIKELY (*p == '.')) {
+ dot = p;
+ state = st_got_dot;
+ }
+ else {
+ *o++ = '/';
+ *o++ = *p;
+ slash = NULL;
+ dot = NULL;
+ state = st_normal;
+ }
+ p ++;
+ break;
+ case st_got_slash_slash:
+ if (G_LIKELY (*p != '/')) {
+ slash = p - 1;
+ dot = NULL;
+ state = st_normal;
+ continue;
+ }
+ p ++;
+ break;
+ case st_got_dot:
+ if (G_UNLIKELY (*p == '/')) {
+ /* Remove any /./ or ./ paths */
+ if (((o > path && *(o - 1) != '/') || (o == path)) && slash) {
+ /* Preserve one slash */
+ *o++ = '/';
+ }
+
+ slash = p;
+ dot = NULL;
+ /* Ignore last slash */
+ state = st_normal;
+ }
+ else if (*p == '.') {
+ /* Double dot character */
+ state = st_got_dot_dot;
+ }
+ else {
+ /* We have something like .some or /.some */
+ if (dot && p > dot) {
+ if (slash == dot - 1 && (o > path && *(o - 1) != '/')) {
+ /* /.blah */
+ memmove (o, slash, p - slash);
+ o += p - slash;
+ }
+ else {
+ memmove (o, dot, p - dot);
+ o += p - dot;
+ }
+ }
+
+ slash = NULL;
+ dot = NULL;
+ state = st_normal;
+ continue;
+ }
+
+ p ++;
+ break;
+ case st_got_dot_dot:
+ if (*p == '/') {
+ /* We have something like /../ or ../ */
+ if (slash) {
+ /* We need to remove the last component from o if it is there */
+ if (o > path + 2 && *(o - 1) == '/') {
+ slash = rspamd_memrchr (path, '/', o - path - 2);
+ }
+ else if (o > path + 1) {
+ slash = rspamd_memrchr (path, '/', o - path - 1);
+ }
+ else {
+ slash = NULL;
+ }
+
+ if (slash) {
+ o = (gchar *)slash;
+ }
+ /* Otherwise we keep these dots */
+ slash = p;
+ state = st_got_slash;
+ }
+ else {
+ /* We have something like bla../, so we need to copy it as is */
+ if (o > path && dot && p > dot) {
+ memmove (o, dot, p - dot);
+ o += p - dot;
+ }
+
+ slash = NULL;
+ dot = NULL;
+ state = st_normal;
+ continue;
+ }
+ }
+ else {
+ /* We have something like ..bla or ... */
+ if (slash) {
+ *o ++ = '/';
+ }
+
+ if (dot && p > dot) {
+ memmove (o, dot, p - dot);
+ o += p - dot;
+ }
+
+ slash = NULL;
+ dot = NULL;
+ state = st_normal;
+ continue;
+ }
+
+ p ++;
+ break;
+ }
+ }
+
+ /* Leftover */
+ switch (state) {
+ case st_got_dot_dot:
+ /* Trailing .. */
+ if (slash) {
+ /* We need to remove the last component from o if it is there */
+ if (o > path + 2 && *(o - 1) == '/') {
+ slash = rspamd_memrchr (path, '/', o - path - 2);
+ }
+ else if (o > path + 1) {
+ slash = rspamd_memrchr (path, '/', o - path - 1);
+ }
+ else {
+ if (o == path) {
+ /* Corner case */
+ *o++ = '/';
+ }
+
+ slash = NULL;
+ }
+
+ if (slash) {
+ /* Remove last / */
+ o = (gchar *)slash;
+ }
+ }
+ else {
+ /* Corner case */
+ if (o == path) {
+ *o++ = '/';
+ }
+ else {
+ if (dot && p > dot) {
+ memmove (o, dot, p - dot);
+ o += p - dot;
+ }
+ }
+ }
+ break;
+ case st_got_dot:
+ if (slash) {
+ /* /. -> must be / */
+ *o++ = '/';
+ }
+ else {
+ if (o > path) {
+ *o++ = '.';
+ }
+ }
+ break;
+ case st_got_slash:
+ *o++ = '/';
+ break;
+ default:
+#if 0
+ if (o > path + 1 && *(o - 1) == '/') {
+ o --;
+ }
+#endif
+ break;
+ }
+
+ if (nlen) {
+ *nlen = (o - path);
+ }
+}
diff --git a/src/libutil/util.h b/src/libutil/util.h
index f9be15d28..f747bce5b 100644
--- a/src/libutil/util.h
+++ b/src/libutil/util.h
@@ -526,6 +526,15 @@ extern const struct rspamd_controller_pbkdf pbkdf_list[];
*/
float rspamd_sum_floats (float *ar, gsize *nelts);
+/**
+ * Normalize file path removing dot sequences and repeating '/' symbols as
+ * per rfc3986#section-5.2
+ * @param path
+ * @param len
+ * @param nlen
+ */
+void rspamd_normalize_path_inplace (gchar *path, guint len, gsize *nlen);
+
#ifdef __cplusplus
}
#endif
diff --git a/test/lua/unit/url.lua b/test/lua/unit/url.lua
index 2016cc6f4..46eeef277 100644
--- a/test/lua/unit/url.lua
+++ b/test/lua/unit/url.lua
@@ -10,7 +10,7 @@ context("URL check functions", function()
local ffi = require("ffi")
ffi.cdef[[
- void rspamd_http_normalize_path_inplace(char *path, size_t len, size_t *nlen);
+ void rspamd_normalize_path_inplace(char *path, size_t len, size_t *nlen);
]]
test_helper.init_url_parser()
More information about the Commits
mailing list