commit 4d436a6: [Fix] Fix issues found
Vsevolod Stakhov
vsevolod at highsecure.ru
Fri Nov 15 18:56:12 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-11-15 18:25:15 +0000
URL: https://github.com/rspamd/rspamd/commit/4d436a67f1c15b4c1e47864a7d7f267162a7da79
[Fix] Fix issues found
---
src/libmime/mime_encoding.c | 13 ++++++-------
src/libserver/protocol.c | 4 ++--
src/libutil/str_util.c | 23 +++++++++++++----------
test/lua/unit/utf.lua | 4 ++--
4 files changed, 23 insertions(+), 21 deletions(-)
diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c
index 942358d11..17da31f30 100644
--- a/src/libmime/mime_encoding.c
+++ b/src/libmime/mime_encoding.c
@@ -477,30 +477,29 @@ rspamd_mime_charset_utf_enforce (gchar *in, gsize len)
p = in;
end = in + len;
- while (p < end && len > 0 && (err_offset = rspamd_fast_utf8_validate (p, len) > 0)) {
+ while (p < end && len > 0 && (err_offset = rspamd_fast_utf8_validate (p, len)) > 0) {
+ err_offset --; /* As it returns it 1 indexed */
goffset cur_offset = err_offset;
while (cur_offset < len) {
goffset tmp = cur_offset;
- U8_NEXT (in, cur_offset, len, uc);
+ U8_NEXT (p, cur_offset, len, uc);
if (uc > 0) {
/* Fill string between err_offset and tmp with `?` character */
- memset (in + err_offset, '?',
- tmp - err_offset);
+ memset (p + err_offset - 1, '?', tmp - err_offset);
break;
}
}
if (uc < 0) {
/* Fill till the end */
- memset (p + err_offset, '?',
- len - err_offset);
+ memset (p + err_offset, '?', len - err_offset);
break;
}
- p = in + cur_offset;
+ p += cur_offset;
len = end - p;
}
}
diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c
index c457fc455..e66101bb4 100644
--- a/src/libserver/protocol.c
+++ b/src/libserver/protocol.c
@@ -925,11 +925,11 @@ urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
goffset err_offset;
- if ((err_offset = rspamd_fast_utf8_validate (url->host, url->hostlen) == 0)) {
+ if ((err_offset = rspamd_fast_utf8_validate (url->host, url->hostlen)) == 0) {
obj = ucl_object_fromlstring (url->host, url->hostlen);
}
else {
- obj = ucl_object_fromlstring (url->host, err_offset);
+ obj = ucl_object_fromlstring (url->host, err_offset - 1);
}
}
else {
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
index dd1b139d8..9f4ad1cb0 100644
--- a/src/libutil/str_util.c
+++ b/src/libutil/str_util.c
@@ -3071,30 +3071,31 @@ rspamd_str_make_utf_valid (const guchar *src, gsize slen,
}
p = src;
- dlen = slen;
+ dlen = slen + 1; /* As we add '\0' */
/* Check space required */
- while (remain > 0 && (err_offset = rspamd_fast_utf8_validate (p, remain) > 0)) {
+ while (remain > 0 && (err_offset = rspamd_fast_utf8_validate (p, remain)) > 0) {
gint i = 0;
+ err_offset --; /* As it returns it 1 indexed */
p += err_offset;
remain -= err_offset;
dlen += err_offset;
- /* Each invalid character of input requires 3 bytes of output */
+ /* Each invalid character of input requires 3 bytes of output (+2 bytes) */
while (i < remain) {
- gint old_i = i;
U8_NEXT (p, i, remain, uc);
if (uc < 0) {
- dlen += 3;
+ dlen += 2;
}
else {
- p += old_i;
- remain -= old_i;
break;
}
}
+
+ p += i;
+ remain -= i;
}
if (pool) {
@@ -3108,8 +3109,9 @@ rspamd_str_make_utf_valid (const guchar *src, gsize slen,
d = dst;
remain = slen;
- while (remain > 0 && (err_offset = rspamd_fast_utf8_validate (p, remain) > 0)) {
+ while (remain > 0 && (err_offset = rspamd_fast_utf8_validate (p, remain)) > 0) {
/* Copy valid */
+ err_offset --; /* As it returns it 1 indexed */
memcpy (d, p, err_offset);
d += err_offset;
@@ -3130,8 +3132,7 @@ rspamd_str_make_utf_valid (const guchar *src, gsize slen,
}
else {
/* Adjust p and remaining stuff and go to the outer cycle */
- p += old_i;
- remain -= old_i;
+ i = old_i;
break;
}
}
@@ -3139,6 +3140,8 @@ rspamd_str_make_utf_valid (const guchar *src, gsize slen,
* Now p is the first valid utf8 character and remain is the rest of the string
* so we can continue our loop
*/
+ p += i;
+ remain -= i;
}
if (err_offset == 0 && remain > 0) {
diff --git a/test/lua/unit/utf.lua b/test/lua/unit/utf.lua
index 75dd33977..2d2c77f67 100644
--- a/test/lua/unit/utf.lua
+++ b/test/lua/unit/utf.lua
@@ -5,7 +5,7 @@ context("UTF8 check functions", function()
ffi.cdef[[
unsigned int rspamd_str_lc_utf8 (char *str, unsigned int size);
unsigned int rspamd_str_lc (char *str, unsigned int size);
- char * rspamd_str_make_utf_valid (const char *src, size_t slen, size_t *dstlen);
+ char * rspamd_str_make_utf_valid (const char *src, size_t slen, size_t *dstlen, void *);
]]
local cases = {
@@ -58,7 +58,7 @@ context("UTF8 check functions", function()
local buf = ffi.new("char[?]", #c[1] + 1)
ffi.copy(buf, c[1])
- local s = ffi.string(ffi.C.rspamd_str_make_utf_valid(buf, #c[1], NULL))
+ local s = ffi.string(ffi.C.rspamd_str_make_utf_valid(buf, #c[1], NULL, NULL))
local function to_hex(s)
return (s:gsub('.', function (c)
return string.format('%02X', string.byte(c))
More information about the Commits mailing list