commit d3e5066: [Fix] Fix incomplete utf8 sequences handling
Vsevolod Stakhov
vsevolod at highsecure.ru
Wed May 13 15:42:21 UTC 2020
Author: Vsevolod Stakhov
Date: 2020-05-13 16:22:37 +0100
URL: https://github.com/rspamd/rspamd/commit/d3e506655f0b7335f272c703c070889678bb6718
[Fix] Fix incomplete utf8 sequences handling
---
contrib/replxx/src/conversion.cxx | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/contrib/replxx/src/conversion.cxx b/contrib/replxx/src/conversion.cxx
index 8d724cc3c..ce9bd932b 100644
--- a/contrib/replxx/src/conversion.cxx
+++ b/contrib/replxx/src/conversion.cxx
@@ -54,9 +54,21 @@ ConversionResult copyString8to32(char32_t* dst, int dstSize, int& dstCount, cons
while (i < slen && j < dstSize) {
UChar32 uc;
+ auto prev_i = i;
U8_NEXT (sourceStart, i, slen, uc);
if (uc <= 0) {
+ if (U8_IS_LEAD (sourceStart[prev_i])) {
+ auto lead_byte = sourceStart[prev_i];
+ auto trailing_bytes = (((uint8_t)(lead_byte)>=0xc2)+
+ ((uint8_t)(lead_byte)>=0xe0)+
+ ((uint8_t)(lead_byte)>=0xf0));
+
+ if (trailing_bytes + i > slen) {
+ return ConversionResult::sourceExhausted;
+ }
+ }
+
/* Replace with 0xFFFD */
uc = 0x0000FFFD;
}
More information about the Commits mailing list