commit d3e5066: [Fix] Fix handling of incomplete UTF-8 sequences

Vsevolod Stakhov vsevolod at highsecure.ru
Wed May 13 15:42:21 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-05-13 16:22:37 +0100
URL: https://github.com/rspamd/rspamd/commit/d3e506655f0b7335f272c703c070889678bb6718

[Fix] Fix handling of incomplete UTF-8 sequences

---
 contrib/replxx/src/conversion.cxx | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/contrib/replxx/src/conversion.cxx b/contrib/replxx/src/conversion.cxx
index 8d724cc3c..ce9bd932b 100644
--- a/contrib/replxx/src/conversion.cxx
+++ b/contrib/replxx/src/conversion.cxx
@@ -54,9 +54,21 @@ ConversionResult copyString8to32(char32_t* dst, int dstSize, int& dstCount, cons
 
 		while (i < slen && j < dstSize) {
 			UChar32 uc;
+			auto prev_i = i;
 			U8_NEXT (sourceStart, i, slen, uc);
 
 			if (uc <= 0) {
+				if (U8_IS_LEAD (sourceStart[prev_i])) {
+					auto lead_byte = sourceStart[prev_i];
+					auto trailing_bytes = (((uint8_t)(lead_byte)>=0xc2)+
+							((uint8_t)(lead_byte)>=0xe0)+
+							((uint8_t)(lead_byte)>=0xf0));
+
+					if (trailing_bytes + i > slen) {
+						return ConversionResult::sourceExhausted;
+					}
+				}
+
 				/* Replace with 0xFFFD */
 				uc = 0x0000FFFD;
 			}


More information about the Commits mailing list