commit cd90981: [Minor] HTML: More corner cases in entities decoding

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Jan 24 15:35:05 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-01-24 15:27:59 +0000
URL: https://github.com/rspamd/rspamd/commit/cd90981ee2bb70f0790cd9595da241c989dcd184 (HEAD -> master)

[Minor] HTML: More corner cases in entities decoding

---
 src/libserver/html.c | 69 +++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 49 insertions(+), 20 deletions(-)

diff --git a/src/libserver/html.c b/src/libserver/html.c
index 400ae3d89..5cadb499a 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -378,9 +378,11 @@ rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
 				/* First find in entities table */
 				*h = '\0';
 				entity = e + 1;
+				uc = 0;
 
 				if (*entity != '#') {
 					k = kh_get (entity_by_name, html_entity_by_name, entity);
+					*h = ';';
 
 					if (k != kh_end (html_entity_by_name)) {
 						if (kh_val (html_entity_by_name, k)) {
@@ -392,12 +394,18 @@ rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
 								t += rep_len;
 							}
 						} else {
-							if (end - t >= h - e) {
-								memmove (t, e, h - e);
-								t += h - e;
+							if (end - t > h - e + 1) {
+								memmove (t, e, h - e + 1);
+								t += h - e + 1;
 							}
 						}
 					}
+					else {
+						if (end - t > h - e + 1) {
+							memmove (t, e, h - e + 1);
+							t += h - e + 1;
+						}
+					}
 				}
 				else if (e + 2 < h) {
 					if (*(e + 2) == 'x' || *(e + 2) == 'X') {
@@ -409,6 +417,7 @@ rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
 					else {
 						base = 10;
 					}
+
 					if (base == 10) {
 						uc = strtoul ((e + 2), &end_ptr, base);
 					}
@@ -418,13 +427,16 @@ rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
 
 					if (end_ptr != NULL && *end_ptr != '\0') {
 						/* Skip undecoded */
-						if (end - t >= h - e) {
-							memmove (t, e, h - e);
-							t += h - e;
+						*h = ';';
+
+						if (end - t > h - e + 1) {
+							memmove (t, e, h - e + 1);
+							t += h - e + 1;
 						}
 					}
 					else {
 						/* Search for a replacement */
+						*h = ';';
 						k = kh_get (entity_by_number, html_entity_by_number, uc);
 
 						if (k != kh_end (html_entity_by_number)) {
@@ -437,9 +449,9 @@ rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
 									t += rep_len;
 								}
 							} else {
-								if (end - t >= h - e) {
-									memmove (t, e, h - e);
-									t += h - e;
+								if (end - t > h - e + 1) {
+									memmove (t, e, h - e + 1);
+									t += h - e + 1;
 								}
 							}
 						}
@@ -448,24 +460,41 @@ rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
 							goffset off = t - s;
 							UBool is_error = 0;
 
-							U8_APPEND (s, off, len, uc, is_error);
-							if (!is_error) {
-								t = s + off;
-							}
-							else {
-								/* Leave invalid entities as is */
-								if (end - t >= h - e) {
-									memmove (t, e, h - e);
-									t += h - e;
+							if (uc > 0) {
+								U8_APPEND (s, off, len, uc, is_error);
+								if (!is_error) {
+									t = s + off;
+								}
+								else {
+									/* Leave invalid entities as is */
+									if (end - t > h - e + 1) {
+										memmove (t, e, h - e + 1);
+										t += h - e + 1;
+									}
 								}
 							}
+							else if (end - t > h - e + 1) {
+								memmove (t, e, h - e + 1);
+								t += h - e + 1;
+							}
 						}
 					}
 				}
 
-				*h = ';';
 				state = 0;
 			}
+			else if (*h == '&') {
+				/* Previous `&` was bogus */
+				state = 1;
+
+				if (end - t > h - e) {
+					memmove (t, e, h - e);
+					t += h - e;
+				}
+
+				e = h;
+			}
+
 			h++;
 
 			break;
@@ -475,7 +504,7 @@ rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
 	/* Leftover */
 	if (state == 1 && h > e) {
 		/* Unfinished entity, copy as is */
-		if (end - t >= h - e) {
+		if (end - t > h - e) {
 			memmove (t, e, h - e);
 			t += h - e;
 		}


More information about the Commits mailing list