commit 0cbb51f: [Fix] Html: Fix processing of fjlig entity

Vsevolod Stakhov vsevolod at highsecure.ru
Fri Jun 14 10:56:04 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-06-14 11:50:41 +0100
URL: https://github.com/rspamd/rspamd/commit/0cbb51f4c06a63707f1e42646f032eb479b87b55 (HEAD -> master)

[Fix] Html: Fix processing of fjlig entity
TODO: Make a generic fix

---
 src/libserver/html.c          | 12 +++++++-----
 src/libserver/html_entities.h |  2 +-
 test/lua/unit/html.lua        |  3 +++
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/libserver/html.c b/src/libserver/html.c
index fcc6e5684..ca4ad5d38 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -222,14 +222,16 @@ rspamd_html_library_init (void)
 				G_N_ELEMENTS (entities_defs));
 
 		for (i = 0; i < G_N_ELEMENTS (entities_defs); i++) {
-			k = kh_put (entity_by_number, html_entity_by_number,
-					entities_defs[i].code, &rc);
-			kh_val (html_entity_by_number, k) = entities_defs[i].replacement;
+			if (entities_defs[i].code != 0) {
+				k = kh_put (entity_by_number, html_entity_by_number,
+						entities_defs[i].code, &rc);
+				kh_val (html_entity_by_number, k) = entities_defs[i].replacement;
+			}
 
 			k = kh_put (entity_by_name, html_entity_by_name,
 					entities_defs[i].name, &rc);
 			kh_val (html_entity_by_name, k) = entities_defs[i].replacement;
-	}
+		}
 
 		html_color_by_name = kh_init (color_by_name);
 		kh_resize (color_by_name, html_color_by_name,
@@ -238,7 +240,7 @@ rspamd_html_library_init (void)
 		rspamd_ftok_t *keys;
 
 		keys = g_malloc0 (sizeof (rspamd_ftok_t) *
-				G_N_ELEMENTS (html_colornames));
+						  G_N_ELEMENTS (html_colornames));
 
 		for (i = 0; i < G_N_ELEMENTS (html_colornames); i ++) {
 			struct html_color c;
diff --git a/src/libserver/html_entities.h b/src/libserver/html_entities.h
index c6155664b..8b323e9b3 100644
--- a/src/libserver/html_entities.h
+++ b/src/libserver/html_entities.h
@@ -1722,7 +1722,7 @@ static entity entities_defs[] = {
 		{"die", 168, "\xc2\xa8"},
 		{"ngt", 8815, "\xe2\x89\xaf"},
 		{"vcy", 1074, "\xd0\xb2"},
-		{"fjlig", 102, "\x66\x6a"},
+		{"fjlig", 0, "\x66\x6a"},
 		{"submult", 10945, "\xe2\xab\x81"},
 		{"ubrcy", 1118, "\xd1\x9e"},
 		{"ovbar", 9021, "\xe2\x8c\xbd"},
diff --git a/test/lua/unit/html.lua b/test/lua/unit/html.lua
index 68ee7d1eb..79d55502e 100644
--- a/test/lua/unit/html.lua
+++ b/test/lua/unit/html.lua
@@ -4,6 +4,9 @@ context("HTML processing", function()
 
   test("Extract text from HTML", function()
     local cases = {
+      -- Entities
+      {[[<html><body>.&#102;irebaseapp.com</body></html>]],
+       [[.firebaseapp.com]]},
       {[[
 <!DOCTYPE html>
 <html lang="en">


More information about the Commits mailing list