commit 0cbb51f: [Fix] Html: Fix processing of fjlig entity
Vsevolod Stakhov
vsevolod at highsecure.ru
Fri Jun 14 10:56:04 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-06-14 11:50:41 +0100
URL: https://github.com/rspamd/rspamd/commit/0cbb51f4c06a63707f1e42646f032eb479b87b55 (HEAD -> master)
[Fix] Html: Fix processing of fjlig entity
TODO: Make a generic fix
---
src/libserver/html.c | 12 +++++++-----
src/libserver/html_entities.h | 2 +-
test/lua/unit/html.lua | 3 +++
3 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/src/libserver/html.c b/src/libserver/html.c
index fcc6e5684..ca4ad5d38 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -222,14 +222,16 @@ rspamd_html_library_init (void)
G_N_ELEMENTS (entities_defs));
for (i = 0; i < G_N_ELEMENTS (entities_defs); i++) {
- k = kh_put (entity_by_number, html_entity_by_number,
- entities_defs[i].code, &rc);
- kh_val (html_entity_by_number, k) = entities_defs[i].replacement;
+ if (entities_defs[i].code != 0) {
+ k = kh_put (entity_by_number, html_entity_by_number,
+ entities_defs[i].code, &rc);
+ kh_val (html_entity_by_number, k) = entities_defs[i].replacement;
+ }
k = kh_put (entity_by_name, html_entity_by_name,
entities_defs[i].name, &rc);
kh_val (html_entity_by_name, k) = entities_defs[i].replacement;
- }
+ }
html_color_by_name = kh_init (color_by_name);
kh_resize (color_by_name, html_color_by_name,
@@ -238,7 +240,7 @@ rspamd_html_library_init (void)
rspamd_ftok_t *keys;
keys = g_malloc0 (sizeof (rspamd_ftok_t) *
- G_N_ELEMENTS (html_colornames));
+ G_N_ELEMENTS (html_colornames));
for (i = 0; i < G_N_ELEMENTS (html_colornames); i ++) {
struct html_color c;
diff --git a/src/libserver/html_entities.h b/src/libserver/html_entities.h
index c6155664b..8b323e9b3 100644
--- a/src/libserver/html_entities.h
+++ b/src/libserver/html_entities.h
@@ -1722,7 +1722,7 @@ static entity entities_defs[] = {
{"die", 168, "\xc2\xa8"},
{"ngt", 8815, "\xe2\x89\xaf"},
{"vcy", 1074, "\xd0\xb2"},
- {"fjlig", 102, "\x66\x6a"},
+ {"fjlig", 0, "\x66\x6a"},
{"submult", 10945, "\xe2\xab\x81"},
{"ubrcy", 1118, "\xd1\x9e"},
{"ovbar", 9021, "\xe2\x8c\xbd"},
diff --git a/test/lua/unit/html.lua b/test/lua/unit/html.lua
index 68ee7d1eb..79d55502e 100644
--- a/test/lua/unit/html.lua
+++ b/test/lua/unit/html.lua
@@ -4,6 +4,9 @@ context("HTML processing", function()
test("Extract text from HTML", function()
local cases = {
+ -- Entities
+ {[[<html><body>.&#102;&#105;&#114;&#101;&#98;&#97;&#115;&#101;&#97;&#112;&#112;.&#99;&#111;&#109;</body></html>]],
+ [[.firebaseapp.com]]},
{[[
<!DOCTYPE html>
<html lang="en">
More information about the Commits
mailing list