commit 0e753fe: [Minor] Html entities seem like a big anecdote, enable bug-to-bug compat with WebKit

Vsevolod Stakhov vsevolod at highsecure.ru
Sat Jun 19 10:42:05 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-06-19 11:41:17 +0100
URL: https://github.com/rspamd/rspamd/commit/0e753fe22aa4e92cec5710dced0bb5e572180783 (HEAD -> master)

[Minor] Html entities seem like a big anecdote, enable bug-to-bug compat with WebKit

---
 src/libserver/html/html_entities.cxx | 180 ++++++++++++++++++-----------------
 1 file changed, 92 insertions(+), 88 deletions(-)

diff --git a/src/libserver/html/html_entities.cxx b/src/libserver/html/html_entities.cxx
index 144de5d99..50bf34f18 100644
--- a/src/libserver/html/html_entities.cxx
+++ b/src/libserver/html/html_entities.cxx
@@ -34,12 +34,14 @@ struct html_entity_def {
 	std::string name;
 	std::string replacement;
 	unsigned code;
+	bool allow_heuristic;
 };
 
-#define ENTITY_DEF(name, code, replacement) html_entity_def{(name), (replacement), (code)}
+#define ENTITY_DEF(name, code, replacement) html_entity_def{(name), (replacement), (code), false}
+#define ENTITY_DEF_HEUR(name, code, replacement) html_entity_def{(name), (replacement), (code), true}
 
 static const auto html_entities_array = rspamd::array_of<html_entity_def>(
-		ENTITY_DEF("szlig", 223, "\xc3\x9f"),
+		ENTITY_DEF_HEUR("szlig", 223, "\xc3\x9f"),
 		ENTITY_DEF("prime", 8242, "\xe2\x80\xb2"),
 		ENTITY_DEF("lnsim", 8934, "\xe2\x8b\xa6"),
 		ENTITY_DEF("nvDash", 8877, "\xe2\x8a\xad"),
@@ -53,7 +55,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("Dstrok", 272, "\xc4\x90"),
 		ENTITY_DEF("rrarr", 8649, "\xe2\x87\x89"),
 		ENTITY_DEF("rArr", 8658, "\xe2\x87\x92"),
-		ENTITY_DEF("Aacute", 193, "\xc3\x81"),
+		ENTITY_DEF_HEUR("Aacute", 193, "\xc3\x81"),
 		ENTITY_DEF("kappa", 954, "\xce\xba"),
 		ENTITY_DEF("Iopf", 120128, "\xf0\x9d\x95\x80"),
 		ENTITY_DEF("hyphen", 8208, "\xe2\x80\x90"),
@@ -75,16 +77,16 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("Vcy", 1042, "\xd0\x92"),
 		ENTITY_DEF("erDot", 8787, "\xe2\x89\x93"),
 		ENTITY_DEF("nsubE", 10949, "\xe2\xab\x85\xcc\xb8"),
-		ENTITY_DEF("egrave", 232, "\xc3\xa8"),
+		ENTITY_DEF_HEUR("egrave", 232, "\xc3\xa8"),
 		ENTITY_DEF("Lcedil", 315, "\xc4\xbb"),
 		ENTITY_DEF("lharul", 10602, "\xe2\xa5\xaa"),
-		ENTITY_DEF("middot", 183, "\xc2\xb7"),
+		ENTITY_DEF_HEUR("middot", 183, "\xc2\xb7"),
 		ENTITY_DEF("ggg", 8921, "\xe2\x8b\x99"),
 		ENTITY_DEF("NestedLessLess", 8810, "\xe2\x89\xaa"),
 		ENTITY_DEF("tau", 964, "\xcf\x84"),
 		ENTITY_DEF("setmn", 8726, "\xe2\x88\x96"),
 		ENTITY_DEF("frac78", 8542, "\xe2\x85\x9e"),
-		ENTITY_DEF("para", 182, "\xc2\xb6"),
+		ENTITY_DEF_HEUR("para", 182, "\xc2\xb6"),
 		ENTITY_DEF("Rcedil", 342, "\xc5\x96"),
 		ENTITY_DEF("propto", 8733, "\xe2\x88\x9d"),
 		ENTITY_DEF("sqsubset", 8847, "\xe2\x8a\x8f"),
@@ -112,12 +114,12 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("popf", 120161, "\xf0\x9d\x95\xa1"),
 		ENTITY_DEF("dbkarow", 10511, "\xe2\xa4\x8f"),
 		ENTITY_DEF("roang", 10221, "\xe2\x9f\xad"),
-		ENTITY_DEF("brvbar", 166, "\xc2\xa6"),
+		ENTITY_DEF_HEUR("brvbar", 166, "\xc2\xa6"),
 		ENTITY_DEF("CenterDot", 183, "\xc2\xb7"),
 		ENTITY_DEF("notindot", 8949, "\xe2\x8b\xb5\xcc\xb8"),
 		ENTITY_DEF("supmult", 10946, "\xe2\xab\x82"),
 		ENTITY_DEF("multimap", 8888, "\xe2\x8a\xb8"),
-		ENTITY_DEF("frac34", 190, "\xc2\xbe"),
+		ENTITY_DEF_HEUR("frac34", 190, "\xc2\xbe"),
 		ENTITY_DEF("mapsto", 8614, "\xe2\x86\xa6"),
 		ENTITY_DEF("flat", 9837, "\xe2\x99\xad"),
 		ENTITY_DEF("updownarrow", 8597, "\xe2\x86\x95"),
@@ -133,7 +135,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("dot", 729, "\xcb\x99"),
 		ENTITY_DEF("tbrk", 9140, "\xe2\x8e\xb4"),
 		ENTITY_DEF("LeftUpDownVector", 10577, "\xe2\xa5\x91"),
-		ENTITY_DEF("uml", 168, "\xc2\xa8"),
+		ENTITY_DEF_HEUR("uml", 168, "\xc2\xa8"),
 		ENTITY_DEF("bbrk", 9141, "\xe2\x8e\xb5"),
 		ENTITY_DEF("nearrow", 8599, "\xe2\x86\x97"),
 		ENTITY_DEF("backsimeq", 8909, "\xe2\x8b\x8d"),
@@ -142,7 +144,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("ldsh", 8626, "\xe2\x86\xb2"),
 		ENTITY_DEF("sce", 10928, "\xe2\xaa\xb0"),
 		ENTITY_DEF("angst", 197, "\xc3\x85"),
-		ENTITY_DEF("yen", 165, "\xc2\xa5"),
+		ENTITY_DEF_HEUR("yen", 165, "\xc2\xa5"),
 		ENTITY_DEF("nsupE", 10950, "\xe2\xab\x86\xcc\xb8"),
 		ENTITY_DEF("Uscr", 119984, "\xf0\x9d\x92\xb0"),
 		ENTITY_DEF("subplus", 10943, "\xe2\xaa\xbf"),
@@ -180,7 +182,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("smid", 8739, "\xe2\x88\xa3"),
 		ENTITY_DEF("cularr", 8630, "\xe2\x86\xb6"),
 		ENTITY_DEF("olcross", 10683, "\xe2\xa6\xbb"),
-		ENTITY_DEF("GT", 62, "\x3e"),
+		ENTITY_DEF_HEUR("GT", 62, "\x3e"),
 		ENTITY_DEF("scap", 10936, "\xe2\xaa\xb8"),
 		ENTITY_DEF("capcup", 10823, "\xe2\xa9\x87"),
 		ENTITY_DEF("NotSquareSubsetEqual", 8930, "\xe2\x8b\xa2"),
@@ -239,8 +241,8 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("gfr", 120100, "\xf0\x9d\x94\xa4"),
 		ENTITY_DEF("notnivb", 8958, "\xe2\x8b\xbe"),
 		ENTITY_DEF("Afr", 120068, "\xf0\x9d\x94\x84"),
-		ENTITY_DEF("ge", 8805, "\xe2\x89\xa5"),
-		ENTITY_DEF("iexcl", 161, "\xc2\xa1"),
+		ENTITY_DEF_HEUR("ge", 8805, "\xe2\x89\xa5"),
+		ENTITY_DEF_HEUR("iexcl", 161, "\xc2\xa1"),
 		ENTITY_DEF("dfr", 120097, "\xf0\x9d\x94\xa1"),
 		ENTITY_DEF("rsaquo", 8250, "\xe2\x80\xba"),
 		ENTITY_DEF("xcap", 8898, "\xe2\x8b\x82"),
@@ -257,7 +259,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("Prime", 8243, "\xe2\x80\xb3"),
 		ENTITY_DEF("prec", 8826, "\xe2\x89\xba"),
 		ENTITY_DEF("swnwar", 10538, "\xe2\xa4\xaa"),
-		ENTITY_DEF("COPY", 169, "\xc2\xa9"),
+		ENTITY_DEF_HEUR("COPY", 169, "\xc2\xa9"),
 		ENTITY_DEF("cong", 8773, "\xe2\x89\x85"),
 		ENTITY_DEF("sacute", 347, "\xc5\x9b"),
 		ENTITY_DEF("Nopf", 8469, "\xe2\x84\x95"),
@@ -267,9 +269,9 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("iota", 953, "\xce\xb9"),
 		ENTITY_DEF("notinE", 8953, "\xe2\x8b\xb9\xcc\xb8"),
 		ENTITY_DEF("jfr", 120103, "\xf0\x9d\x94\xa7"),
-		ENTITY_DEF("QUOT", 34, "\x22"),
+		ENTITY_DEF_HEUR("QUOT", 34, "\x22"),
 		ENTITY_DEF("vsupnE", 10956, "\xe2\xab\x8c\xef\xb8\x80"),
-		ENTITY_DEF("igrave", 236, "\xc3\xac"),
+		ENTITY_DEF_HEUR("igrave", 236, "\xc3\xac"),
 		ENTITY_DEF("bsim", 8765, "\xe2\x88\xbd"),
 		ENTITY_DEF("npreceq", 10927, "\xe2\xaa\xaf\xcc\xb8"),
 		ENTITY_DEF("zcaron", 382, "\xc5\xbe"),
@@ -320,7 +322,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("xrArr", 10233, "\xe2\x9f\xb9"),
 		ENTITY_DEF("NotTildeEqual", 8772, "\xe2\x89\x84"),
 		ENTITY_DEF("Bfr", 120069, "\xf0\x9d\x94\x85"),
-		ENTITY_DEF("Iuml", 207, "\xc3\x8f"),
+		ENTITY_DEF_HEUR("Iuml", 207, "\xc3\x8f"),
 		ENTITY_DEF("leg", 8922, "\xe2\x8b\x9a"),
 		ENTITY_DEF("boxhU", 9576, "\xe2\x95\xa8"),
 		ENTITY_DEF("Gopf", 120126, "\xf0\x9d\x94\xbe"),
@@ -329,7 +331,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("precapprox", 10935, "\xe2\xaa\xb7"),
 		ENTITY_DEF("lcedil", 316, "\xc4\xbc"),
 		ENTITY_DEF("between", 8812, "\xe2\x89\xac"),
-		ENTITY_DEF("Oslash", 216, "\xc3\x98"),
+		ENTITY_DEF_HEUR("Oslash", 216, "\xc3\x98"),
 		ENTITY_DEF("breve", 728, "\xcb\x98"),
 		ENTITY_DEF("caps", 8745, "\xe2\x88\xa9\xef\xb8\x80"),
 		ENTITY_DEF("vangrt", 10652, "\xe2\xa6\x9c"),
@@ -349,7 +351,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("plussim", 10790, "\xe2\xa8\xa6"),
 		ENTITY_DEF("Darr", 8609, "\xe2\x86\xa1"),
 		ENTITY_DEF("nexist", 8708, "\xe2\x88\x84"),
-		ENTITY_DEF("cent", 162, "\xc2\xa2"),
+		ENTITY_DEF_HEUR("cent", 162, "\xc2\xa2"),
 		ENTITY_DEF("khcy", 1093, "\xd1\x85"),
 		ENTITY_DEF("smallsetminus", 8726, "\xe2\x88\x96"),
 		ENTITY_DEF("ycirc", 375, "\xc5\xb7"),
@@ -362,7 +364,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("dwangle", 10662, "\xe2\xa6\xa6"),
 		ENTITY_DEF("bowtie", 8904, "\xe2\x8b\x88"),
 		ENTITY_DEF("Dfr", 120071, "\xf0\x9d\x94\x87"),
-		ENTITY_DEF("iacute", 237, "\xc3\xad"),
+		ENTITY_DEF_HEUR("iacute", 237, "\xc3\xad"),
 		ENTITY_DEF("njcy", 1114, "\xd1\x9a"),
 		ENTITY_DEF("cfr", 120096, "\xf0\x9d\x94\xa0"),
 		ENTITY_DEF("TripleDot", 8411, "\xe2\x83\x9b"),
@@ -373,7 +375,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("Rang", 10219, "\xe2\x9f\xab"),
 		ENTITY_DEF("Wopf", 120142, "\xf0\x9d\x95\x8e"),
 		ENTITY_DEF("boxUl", 9564, "\xe2\x95\x9c"),
-		ENTITY_DEF("frac12", 189, "\xc2\xbd"),
+		ENTITY_DEF_HEUR("frac12", 189, "\xc2\xbd"),
 		ENTITY_DEF("clubs", 9827, "\xe2\x99\xa3"),
 		ENTITY_DEF("amalg", 10815, "\xe2\xa8\xbf"),
 		ENTITY_DEF("Lang", 10218, "\xe2\x9f\xaa"),
@@ -397,7 +399,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("rscr", 120007, "\xf0\x9d\x93\x87"),
 		ENTITY_DEF("Rrightarrow", 8667, "\xe2\x87\x9b"),
 		ENTITY_DEF("equest", 8799, "\xe2\x89\x9f"),
-		ENTITY_DEF("ntilde", 241, "\xc3\xb1"),
+		ENTITY_DEF_HEUR("ntilde", 241, "\xc3\xb1"),
 		ENTITY_DEF("Escr", 8496, "\xe2\x84\xb0"),
 		ENTITY_DEF("Lopf", 120131, "\xf0\x9d\x95\x83"),
 		ENTITY_DEF("GreaterGreater", 10914, "\xe2\xaa\xa2"),
@@ -431,7 +433,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("varpropto", 8733, "\xe2\x88\x9d"),
 		ENTITY_DEF("Lcaron", 317, "\xc4\xbd"),
 		ENTITY_DEF("lbrkslu", 10637, "\xe2\xa6\x8d"),
-		ENTITY_DEF("AElig", 198, "\xc3\x86"),
+		ENTITY_DEF_HEUR("AElig", 198, "\xc3\x86"),
 		ENTITY_DEF("varr", 8597, "\xe2\x86\x95"),
 		ENTITY_DEF("nvinfin", 10718, "\xe2\xa7\x9e"),
 		ENTITY_DEF("leq", 8804, "\xe2\x89\xa4"),
@@ -455,7 +457,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("leftharpoondown", 8637, "\xe2\x86\xbd"),
 		ENTITY_DEF("vfr", 120115, "\xf0\x9d\x94\xb3"),
 		ENTITY_DEF("gvertneqq", 8809, "\xe2\x89\xa9\xef\xb8\x80"),
-		ENTITY_DEF("ouml", 246, "\xc3\xb6"),
+		ENTITY_DEF_HEUR("ouml", 246, "\xc3\xb6"),
 		ENTITY_DEF("raemptyv", 10675, "\xe2\xa6\xb3"),
 		ENTITY_DEF("Zcaron", 381, "\xc5\xbd"),
 		ENTITY_DEF("scE", 10932, "\xe2\xaa\xb4"),
@@ -506,14 +508,14 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("imagline", 8464, "\xe2\x84\x90"),
 		ENTITY_DEF("ncy", 1085, "\xd0\xbd"),
 		ENTITY_DEF("bigstar", 9733, "\xe2\x98\x85"),
-		ENTITY_DEF("REG", 174, "\xc2\xae"),
+		ENTITY_DEF_HEUR("REG", 174, "\xc2\xae"),
 		ENTITY_DEF("triangleq", 8796, "\xe2\x89\x9c"),
 		ENTITY_DEF("rsqb", 93, "\x5d"),
 		ENTITY_DEF("ddarr", 8650, "\xe2\x87\x8a"),
 		ENTITY_DEF("csub", 10959, "\xe2\xab\x8f"),
 		ENTITY_DEF("quest", 63, "\x3f"),
 		ENTITY_DEF("Star", 8902, "\xe2\x8b\x86"),
-		ENTITY_DEF("LT", 60, "\x3c"),
+		ENTITY_DEF_HEUR("LT", 60, "\x3c"),
 		ENTITY_DEF("ncong", 8775, "\xe2\x89\x87"),
 		ENTITY_DEF("prnE", 10933, "\xe2\xaa\xb5"),
 		ENTITY_DEF("bigtriangleup", 9651, "\xe2\x96\xb3"),
@@ -555,10 +557,10 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("boxDl", 9558, "\xe2\x95\x96"),
 		ENTITY_DEF("kappav", 1008, "\xcf\xb0"),
 		ENTITY_DEF("profsurf", 8979, "\xe2\x8c\x93"),
-		ENTITY_DEF("auml", 228, "\xc3\xa4"),
+		ENTITY_DEF_HEUR("auml", 228, "\xc3\xa4"),
 		ENTITY_DEF("heartsuit", 9829, "\xe2\x99\xa5"),
-		ENTITY_DEF("eacute", 233, "\xc3\xa9"),
-		ENTITY_DEF("gt", 62, "\x3e"),
+		ENTITY_DEF_HEUR("eacute", 233, "\xc3\xa9"),
+		ENTITY_DEF_HEUR("gt", 62, "\x3e"),
 		ENTITY_DEF("Gcedil", 290, "\xc4\xa2"),
 		ENTITY_DEF("easter", 10862, "\xe2\xa9\xae"),
 		ENTITY_DEF("Tcy", 1058, "\xd0\xa2"),
@@ -580,7 +582,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("Dot", 168, "\xc2\xa8"),
 		ENTITY_DEF("SquareIntersection", 8851, "\xe2\x8a\x93"),
 		ENTITY_DEF("map", 8614, "\xe2\x86\xa6"),
-		ENTITY_DEF("aelig", 230, "\xc3\xa6"),
+		ENTITY_DEF_HEUR("aelig", 230, "\xc3\xa6"),
 		ENTITY_DEF("RightArrow", 8594, "\xe2\x86\x92"),
 		ENTITY_DEF("rightharpoondown", 8641, "\xe2\x87\x81"),
 		ENTITY_DEF("bNot", 10989, "\xe2\xab\xad"),
@@ -591,7 +593,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("RightVectorBar", 10579, "\xe2\xa5\x93"),
 		ENTITY_DEF("nrarrw", 8605, "\xe2\x86\x9d\xcc\xb8"),
 		ENTITY_DEF("nbump", 8782, "\xe2\x89\x8e\xcc\xb8"),
-		ENTITY_DEF("iquest", 191, "\xc2\xbf"),
+		ENTITY_DEF_HEUR("iquest", 191, "\xc2\xbf"),
 		ENTITY_DEF("wr", 8768, "\xe2\x89\x80"),
 		ENTITY_DEF("UpArrow", 8593, "\xe2\x86\x91"),
 		ENTITY_DEF("notinva", 8713, "\xe2\x88\x89"),
@@ -615,9 +617,9 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("Gt", 8811, "\xe2\x89\xab"),
 		ENTITY_DEF("exist", 8707, "\xe2\x88\x83"),
 		ENTITY_DEF("gtrapprox", 10886, "\xe2\xaa\x86"),
-		ENTITY_DEF("euml", 235, "\xc3\xab"),
+		ENTITY_DEF_HEUR("euml", 235, "\xc3\xab"),
 		ENTITY_DEF("Equilibrium", 8652, "\xe2\x87\x8c"),
-		ENTITY_DEF("aacute", 225, "\xc3\xa1"),
+		ENTITY_DEF_HEUR("aacute", 225, "\xc3\xa1"),
 		ENTITY_DEF("omid", 10678, "\xe2\xa6\xb6"),
 		ENTITY_DEF("loarr", 8701, "\xe2\x87\xbd"),
 		ENTITY_DEF("SucceedsSlantEqual", 8829, "\xe2\x89\xbd"),
@@ -631,7 +633,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("UnderParenthesis", 9181, "\xe2\x8f\x9d"),
 		ENTITY_DEF("nparsl", 11005, "\xe2\xab\xbd\xe2\x83\xa5"),
 		ENTITY_DEF("Lacute", 313, "\xc4\xb9"),
-		ENTITY_DEF("deg", 176, "\xc2\xb0"),
+		ENTITY_DEF_HEUR("deg", 176, "\xc2\xb0"),
 		ENTITY_DEF("Racute", 340, "\xc5\x94"),
 		ENTITY_DEF("Verbar", 8214, "\xe2\x80\x96"),
 		ENTITY_DEF("sqcups", 8852, "\xe2\x8a\x94\xef\xb8\x80"),
@@ -647,7 +649,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("gE", 8807, "\xe2\x89\xa7"),
 		ENTITY_DEF("SmallCircle", 8728, "\xe2\x88\x98"),
 		ENTITY_DEF("diamondsuit", 9830, "\xe2\x99\xa6"),
-		ENTITY_DEF("Otilde", 213, "\xc3\x95"),
+		ENTITY_DEF_HEUR("Otilde", 213, "\xc3\x95"),
 		ENTITY_DEF("lneq", 10887, "\xe2\xaa\x87"),
 		ENTITY_DEF("lesdoto", 10881, "\xe2\xaa\x81"),
 		ENTITY_DEF("ltquest", 10875, "\xe2\xa9\xbb"),
@@ -697,7 +699,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("lfr", 120105, "\xf0\x9d\x94\xa9"),
 		ENTITY_DEF("emsp13", 8196, "\xe2\x80\x84"),
 		ENTITY_DEF("parsl", 11005, "\xe2\xab\xbd"),
-		ENTITY_DEF("ucirc", 251, "\xc3\xbb"),
+		ENTITY_DEF_HEUR("ucirc", 251, "\xc3\xbb"),
 		ENTITY_DEF("gsiml", 10896, "\xe2\xaa\x90"),
 		ENTITY_DEF("xsqcup", 10758, "\xe2\xa8\x86"),
 		ENTITY_DEF("Omicron", 927, "\xce\x9f"),
@@ -803,7 +805,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("Lsh", 8624, "\xe2\x86\xb0"),
 		ENTITY_DEF("boxvr", 9500, "\xe2\x94\x9c"),
 		ENTITY_DEF("scedil", 351, "\xc5\x9f"),
-		ENTITY_DEF("iuml", 239, "\xc3\xaf"),
+		ENTITY_DEF_HEUR("iuml", 239, "\xc3\xaf"),
 		ENTITY_DEF("NJcy", 1034, "\xd0\x8a"),
 		ENTITY_DEF("Dagger", 8225, "\xe2\x80\xa1"),
 		ENTITY_DEF("rarrap", 10613, "\xe2\xa5\xb5"),
@@ -812,7 +814,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("scnsim", 8937, "\xe2\x8b\xa9"),
 		ENTITY_DEF("hbar", 8463, "\xe2\x84\x8f"),
 		ENTITY_DEF("frac15", 8533, "\xe2\x85\x95"),
-		ENTITY_DEF("sup3", 179, "\xc2\xb3"),
+		ENTITY_DEF_HEUR("sup3", 179, "\xc2\xb3"),
 		ENTITY_DEF("NegativeThickSpace", 8203, "\xe2\x80\x8b"),
 		ENTITY_DEF("npr", 8832, "\xe2\x8a\x80"),
 		ENTITY_DEF("doteq", 8784, "\xe2\x89\x90"),
@@ -887,7 +889,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("NotPrecedesSlantEqual", 8928, "\xe2\x8b\xa0"),
 		ENTITY_DEF("phone", 9742, "\xe2\x98\x8e"),
 		ENTITY_DEF("Ecirc", 202, "\xc3\x8a"),
-		ENTITY_DEF("lt", 60, "\x3c"),
+		ENTITY_DEF_HEUR("lt", 60, "\x3c"),
 		ENTITY_DEF("intcal", 8890, "\xe2\x8a\xba"),
 		ENTITY_DEF("xdtri", 9661, "\xe2\x96\xbd"),
 		ENTITY_DEF("Abreve", 258, "\xc4\x82"),
@@ -973,7 +975,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("LongLeftRightArrow", 10231, "\xe2\x9f\xb7"),
 		ENTITY_DEF("Gfr", 120074, "\xf0\x9d\x94\x8a"),
 		ENTITY_DEF("sqsubseteq", 8849, "\xe2\x8a\x91"),
-		ENTITY_DEF("ograve", 242, "\xc3\xb2"),
+		ENTITY_DEF_HEUR("ograve", 242, "\xc3\xb2"),
 		ENTITY_DEF("larrhk", 8617, "\xe2\x86\xa9"),
 		ENTITY_DEF("sigma", 963, "\xcf\x83"),
 		ENTITY_DEF("NotSquareSupersetEqual", 8931, "\xe2\x8b\xa3"),
@@ -1005,10 +1007,10 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("NotExists", 8708, "\xe2\x88\x84"),
 		ENTITY_DEF("geq", 8805, "\xe2\x89\xa5"),
 		ENTITY_DEF("Ffr", 120073, "\xf0\x9d\x94\x89"),
-		ENTITY_DEF("divide", 247, "\xc3\xb7"),
+		ENTITY_DEF_HEUR("divide", 247, "\xc3\xb7"),
 		ENTITY_DEF("blank", 9251, "\xe2\x90\xa3"),
 		ENTITY_DEF("IEcy", 1045, "\xd0\x95"),
-		ENTITY_DEF("ordm", 186, "\xc2\xba"),
+		ENTITY_DEF_HEUR("ordm", 186, "\xc2\xba"),
 		ENTITY_DEF("fopf", 120151, "\xf0\x9d\x95\x97"),
 		ENTITY_DEF("ecir", 8790, "\xe2\x89\x96"),
 		ENTITY_DEF("complement", 8705, "\xe2\x88\x81"),
@@ -1032,7 +1034,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("supset", 8835, "\xe2\x8a\x83"),
 		ENTITY_DEF("gneqq", 8809, "\xe2\x89\xa9"),
 		ENTITY_DEF("Lstrok", 321, "\xc5\x81"),
-		ENTITY_DEF("AMP", 38, "\x26"),
+		ENTITY_DEF_HEUR("AMP", 38, "\x26"),
 		ENTITY_DEF("acE", 8766, "\xe2\x88\xbe\xcc\xb3"),
 		ENTITY_DEF("sqsupseteq", 8850, "\xe2\x8a\x92"),
 		ENTITY_DEF("nle", 8816, "\xe2\x89\xb0"),
@@ -1045,7 +1047,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("demptyv", 10673, "\xe2\xa6\xb1"),
 		ENTITY_DEF("eta", 951, "\xce\xb7"),
 		ENTITY_DEF("GreaterSlantEqual", 10878, "\xe2\xa9\xbe"),
-		ENTITY_DEF("ccedil", 231, "\xc3\xa7"),
+		ENTITY_DEF_HEUR("ccedil", 231, "\xc3\xa7"),
 		ENTITY_DEF("pfr", 120109, "\xf0\x9d\x94\xad"),
 		ENTITY_DEF("bbrktbrk", 9142, "\xe2\x8e\xb6"),
 		ENTITY_DEF("mcy", 1084, "\xd0\xbc"),
@@ -1058,10 +1060,10 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("apos", 39, "\x27"),
 		ENTITY_DEF("lrm", 8206, "\xe2\x80\x8e"),
 		ENTITY_DEF("male", 9794, "\xe2\x99\x82"),
-		ENTITY_DEF("agrave", 224, "\xc3\xa0"),
+		ENTITY_DEF_HEUR("agrave", 224, "\xc3\xa0"),
 		ENTITY_DEF("Lt", 8810, "\xe2\x89\xaa"),
 		ENTITY_DEF("capand", 10820, "\xe2\xa9\x84"),
-		ENTITY_DEF("aring", 229, "\xc3\xa5"),
+		ENTITY_DEF_HEUR("aring", 229, "\xc3\xa5"),
 		ENTITY_DEF("Jukcy", 1028, "\xd0\x84"),
 		ENTITY_DEF("bumpe", 8783, "\xe2\x89\x8f"),
 		ENTITY_DEF("dd", 8518, "\xe2\x85\x86"),
@@ -1076,7 +1078,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("hfr", 120101, "\xf0\x9d\x94\xa5"),
 		ENTITY_DEF("preceq", 10927, "\xe2\xaa\xaf"),
 		ENTITY_DEF("rationals", 8474, "\xe2\x84\x9a"),
-		ENTITY_DEF("Auml", 196, "\xc3\x84"),
+		ENTITY_DEF_HEUR("Auml", 196, "\xc3\x84"),
 		ENTITY_DEF("LeftRightArrow", 8596, "\xe2\x86\x94"),
 		ENTITY_DEF("blacktriangleright", 9656, "\xe2\x96\xb8"),
 		ENTITY_DEF("dharr", 8642, "\xe2\x87\x82"),
@@ -1111,7 +1113,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("circlearrowright", 8635, "\xe2\x86\xbb"),
 		ENTITY_DEF("NotCongruent", 8802, "\xe2\x89\xa2"),
 		ENTITY_DEF("Scedil", 350, "\xc5\x9e"),
-		ENTITY_DEF("raquo", 187, "\xc2\xbb"),
+		ENTITY_DEF_HEUR("raquo", 187, "\xc2\xbb"),
 		ENTITY_DEF("ycy", 1099, "\xd1\x8b"),
 		ENTITY_DEF("notinvb", 8951, "\xe2\x8b\xb7"),
 		ENTITY_DEF("andv", 10842, "\xe2\xa9\x9a"),
@@ -1121,7 +1123,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("downarrow", 8595, "\xe2\x86\x93"),
 		ENTITY_DEF("gesdotol", 10884, "\xe2\xaa\x84"),
 		ENTITY_DEF("Congruent", 8801, "\xe2\x89\xa1"),
-		ENTITY_DEF("pound", 163, "\xc2\xa3"),
+		ENTITY_DEF_HEUR("pound", 163, "\xc2\xa3"),
 		ENTITY_DEF("ZeroWidthSpace", 8203, "\xe2\x80\x8b"),
 		ENTITY_DEF("rdca", 10551, "\xe2\xa4\xb7"),
 		ENTITY_DEF("rmoust", 9137, "\xe2\x8e\xb1"),
@@ -1148,8 +1150,8 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("NotSquareSuperset", 8848, "\xe2\x8a\x90\xcc\xb8"),
 		ENTITY_DEF("Amacr", 256, "\xc4\x80"),
 		ENTITY_DEF("OpenCurlyDoubleQuote", 8220, "\xe2\x80\x9c"),
-		ENTITY_DEF("thorn", 254, "\xc3\xbe"),
-		ENTITY_DEF("ordf", 170, "\xc2\xaa"),
+		ENTITY_DEF_HEUR("thorn", 254, "\xc3\xbe"),
+		ENTITY_DEF_HEUR("ordf", 170, "\xc2\xaa"),
 		ENTITY_DEF("natur", 9838, "\xe2\x99\xae"),
 		ENTITY_DEF("xi", 958, "\xce\xbe"),
 		ENTITY_DEF("infin", 8734, "\xe2\x88\x9e"),
@@ -1166,10 +1168,10 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("LessEqualGreater", 8922, "\xe2\x8b\x9a"),
 		ENTITY_DEF("Implies", 8658, "\xe2\x87\x92"),
 		ENTITY_DEF("ETH", 208, "\xc3\x90"),
-		ENTITY_DEF("Yacute", 221, "\xc3\x9d"),
-		ENTITY_DEF("shy", 173, "\xc2\xad"),
+		ENTITY_DEF_HEUR("Yacute", 221, "\xc3\x9d"),
+		ENTITY_DEF_HEUR("shy", 173, "\xc2\xad"),
 		ENTITY_DEF("Rarrtl", 10518, "\xe2\xa4\x96"),
-		ENTITY_DEF("sup1", 185, "\xc2\xb9"),
+		ENTITY_DEF_HEUR("sup1", 185, "\xc2\xb9"),
 		ENTITY_DEF("reals", 8477, "\xe2\x84\x9d"),
 		ENTITY_DEF("blacklozenge", 10731, "\xe2\xa7\xab"),
 		ENTITY_DEF("ncedil", 326, "\xc5\x86"),
@@ -1201,7 +1203,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("Fscr", 8497, "\xe2\x84\xb1"),
 		ENTITY_DEF("veebar", 8891, "\xe2\x8a\xbb"),
 		ENTITY_DEF("Longleftrightarrow", 10234, "\xe2\x9f\xba"),
-		ENTITY_DEF("reg", 174, "\xc2\xae"),
+		ENTITY_DEF_HEUR("reg", 174, "\xc2\xae"),
 		ENTITY_DEF("NegativeMediumSpace", 8203, "\xe2\x80\x8b"),
 		ENTITY_DEF("Upsi", 978, "\xcf\x92"),
 		ENTITY_DEF("Mellintrf", 8499, "\xe2\x84\xb3"),
@@ -1270,7 +1272,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("strns", 175, "\xc2\xaf"),
 		ENTITY_DEF("intlarhk", 10775, "\xe2\xa8\x97"),
 		ENTITY_DEF("downharpoonright", 8642, "\xe2\x87\x82"),
-		ENTITY_DEF("yacute", 253, "\xc3\xbd"),
+		ENTITY_DEF_HEUR("yacute", 253, "\xc3\xbd"),
 		ENTITY_DEF("boxUr", 9561, "\xe2\x95\x99"),
 		ENTITY_DEF("triangleleft", 9667, "\xe2\x97\x83"),
 		ENTITY_DEF("DiacriticalDot", 729, "\xcb\x99"),
@@ -1304,7 +1306,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("tridot", 9708, "\xe2\x97\xac"),
 		ENTITY_DEF("ldquor", 8222, "\xe2\x80\x9e"),
 		ENTITY_DEF("sol", 47, "\x2f"),
-		ENTITY_DEF("ecirc", 234, "\xc3\xaa"),
+		ENTITY_DEF_HEUR("ecirc", 234, "\xc3\xaa"),
 		ENTITY_DEF("DoubleLeftArrow", 8656, "\xe2\x87\x90"),
 		ENTITY_DEF("Gscr", 119970, "\xf0\x9d\x92\xa2"),
 		ENTITY_DEF("ap", 8776, "\xe2\x89\x88"),
@@ -1317,7 +1319,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("backprime", 8245, "\xe2\x80\xb5"),
 		ENTITY_DEF("longleftrightarrow", 10231, "\xe2\x9f\xb7"),
 		ENTITY_DEF("ntriangleleft", 8938, "\xe2\x8b\xaa"),
-		ENTITY_DEF("copy", 169, "\xc2\xa9"),
+		ENTITY_DEF_HEUR("copy", 169, "\xc2\xa9"),
 		ENTITY_DEF("mapstodown", 8615, "\xe2\x86\xa7"),
 		ENTITY_DEF("seArr", 8664, "\xe2\x87\x98"),
 		ENTITY_DEF("ENG", 330, "\xc5\x8a"),
@@ -1351,7 +1353,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("nless", 8814, "\xe2\x89\xae"),
 		ENTITY_DEF("uharr", 8638, "\xe2\x86\xbe"),
 		ENTITY_DEF("lambda", 955, "\xce\xbb"),
-		ENTITY_DEF("uuml", 252, "\xc3\xbc"),
+		ENTITY_DEF_HEUR("uuml", 252, "\xc3\xbc"),
 		ENTITY_DEF("horbar", 8213, "\xe2\x80\x95"),
 		ENTITY_DEF("ccirc", 265, "\xc4\x89"),
 		ENTITY_DEF("sqcup", 8852, "\xe2\x8a\x94"),
@@ -1385,9 +1387,9 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("nearr", 8599, "\xe2\x86\x97"),
 		ENTITY_DEF("NotSubsetEqual", 8840, "\xe2\x8a\x88"),
 		ENTITY_DEF("planck", 8463, "\xe2\x84\x8f"),
-		ENTITY_DEF("Uuml", 220, "\xc3\x9c"),
+		ENTITY_DEF_HEUR("Uuml", 220, "\xc3\x9c"),
 		ENTITY_DEF("spadesuit", 9824, "\xe2\x99\xa0"),
-		ENTITY_DEF("sect", 167, "\xc2\xa7"),
+		ENTITY_DEF_HEUR("sect", 167, "\xc2\xa7"),
 		ENTITY_DEF("cdot", 267, "\xc4\x8b"),
 		ENTITY_DEF("boxVh", 9579, "\xe2\x95\xab"),
 		ENTITY_DEF("zscr", 120015, "\xf0\x9d\x93\x8f"),
@@ -1407,7 +1409,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("vrtri", 8883, "\xe2\x8a\xb3"),
 		ENTITY_DEF("part", 8706, "\xe2\x88\x82"),
 		ENTITY_DEF("esim", 8770, "\xe2\x89\x82"),
-		ENTITY_DEF("atilde", 227, "\xc3\xa3"),
+		ENTITY_DEF_HEUR("atilde", 227, "\xc3\xa3"),
 		ENTITY_DEF("DownRightTeeVector", 10591, "\xe2\xa5\x9f"),
 		ENTITY_DEF("jcirc", 309, "\xc4\xb5"),
 		ENTITY_DEF("Ecaron", 282, "\xc4\x9a"),
@@ -1464,7 +1466,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("nexists", 8708, "\xe2\x88\x84"),
 		ENTITY_DEF("theta", 952, "\xce\xb8"),
 		ENTITY_DEF("plankv", 8463, "\xe2\x84\x8f"),
-		ENTITY_DEF("sup2", 178, "\xc2\xb2"),
+		ENTITY_DEF_HEUR("sup2", 178, "\xc2\xb2"),
 		ENTITY_DEF("lessapprox", 10885, "\xe2\xaa\x85"),
 		ENTITY_DEF("gdot", 289, "\xc4\xa1"),
 		ENTITY_DEF("angmsdae", 10668, "\xe2\xa6\xac"),
@@ -1552,7 +1554,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("midast", 42, "\x2a"),
 		ENTITY_DEF("lscr", 120001, "\xf0\x9d\x93\x81"),
 		ENTITY_DEF("nGt", 8811, "\xe2\x89\xab\xe2\x83\x92"),
-		ENTITY_DEF("Euml", 203, "\xc3\x8b"),
+		ENTITY_DEF_HEUR("Euml", 203, "\xc3\x8b"),
 		ENTITY_DEF("blacktriangledown", 9662, "\xe2\x96\xbe"),
 		ENTITY_DEF("Rcy", 1056, "\xd0\xa0"),
 		ENTITY_DEF("dfisht", 10623, "\xe2\xa5\xbf"),
@@ -1588,14 +1590,14 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("plusb", 8862, "\xe2\x8a\x9e"),
 		ENTITY_DEF("odsold", 10684, "\xe2\xa6\xbc"),
 		ENTITY_DEF("varsupsetneqq", 10956, "\xe2\xab\x8c\xef\xb8\x80"),
-		ENTITY_DEF("otilde", 245, "\xc3\xb5"),
+		ENTITY_DEF_HEUR("otilde", 245, "\xc3\xb5"),
 		ENTITY_DEF("gtcir", 10874, "\xe2\xa9\xba"),
 		ENTITY_DEF("lltri", 9722, "\xe2\x97\xba"),
 		ENTITY_DEF("rx", 8478, "\xe2\x84\x9e"),
 		ENTITY_DEF("ljcy", 1113, "\xd1\x99"),
 		ENTITY_DEF("parsim", 10995, "\xe2\xab\xb3"),
 		ENTITY_DEF("NotElement", 8713, "\xe2\x88\x89"),
-		ENTITY_DEF("plusmn", 177, "\xc2\xb1"),
+		ENTITY_DEF_HEUR("plusmn", 177, "\xc2\xb1"),
 		ENTITY_DEF("varsubsetneq", 8842, "\xe2\x8a\x8a\xef\xb8\x80"),
 		ENTITY_DEF("subset", 8834, "\xe2\x8a\x82"),
 		ENTITY_DEF("awint", 10769, "\xe2\xa8\x91"),
@@ -1622,12 +1624,12 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("nharr", 8622, "\xe2\x86\xae"),
 		ENTITY_DEF("varnothing", 8709, "\xe2\x88\x85"),
 		ENTITY_DEF("ShortDownArrow", 8595, "\xe2\x86\x93"),
-		ENTITY_DEF("nbsp", 160, " "),
+		ENTITY_DEF_HEUR("nbsp", 160, " "),
 		ENTITY_DEF("asympeq", 8781, "\xe2\x89\x8d"),
 		ENTITY_DEF("rbrkslu", 10640, "\xe2\xa6\x90"),
 		ENTITY_DEF("rho", 961, "\xcf\x81"),
 		ENTITY_DEF("Mscr", 8499, "\xe2\x84\xb3"),
-		ENTITY_DEF("eth", 240, "\xc3\xb0"),
+		ENTITY_DEF_HEUR("eth", 240, "\xc3\xb0"),
 		ENTITY_DEF("suplarr", 10619, "\xe2\xa5\xbb"),
 		ENTITY_DEF("Tab", 9, "\x09"),
 		ENTITY_DEF("omicron", 959, "\xce\xbf"),
@@ -1692,7 +1694,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("vartheta", 977, "\xcf\x91"),
 		ENTITY_DEF("nsub", 8836, "\xe2\x8a\x84"),
 		ENTITY_DEF("DownTee", 8868, "\xe2\x8a\xa4"),
-		ENTITY_DEF("acute", 180, "\xc2\xb4"),
+		ENTITY_DEF_HEUR("acute", 180, "\xc2\xb4"),
 		ENTITY_DEF("GreaterLess", 8823, "\xe2\x89\xb7"),
 		ENTITY_DEF("supplus", 10944, "\xe2\xab\x80"),
 		ENTITY_DEF("Vbar", 10987, "\xe2\xab\xab"),
@@ -1760,7 +1762,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("LeftRightVector", 10574, "\xe2\xa5\x8e"),
 		ENTITY_DEF("DownLeftVectorBar", 10582, "\xe2\xa5\x96"),
 		ENTITY_DEF("suphsub", 10967, "\xe2\xab\x97"),
-		ENTITY_DEF("cedil", 184, "\xc2\xb8"),
+		ENTITY_DEF_HEUR("cedil", 184, "\xc2\xb8"),
 		ENTITY_DEF("prurel", 8880, "\xe2\x8a\xb0"),
 		ENTITY_DEF("imagpart", 8465, "\xe2\x84\x91"),
 		ENTITY_DEF("Hscr", 8459, "\xe2\x84\x8b"),
@@ -1772,7 +1774,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("nesim", 8770, "\xe2\x89\x82\xcc\xb8"),
 		ENTITY_DEF("varepsilon", 1013, "\xcf\xb5"),
 		ENTITY_DEF("DoubleRightTee", 8872, "\xe2\x8a\xa8"),
-		ENTITY_DEF("not", 172, "\xc2\xac"),
+		ENTITY_DEF_HEUR("not", 172, "\xc2\xac"),
 		ENTITY_DEF("lesdot", 10879, "\xe2\xa9\xbf"),
 		ENTITY_DEF("backepsilon", 1014, "\xcf\xb6"),
 		ENTITY_DEF("srarr", 8594, "\xe2\x86\x92"),
@@ -1792,7 +1794,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("NewLine", 10, "\x0a"),
 		ENTITY_DEF("bigotimes", 10754, "\xe2\xa8\x82"),
 		ENTITY_DEF("lAtail", 10523, "\xe2\xa4\x9b"),
-		ENTITY_DEF("frac14", 188, "\xc2\xbc"),
+		ENTITY_DEF_HEUR("frac14", 188, "\xc2\xbc"),
 		ENTITY_DEF("or", 8744, "\xe2\x88\xa8"),
 		ENTITY_DEF("subedot", 10947, "\xe2\xab\x83"),
 		ENTITY_DEF("nmid", 8740, "\xe2\x88\xa4"),
@@ -1805,7 +1807,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("topcir", 10993, "\xe2\xab\xb1"),
 		ENTITY_DEF("ne", 8800, "\xe2\x89\xa0"),
 		ENTITY_DEF("osol", 8856, "\xe2\x8a\x98"),
-		ENTITY_DEF("amp", 38, "\x26"),
+		ENTITY_DEF_HEUR("amp", 38, "\x26"),
 		ENTITY_DEF("ncap", 10819, "\xe2\xa9\x83"),
 		ENTITY_DEF("Sscr", 119982, "\xf0\x9d\x92\xae"),
 		ENTITY_DEF("sung", 9834, "\xe2\x99\xaa"),
@@ -1846,7 +1848,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("Gcirc", 284, "\xc4\x9c"),
 		ENTITY_DEF("lesdotor", 10883, "\xe2\xaa\x83"),
 		ENTITY_DEF("escr", 8495, "\xe2\x84\xaf"),
-		ENTITY_DEF("THORN", 222, "\xc3\x9e"),
+		ENTITY_DEF_HEUR("THORN", 222, "\xc3\x9e"),
 		ENTITY_DEF("UpArrowBar", 10514, "\xe2\xa4\x92"),
 		ENTITY_DEF("nvrtrie", 8885, "\xe2\x8a\xb5\xe2\x83\x92"),
 		ENTITY_DEF("varkappa", 1008, "\xcf\xb0"),
@@ -1865,11 +1867,11 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("telrec", 8981, "\xe2\x8c\x95"),
 		ENTITY_DEF("vellip", 8942, "\xe2\x8b\xae"),
 		ENTITY_DEF("nrArr", 8655, "\xe2\x87\x8f"),
-		ENTITY_DEF("ugrave", 249, "\xc3\xb9"),
+		ENTITY_DEF_HEUR("ugrave", 249, "\xc3\xb9"),
 		ENTITY_DEF("uring", 367, "\xc5\xaf"),
 		ENTITY_DEF("Bernoullis", 8492, "\xe2\x84\xac"),
 		ENTITY_DEF("nles", 10877, "\xe2\xa9\xbd\xcc\xb8"),
-		ENTITY_DEF("macr", 175, "\xc2\xaf"),
+		ENTITY_DEF_HEUR("macr", 175, "\xc2\xaf"),
 		ENTITY_DEF("boxuR", 9560, "\xe2\x95\x98"),
 		ENTITY_DEF("clubsuit", 9827, "\xe2\x99\xa3"),
 		ENTITY_DEF("rightarrowtail", 8611, "\xe2\x86\xa3"),
@@ -1881,7 +1883,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("vltri", 8882, "\xe2\x8a\xb2"),
 		ENTITY_DEF("quaternions", 8461, "\xe2\x84\x8d"),
 		ENTITY_DEF("rfr", 120111, "\xf0\x9d\x94\xaf"),
-		ENTITY_DEF("Ouml", 214, "\xc3\x96"),
+		ENTITY_DEF_HEUR("Ouml", 214, "\xc3\x96"),
 		ENTITY_DEF("rsh", 8625, "\xe2\x86\xb1"),
 		ENTITY_DEF("emptyv", 8709, "\xe2\x88\x85"),
 		ENTITY_DEF("sqsup", 8848, "\xe2\x8a\x90"),
@@ -1891,7 +1893,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("eqsim", 8770, "\xe2\x89\x82"),
 		ENTITY_DEF("NotSucceedsEqual", 10928, "\xe2\xaa\xb0\xcc\xb8"),
 		ENTITY_DEF("primes", 8473, "\xe2\x84\x99"),
-		ENTITY_DEF("times", 215, "\xc3\x97"),
+		ENTITY_DEF_HEUR("times", 215, "\xc3\x97"),
 		ENTITY_DEF("rangd", 10642, "\xe2\xa6\x92"),
 		ENTITY_DEF("rightharpoonup", 8640, "\xe2\x87\x80"),
 		ENTITY_DEF("lrhard", 10605, "\xe2\xa5\xad"),
@@ -1925,7 +1927,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("pi", 960, "\xcf\x80"),
 		ENTITY_DEF("lesg", 8922, "\xe2\x8b\x9a\xef\xb8\x80"),
 		ENTITY_DEF("orderof", 8500, "\xe2\x84\xb4"),
-		ENTITY_DEF("uacute", 250, "\xc3\xba"),
+		ENTITY_DEF_HEUR("uacute", 250, "\xc3\xba"),
 		ENTITY_DEF("Barv", 10983, "\xe2\xab\xa7"),
 		ENTITY_DEF("Theta", 920, "\xce\x98"),
 		ENTITY_DEF("leftrightsquigarrow", 8621, "\xe2\x86\xad"),
@@ -1972,7 +1974,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("angmsdab", 10665, "\xe2\xa6\xa9"),
 		ENTITY_DEF("wedgeq", 8793, "\xe2\x89\x99"),
 		ENTITY_DEF("iogon", 303, "\xc4\xaf"),
-		ENTITY_DEF("laquo", 171, "\xc2\xab"),
+		ENTITY_DEF_HEUR("laquo", 171, "\xc2\xab"),
 		ENTITY_DEF("NestedGreaterGreater", 8811, "\xe2\x89\xab"),
 		ENTITY_DEF("UnionPlus", 8846, "\xe2\x8a\x8e"),
 		ENTITY_DEF("CircleDot", 8857, "\xe2\x8a\x99"),
@@ -1991,7 +1993,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("egsdot", 10904, "\xe2\xaa\x98"),
 		ENTITY_DEF("target", 8982, "\xe2\x8c\x96"),
 		ENTITY_DEF("lesges", 10899, "\xe2\xaa\x93"),
-		ENTITY_DEF("curren", 164, "\xc2\xa4"),
+		ENTITY_DEF_HEUR("curren", 164, "\xc2\xa4"),
 		ENTITY_DEF("yopf", 120170, "\xf0\x9d\x95\xaa"),
 		ENTITY_DEF("frac23", 8532, "\xe2\x85\x94"),
 		ENTITY_DEF("NotSucceedsTilde", 8831, "\xe2\x89\xbf\xcc\xb8"),
@@ -2009,7 +2011,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("lessgtr", 8822, "\xe2\x89\xb6"),
 		ENTITY_DEF("thickapprox", 8776, "\xe2\x89\x88"),
 		ENTITY_DEF("lbrksld", 10639, "\xe2\xa6\x8f"),
-		ENTITY_DEF("oslash", 248, "\xc3\xb8"),
+		ENTITY_DEF_HEUR("oslash", 248, "\xc3\xb8"),
 		ENTITY_DEF("NotCupCap", 8813, "\xe2\x89\xad"),
 		ENTITY_DEF("elinters", 9191, "\xe2\x8f\xa7"),
 		ENTITY_DEF("Assign", 8788, "\xe2\x89\x94"),
@@ -2024,7 +2026,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("race", 8765, "\xe2\x88\xbd\xcc\xb1"),
 		ENTITY_DEF("Ascr", 119964, "\xf0\x9d\x92\x9c"),
 		ENTITY_DEF("Xscr", 119987, "\xf0\x9d\x92\xb3"),
-		ENTITY_DEF("acirc", 226, "\xc3\xa2"),
+		ENTITY_DEF_HEUR("acirc", 226, "\xc3\xa2"),
 		ENTITY_DEF("otimesas", 10806, "\xe2\xa8\xb6"),
 		ENTITY_DEF("gscr", 8458, "\xe2\x84\x8a"),
 		ENTITY_DEF("gcy", 1075, "\xd0\xb3"),
@@ -2033,8 +2035,8 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("Acy", 1040, "\xd0\x90"),
 		ENTITY_DEF("NotGreaterLess", 8825, "\xe2\x89\xb9"),
 		ENTITY_DEF("dtdot", 8945, "\xe2\x8b\xb1"),
-		ENTITY_DEF("quot", 34, "\x22"),
-		ENTITY_DEF("micro", 181, "\xc2\xb5"),
+		ENTITY_DEF_HEUR("quot", 34, "\x22"),
+		ENTITY_DEF_HEUR("micro", 181, "\xc2\xb5"),
 		ENTITY_DEF("simplus", 10788, "\xe2\xa8\xa4"),
 		ENTITY_DEF("nsupseteq", 8841, "\xe2\x8a\x89"),
 		ENTITY_DEF("Ufr", 120088, "\xf0\x9d\x94\x98"),
@@ -2062,7 +2064,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("dcy", 1076, "\xd0\xb4"),
 		ENTITY_DEF("boxvl", 9508, "\xe2\x94\xa4"),
 		ENTITY_DEF("RightArrowBar", 8677, "\xe2\x87\xa5"),
-		ENTITY_DEF("yuml", 255, "\xc3\xbf"),
+		ENTITY_DEF_HEUR("yuml", 255, "\xc3\xbf"),
 		ENTITY_DEF("parallel", 8741, "\xe2\x88\xa5"),
 		ENTITY_DEF("succneqq", 10934, "\xe2\xaa\xb6"),
 		ENTITY_DEF("bemptyv", 10672, "\xe2\xa6\xb0"),
@@ -2156,7 +2158,7 @@ static const auto html_entities_array = rspamd::array_of<html_entity_def>(
 		ENTITY_DEF("cylcty", 9005, "\xe2\x8c\xad"),
 		ENTITY_DEF("sube", 8838, "\xe2\x8a\x86"),
 		ENTITY_DEF("NotEqualTilde", 8770, "\xe2\x89\x82\xcc\xb8"),
-		ENTITY_DEF("Yuml", 376, "\xc5\xb8"),
+		ENTITY_DEF_HEUR("Yuml", 376, "\xc5\xb8"),
 		ENTITY_DEF("comp", 8705, "\xe2\x88\x81"),
*** OUTPUT TRUNCATED, 19 LINES SKIPPED ***


More information about the Commits mailing list