commit a9dcbc7: [Rework] Move and adopt entities handling logic

Vsevolod Stakhov vsevolod at highsecure.ru
Fri May 21 15:35:08 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-05-21 09:44:16 +0100
URL: https://github.com/rspamd/rspamd/commit/a9dcbc77d64c9fde94abe0fc66d80fd324feb720

[Rework] Move and adopt entities handling logic

---
 src/libserver/CMakeLists.txt                       |    3 +-
 src/libserver/html/{html.cc => html.cxx}           |  218 +-
 .../html/{html_entities.hxx => html_entities.cxx}  |  213 +-
 src/libserver/html/html_entities.hxx               | 2175 +-------------------
 src/libserver/html/html_tag_defs.hxx               |    1 -
 5 files changed, 219 insertions(+), 2391 deletions(-)

diff --git a/src/libserver/CMakeLists.txt b/src/libserver/CMakeLists.txt
index e8267292c..3a4bae81f 100644
--- a/src/libserver/CMakeLists.txt
+++ b/src/libserver/CMakeLists.txt
@@ -34,7 +34,8 @@ SET(LIBRSPAMDSERVERSRC
 				${CMAKE_CURRENT_SOURCE_DIR}/http/http_context.c
 				${CMAKE_CURRENT_SOURCE_DIR}/maps/map.c
 				${CMAKE_CURRENT_SOURCE_DIR}/maps/map_helpers.c
-				${CMAKE_CURRENT_SOURCE_DIR}/html/html.cc
+				${CMAKE_CURRENT_SOURCE_DIR}/html/html_entities.cxx
+				${CMAKE_CURRENT_SOURCE_DIR}/html/html.cxx
 				${LIBCSSSRC})
 
 # Librspamd-server
diff --git a/src/libserver/html/html.cc b/src/libserver/html/html.cxx
similarity index 93%
rename from src/libserver/html/html.cc
rename to src/libserver/html/html.cxx
index e650cc3e4..d1f2da438 100644
--- a/src/libserver/html/html.cc
+++ b/src/libserver/html/html.cxx
@@ -51,7 +51,6 @@ INIT_LOG_MODULE(html)
 
 
 [[maybe_unused]] static const html_tags_storage html_tags_defs;
-[[maybe_unused]] static const html_entities_storage html_entities_defs;
 
 static struct rspamd_url *rspamd_html_process_url(rspamd_mempool_t *pool,
 												  const gchar *start, guint len,
@@ -132,218 +131,7 @@ rspamd_html_tag_by_id(gint id) {
 /* Decode HTML entitles in text */
 guint
 rspamd_html_decode_entitles_inplace(gchar *s, gsize len) {
-	goffset l, rep_len;
-	gchar *t = s, *h = s, *e = s, *end_ptr, old_c;
-	const gchar *end;
-	const gchar *entity;
-	gboolean seen_hash = FALSE, seen_hex = FALSE;
-	enum {
-		do_undefined,
-		do_digits_only,
-		do_mixed,
-	} seen_digit_only;
-	gint state = 0, base;
-	UChar32 uc;
-	khiter_t k;
-
-	if (len == 0) {
-		return 0;
-	}
-	else {
-		l = len;
-	}
-
-	end = s + l;
-
-	while (h - s < l && t <= h) {
-		switch (state) {
-			/* Out of entity */
-		case 0:
-			if (*h == '&') {
-				state = 1;
-				seen_hash = FALSE;
-				seen_hex = FALSE;
-				seen_digit_only = do_undefined;
-				e = h;
-				h++;
-				continue;
-			}
-			else {
-				*t = *h;
-				h++;
-				t++;
-			}
-			break;
-		case 1:
-			if (*h == ';' && h > e) {
-decode_entity:
-				/* Determine base */
-				/* First find in entities table */
-				old_c = *h;
-				*h = '\0';
-				entity = e + 1;
-				uc = 0;
-
-				if (*entity != '#') {
-					k = kh_get (entity_by_name, html_entity_by_name, entity);
-					*h = old_c;
-
-					if (k != kh_end (html_entity_by_name)) {
-						if (kh_val (html_entity_by_name, k)) {
-							rep_len = strlen(kh_val (html_entity_by_name, k));
-
-							if (end - t >= rep_len) {
-								memcpy(t, kh_val (html_entity_by_name, k),
-										rep_len);
-								t += rep_len;
-							}
-						}
-						else {
-							if (end - t > h - e + 1) {
-								memmove(t, e, h - e + 1);
-								t += h - e + 1;
-							}
-						}
-					}
-					else {
-						if (end - t > h - e + 1) {
-							memmove(t, e, h - e + 1);
-							t += h - e + 1;
-						}
-					}
-				}
-				else if (e + 2 < h) {
-					if (*(e + 2) == 'x' || *(e + 2) == 'X') {
-						base = 16;
-					}
-					else if (*(e + 2) == 'o' || *(e + 2) == 'O') {
-						base = 8;
-					}
-					else {
-						base = 10;
-					}
-
-					if (base == 10) {
-						uc = strtoul((e + 2), &end_ptr, base);
-					}
-					else {
-						uc = strtoul((e + 3), &end_ptr, base);
-					}
-
-					if (end_ptr != NULL && *end_ptr != '\0') {
-						/* Skip undecoded */
-						*h = old_c;
-
-						if (end - t > h - e + 1) {
-							memmove(t, e, h - e + 1);
-							t += h - e + 1;
-						}
-					}
-					else {
-						/* Search for a replacement */
-						*h = old_c;
-						k = kh_get (entity_by_number, html_entity_by_number, uc);
-
-						if (k != kh_end (html_entity_by_number)) {
-							if (kh_val (html_entity_by_number, k)) {
-								rep_len = strlen(kh_val (html_entity_by_number, k));
-
-								if (end - t >= rep_len) {
-									memcpy(t, kh_val (html_entity_by_number, k),
-											rep_len);
-									t += rep_len;
-								}
-							}
-							else {
-								if (end - t > h - e + 1) {
-									memmove(t, e, h - e + 1);
-									t += h - e + 1;
-								}
-							}
-						}
-						else {
-							/* Unicode point */
-							goffset off = t - s;
-							UBool is_error = 0;
-
-							if (uc > 0) {
-								U8_APPEND (s, off, len, uc, is_error);
-								if (!is_error) {
-									t = s + off;
-								}
-								else {
-									/* Leave invalid entities as is */
-									if (end - t > h - e + 1) {
-										memmove(t, e, h - e + 1);
-										t += h - e + 1;
-									}
-								}
-							}
-							else if (end - t > h - e + 1) {
-								memmove(t, e, h - e + 1);
-								t += h - e + 1;
-							}
-						}
-
-						if (end - t > 0 && old_c != ';') {
-							/* Fuck email clients, fuck them */
-							*t++ = old_c;
-						}
-					}
-				}
-
-				state = 0;
-			}
-			else if (*h == '&') {
-				/* Previous `&` was bogus */
-				state = 1;
-
-				if (end - t > h - e) {
-					memmove(t, e, h - e);
-					t += h - e;
-				}
-
-				e = h;
-			}
-			else if (*h == '#') {
-				seen_hash = TRUE;
 
-				if (h + 1 < end && h[1] == 'x') {
-					seen_hex = TRUE;
-					/* Skip one more character */
-					h++;
-				}
-			}
-			else if (seen_digit_only != do_mixed &&
-					 (g_ascii_isdigit (*h) || (seen_hex && g_ascii_isxdigit (*h)))) {
-				seen_digit_only = do_digits_only;
-			}
-			else {
-				if (seen_digit_only == do_digits_only && seen_hash && h > e) {
-					/* We have seen some digits, so we can try to decode, eh */
-					/* Fuck retarded email clients... */
-					goto decode_entity;
-				}
-
-				seen_digit_only = do_mixed;
-			}
-
-			h++;
-
-			break;
-		}
-	}
-
-	/* Leftover */
-	if (state == 1 && h > e) {
-		/* Unfinished entity, copy as is */
-		if (end - t >= h - e) {
-			memmove(t, e, h - e);
-			t += h - e;
-		}
-	}
-
-	return (t - s);
 }
 
 static gboolean
@@ -3135,3 +2923,9 @@ rspamd_html_process_part (rspamd_mempool_t *pool,
 	return rspamd_html_process_part_full (pool, hc, in, NULL,
 			NULL, NULL, FALSE);
 }
+
+guint
+rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
+{
+	return rspamd::html::decode_html_entitles_inplace(s, len);
+}
diff --git a/src/libserver/html/html_entities.hxx b/src/libserver/html/html_entities.cxx
similarity index 96%
copy from src/libserver/html/html_entities.hxx
copy to src/libserver/html/html_entities.cxx
index 4953a0bf9..9be8c67aa 100644
--- a/src/libserver/html/html_entities.hxx
+++ b/src/libserver/html/html_entities.cxx
@@ -1,5 +1,5 @@
 /*-
- * Copyright 2018 Vsevolod Stakhov
+ * Copyright 2021 Vsevolod Stakhov
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,11 +14,13 @@
  * limitations under the License.
  */
 
-#ifndef RSPAMD_HTML_ENTITIES_H
-#define RSPAMD_HTML_ENTITIES_H
+#include "config.h"
+#include "html_entities.hxx"
 
 #include <string>
 #include <contrib/robin-hood/robin_hood.h>
+#include <unicode/utf8.h>
+#include "libutil/cxx/util.hxx"
 
 namespace rspamd::html {
 
@@ -2171,7 +2173,7 @@ public:
 		}
 	}
 
-	auto by_name(std::string_view name) -> const html_entity_def* {
+	auto by_name(std::string_view name) const -> const html_entity_def * {
 		auto it = entity_by_name.find(name);
 
 		if (it != entity_by_name.end()) {
@@ -2181,7 +2183,7 @@ public:
 		return nullptr;
 	}
 
-	auto by_id(tag_id_t id) -> const html_entity_def* {
+	auto by_id(int id) const -> const html_entity_def * {
 		auto it = entity_by_id.find(id);
 		if (it != entity_by_id.end()) {
 			return &(it->second);
@@ -2191,6 +2193,205 @@ public:
 	}
 };
 
+static const html_entities_storage html_entities_defs;
+
+std::size_t
+decode_html_entitles_inplace(char *s, std::size_t len)
+{
+	long l, rep_len;
+	char *t = s, *h = s, *e = s, *end_ptr, old_c;
+	const gchar *end;
+	const gchar *entity;
+	bool seen_hash = false, seen_hex = false;
+	enum {
+		do_undefined,
+		do_digits_only,
+		do_mixed,
+	} seen_digit_only;
+	int state = 0, base;
+	UChar32 uc;
+
+	if (len == 0) {
+		return 0;
+	}
+	else {
+		l = len;
+	}
+
+	end = s + l;
+
+	while (h - s < l && t <= h) {
+		switch (state) {
+			/* Out of entity */
+		case 0:
+			if (*h == '&') {
+				state = 1;
+				seen_hash = false;
+				seen_hex = false;
+				seen_digit_only = do_undefined;
+				e = h;
+				h++;
+				continue;
+			}
+			else {
+				*t = *h;
+				h++;
+				t++;
+			}
+			break;
+		case 1:
+			if (*h == ';' && h > e) {
+decode_entity:
+				old_c = *h;
+				*h = '\0';
+				entity = e + 1;
+				uc = 0;
+
+				if (*entity != '#') {
+					const auto *entity_def = html_entities_defs.by_name({entity,
+																		 (std::size_t) (h - entity)});
+					*h = old_c;
+
+					if (entity_def) {
+						rep_len = entity_def->replacement.size();
+
+						if (end - t >= rep_len) {
+							memcpy(t, entity_def->replacement.data(),
+									rep_len);
+							t += rep_len;
+						}
+					}
+					else {
+						if (end - t > h - e + 1) {
+							memmove(t, e, h - e + 1);
+							t += h - e + 1;
+						}
+					}
+				}
+				else if (e + 2 < h) {
+					if (*(e + 2) == 'x' || *(e + 2) == 'X') {
+						base = 16;
+					}
+					else if (*(e + 2) == 'o' || *(e + 2) == 'O') {
+						base = 8;
+					}
+					else {
+						base = 10;
+					}
+
+					if (base == 10) {
+						uc = strtoul((e + 2), &end_ptr, base);
+					}
+					else {
+						uc = strtoul((e + 3), &end_ptr, base);
+					}
+
+					if (end_ptr != nullptr && *end_ptr != '\0') {
+						/* Skip undecoded */
+						*h = old_c;
+
+						if (end - t > h - e + 1) {
+							memmove(t, e, h - e + 1);
+							t += h - e + 1;
+						}
+					}
+					else {
+						/* Search for a replacement */
+						*h = old_c;
+						const auto *entity_def = html_entities_defs.by_id(uc);
+
+						if (entity_def) {
+							rep_len = entity_def->replacement.size();
+
+							if (end - t >= rep_len) {
+								memcpy(t, entity_def->replacement.data(),
+										rep_len);
+								t += rep_len;
+							}
+						}
+						else {
+							/* Unicode point */
+							goffset off = t - s;
+							UBool is_error = 0;
+
+							if (uc > 0) {
+								U8_APPEND (s, off, len, uc, is_error);
+								if (!is_error) {
+									t = s + off;
+								}
+								else {
+									/* Leave invalid entities as is */
+									if (end - t > h - e + 1) {
+										memmove(t, e, h - e + 1);
+										t += h - e + 1;
+									}
+								}
+							}
+							else if (end - t > h - e + 1) {
+								memmove(t, e, h - e + 1);
+								t += h - e + 1;
+							}
+						}
+
+						if (end - t > 0 && old_c != ';') {
+							/* Fuck email clients, fuck them */
+							*t++ = old_c;
+						}
+					}
+				}
+
+				state = 0;
+			}
+			else if (*h == '&') {
+				/* Previous `&` was bogus */
+				state = 1;
+
+				if (end - t > h - e) {
+					memmove(t, e, h - e);
+					t += h - e;
+				}
+
+				e = h;
+			}
+			else if (*h == '#') {
+				seen_hash = true;
+
+				if (h + 1 < end && h[1] == 'x') {
+					seen_hex = true;
+					/* Skip one more character */
+					h++;
+				}
+			}
+			else if (seen_digit_only != do_mixed &&
+					 (g_ascii_isdigit (*h) || (seen_hex && g_ascii_isxdigit (*h)))) {
+				seen_digit_only = do_digits_only;
+			}
+			else {
+				if (seen_digit_only == do_digits_only && seen_hash && h > e) {
+					/* We have seen some digits, so we can try to decode, eh */
+					/* Fuck retarded email clients... */
+					goto decode_entity;
+				}
+
+				seen_digit_only = do_mixed;
+			}
+
+			h++;
+
+			break;
+		}
+	}
+
+	/* Leftover */
+	if (state == 1 && h > e) {
+		/* Unfinished entity, copy as is */
+		if (end - t >= h - e) {
+			memmove(t, e, h - e);
+			t += h - e;
+		}
+	}
+
+	return (t - s);
 }
 
-#endif
+} // namespace rspamd::html
\ No newline at end of file
diff --git a/src/libserver/html/html_entities.hxx b/src/libserver/html/html_entities.hxx
index 4953a0bf9..9e48c20a0 100644
--- a/src/libserver/html/html_entities.hxx
+++ b/src/libserver/html/html_entities.hxx
@@ -1,5 +1,5 @@
 /*-
- * Copyright 2018 Vsevolod Stakhov
+ * Copyright 2021 Vsevolod Stakhov
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,2180 +16,13 @@
 
 #ifndef RSPAMD_HTML_ENTITIES_H
 #define RSPAMD_HTML_ENTITIES_H
+#pragma once
 
-#include <string>
-#include <contrib/robin-hood/robin_hood.h>
+#include <utility>
 
 namespace rspamd::html {
 
-struct html_entity_def {
-	std::string name;
-	std::string replacement;
-	unsigned code;
-};
-
-#define ENTITY_DEF(name, code, replacement) html_entity_def{(name), (replacement), (code)}
-
-static const auto html_entities_array = rspamd::array_of<html_entity_def>(
-		ENTITY_DEF("szlig", 223, "\xc3\x9f"),
-		ENTITY_DEF("prime", 8242, "\xe2\x80\xb2"),
-		ENTITY_DEF("lnsim", 8934, "\xe2\x8b\xa6"),
-		ENTITY_DEF("nvDash", 8877, "\xe2\x8a\xad"),
-		ENTITY_DEF("isinsv", 8947, "\xe2\x8b\xb3"),
-		ENTITY_DEF("notin", 8713, "\xe2\x88\x89"),
-		ENTITY_DEF("becaus", 8757, "\xe2\x88\xb5"),
-		ENTITY_DEF("Leftrightarrow", 8660, "\xe2\x87\x94"),
-		ENTITY_DEF("EmptySmallSquare", 9723, "\xe2\x97\xbb"),
-		ENTITY_DEF("SquareUnion", 8852, "\xe2\x8a\x94"),
-		ENTITY_DEF("subdot", 10941, "\xe2\xaa\xbd"),
-		ENTITY_DEF("Dstrok", 272, "\xc4\x90"),
-		ENTITY_DEF("rrarr", 8649, "\xe2\x87\x89"),
-		ENTITY_DEF("rArr", 8658, "\xe2\x87\x92"),
-		ENTITY_DEF("Aacute", 193, "\xc3\x81"),
-		ENTITY_DEF("kappa", 954, "\xce\xba"),
-		ENTITY_DEF("Iopf", 120128, "\xf0\x9d\x95\x80"),
-		ENTITY_DEF("hyphen", 8208, "\xe2\x80\x90"),
-		ENTITY_DEF("rarrbfs", 10528, "\xe2\xa4\xa0"),
-		ENTITY_DEF("supsetneqq", 10956, "\xe2\xab\x8c"),
-		ENTITY_DEF("gacute", 501, "\xc7\xb5"),
-		ENTITY_DEF("VeryThinSpace", 8202, "\xe2\x80\x8a"),
-		ENTITY_DEF("tint", 8749, "\xe2\x88\xad"),
-		ENTITY_DEF("ffr", 120099, "\xf0\x9d\x94\xa3"),
-		ENTITY_DEF("kgreen", 312, "\xc4\xb8"),
-		ENTITY_DEF("nis", 8956, "\xe2\x8b\xbc"),
-		ENTITY_DEF("NotRightTriangleBar", 10704, "\xe2\xa7\x90\xcc\xb8"),
-		ENTITY_DEF("Eogon", 280, "\xc4\x98"),
-		ENTITY_DEF("lbrke", 10635, "\xe2\xa6\x8b"),
-		ENTITY_DEF("phi", 966, "\xcf\x86"),
-		ENTITY_DEF("notnivc", 8957, "\xe2\x8b\xbd"),
-		ENTITY_DEF("utilde", 361, "\xc5\xa9"),
-		ENTITY_DEF("Fopf", 120125, "\xf0\x9d\x94\xbd"),
-		ENTITY_DEF("Vcy", 1042, "\xd0\x92"),
-		ENTITY_DEF("erDot", 8787, "\xe2\x89\x93"),
-		ENTITY_DEF("nsubE", 10949, "\xe2\xab\x85\xcc\xb8"),
-		ENTITY_DEF("egrave", 232, "\xc3\xa8"),
-		ENTITY_DEF("Lcedil", 315, "\xc4\xbb"),
-		ENTITY_DEF("lharul", 10602, "\xe2\xa5\xaa"),
-		ENTITY_DEF("middot", 183, "\xc2\xb7"),
-		ENTITY_DEF("ggg", 8921, "\xe2\x8b\x99"),
-		ENTITY_DEF("NestedLessLess", 8810, "\xe2\x89\xaa"),
-		ENTITY_DEF("tau", 964, "\xcf\x84"),
-		ENTITY_DEF("setmn", 8726, "\xe2\x88\x96"),
-		ENTITY_DEF("frac78", 8542, "\xe2\x85\x9e"),
-		ENTITY_DEF("para", 182, "\xc2\xb6"),
-		ENTITY_DEF("Rcedil", 342, "\xc5\x96"),
-		ENTITY_DEF("propto", 8733, "\xe2\x88\x9d"),
-		ENTITY_DEF("sqsubset", 8847, "\xe2\x8a\x8f"),
-		ENTITY_DEF("ensp", 8194, "\xe2\x80\x82"),
-		ENTITY_DEF("boxvH", 9578, "\xe2\x95\xaa"),
-		ENTITY_DEF("NotGreaterTilde", 8821, "\xe2\x89\xb5"),
-		ENTITY_DEF("ffllig", 64260, "\xef\xac\x84"),
-		ENTITY_DEF("kcedil", 311, "\xc4\xb7"),
-		ENTITY_DEF("omega", 969, "\xcf\x89"),
-		ENTITY_DEF("sime", 8771, "\xe2\x89\x83"),
-		ENTITY_DEF("LeftTriangleEqual", 8884, "\xe2\x8a\xb4"),
-		ENTITY_DEF("bsemi", 8271, "\xe2\x81\x8f"),
-		ENTITY_DEF("rdquor", 8221, "\xe2\x80\x9d"),
-		ENTITY_DEF("Utilde", 360, "\xc5\xa8"),
-		ENTITY_DEF("bsol", 92, "\x5c"),
-		ENTITY_DEF("risingdotseq", 8787, "\xe2\x89\x93"),
-		ENTITY_DEF("ultri", 9720, "\xe2\x97\xb8"),
-		ENTITY_DEF("rhov", 1009, "\xcf\xb1"),
-		ENTITY_DEF("TildeEqual", 8771, "\xe2\x89\x83"),
-		ENTITY_DEF("jukcy", 1108, "\xd1\x94"),
-		ENTITY_DEF("perp", 8869, "\xe2\x8a\xa5"),
-		ENTITY_DEF("capbrcup", 10825, "\xe2\xa9\x89"),
-		ENTITY_DEF("ltrie", 8884, "\xe2\x8a\xb4"),
-		ENTITY_DEF("LessTilde", 8818, "\xe2\x89\xb2"),
-		ENTITY_DEF("popf", 120161, "\xf0\x9d\x95\xa1"),
-		ENTITY_DEF("dbkarow", 10511, "\xe2\xa4\x8f"),
-		ENTITY_DEF("roang", 10221, "\xe2\x9f\xad"),
-		ENTITY_DEF("brvbar", 166, "\xc2\xa6"),
-		ENTITY_DEF("CenterDot", 183, "\xc2\xb7"),
-		ENTITY_DEF("notindot", 8949, "\xe2\x8b\xb5\xcc\xb8"),
-		ENTITY_DEF("supmult", 10946, "\xe2\xab\x82"),
-		ENTITY_DEF("multimap", 8888, "\xe2\x8a\xb8"),
-		ENTITY_DEF("frac34", 190, "\xc2\xbe"),
-		ENTITY_DEF("mapsto", 8614, "\xe2\x86\xa6"),
-		ENTITY_DEF("flat", 9837, "\xe2\x99\xad"),
-		ENTITY_DEF("updownarrow", 8597, "\xe2\x86\x95"),
-		ENTITY_DEF("gne", 10888, "\xe2\xaa\x88"),
-		ENTITY_DEF("nrarrc", 10547, "\xe2\xa4\xb3\xcc\xb8"),
-		ENTITY_DEF("suphsol", 10185, "\xe2\x9f\x89"),
-		ENTITY_DEF("nGtv", 8811, "\xe2\x89\xab\xcc\xb8"),
-		ENTITY_DEF("hopf", 120153, "\xf0\x9d\x95\x99"),
-		ENTITY_DEF("pointint", 10773, "\xe2\xa8\x95"),
-		ENTITY_DEF("glj", 10916, "\xe2\xaa\xa4"),
-		ENTITY_DEF("LeftDoubleBracket", 10214, "\xe2\x9f\xa6"),
-		ENTITY_DEF("NotSupersetEqual", 8841, "\xe2\x8a\x89"),
-		ENTITY_DEF("dot", 729, "\xcb\x99"),
-		ENTITY_DEF("tbrk", 9140, "\xe2\x8e\xb4"),
-		ENTITY_DEF("LeftUpDownVector", 10577, "\xe2\xa5\x91"),
-		ENTITY_DEF("uml", 168, "\xc2\xa8"),
-		ENTITY_DEF("bbrk", 9141, "\xe2\x8e\xb5"),
-		ENTITY_DEF("nearrow", 8599, "\xe2\x86\x97"),
-		ENTITY_DEF("backsimeq", 8909, "\xe2\x8b\x8d"),
-		ENTITY_DEF("dblac", 733, "\xcb\x9d"),
-		ENTITY_DEF("circleddash", 8861, "\xe2\x8a\x9d"),
-		ENTITY_DEF("ldsh", 8626, "\xe2\x86\xb2"),
-		ENTITY_DEF("sce", 10928, "\xe2\xaa\xb0"),
-		ENTITY_DEF("angst", 197, "\xc3\x85"),
-		ENTITY_DEF("yen", 165, "\xc2\xa5"),
-		ENTITY_DEF("nsupE", 10950, "\xe2\xab\x86\xcc\xb8"),
-		ENTITY_DEF("Uscr", 119984, "\xf0\x9d\x92\xb0"),
-		ENTITY_DEF("subplus", 10943, "\xe2\xaa\xbf"),
-		ENTITY_DEF("nleqq", 8806, "\xe2\x89\xa6\xcc\xb8"),
-		ENTITY_DEF("nprcue", 8928, "\xe2\x8b\xa0"),
-		ENTITY_DEF("Ocirc", 212, "\xc3\x94"),
-		ENTITY_DEF("disin", 8946, "\xe2\x8b\xb2"),
-		ENTITY_DEF("EqualTilde", 8770, "\xe2\x89\x82"),
-		ENTITY_DEF("YUcy", 1070, "\xd0\xae"),
-		ENTITY_DEF("Kscr", 119974, "\xf0\x9d\x92\xa6"),
-		ENTITY_DEF("lg", 8822, "\xe2\x89\xb6"),
-		ENTITY_DEF("nLeftrightarrow", 8654, "\xe2\x87\x8e"),
-		ENTITY_DEF("eplus", 10865, "\xe2\xa9\xb1"),
-		ENTITY_DEF("les", 10877, "\xe2\xa9\xbd"),
-		ENTITY_DEF("sfr", 120112, "\xf0\x9d\x94\xb0"),
-		ENTITY_DEF("HumpDownHump", 8782, "\xe2\x89\x8e"),
-		ENTITY_DEF("Fouriertrf", 8497, "\xe2\x84\xb1"),
-		ENTITY_DEF("Updownarrow", 8661, "\xe2\x87\x95"),
-		ENTITY_DEF("nrarr", 8603, "\xe2\x86\x9b"),
-		ENTITY_DEF("radic", 8730, "\xe2\x88\x9a"),
-		ENTITY_DEF("gnap", 10890, "\xe2\xaa\x8a"),
-		ENTITY_DEF("zeta", 950, "\xce\xb6"),
-		ENTITY_DEF("Qscr", 119980, "\xf0\x9d\x92\xac"),
-		ENTITY_DEF("NotRightTriangleEqual", 8941, "\xe2\x8b\xad"),
-		ENTITY_DEF("nshortmid", 8740, "\xe2\x88\xa4"),
-		ENTITY_DEF("SHCHcy", 1065, "\xd0\xa9"),
-		ENTITY_DEF("piv", 982, "\xcf\x96"),
-		ENTITY_DEF("angmsdaa", 10664, "\xe2\xa6\xa8"),
-		ENTITY_DEF("curlywedge", 8911, "\xe2\x8b\x8f"),
-		ENTITY_DEF("sqcaps", 8851, "\xe2\x8a\x93\xef\xb8\x80"),
-		ENTITY_DEF("sum", 8721, "\xe2\x88\x91"),
-		ENTITY_DEF("rarrtl", 8611, "\xe2\x86\xa3"),
-		ENTITY_DEF("gescc", 10921, "\xe2\xaa\xa9"),
-		ENTITY_DEF("sup", 8835, "\xe2\x8a\x83"),
-		ENTITY_DEF("smid", 8739, "\xe2\x88\xa3"),
-		ENTITY_DEF("cularr", 8630, "\xe2\x86\xb6"),
*** OUTPUT TRUNCATED, 2035 LINES SKIPPED ***


More information about the Commits mailing list