commit a12f317: [Project] More fixes for closed tags

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Jul 1 17:00:04 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-07-01 17:58:59 +0100
URL: https://github.com/rspamd/rspamd/commit/a12f31775403b757c0e1bd4d96aaa5cec2a3f00a (HEAD -> master)

[Project] More fixes for closed tags

---
 src/libserver/html/html.cxx | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index ae73b7413..82c5d213c 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1335,6 +1335,8 @@ html_process_input(rspamd_mempool_t *pool,
 		if (cur_tag->flags & FL_CLOSED) {
 			cur_tag->closing.end = cur_tag->content_offset;
 			cur_tag->closing.start = cur_tag->tag_start;
+
+			cur_tag = parent_tag;
 		}
 	};
 
@@ -1916,17 +1918,10 @@ TEST_CASE("html text extraction")
 {
 
 	const std::vector<std::pair<std::string, std::string>> cases{
-			/* Tables */
-			{"<table>\n"
-			 "      <tr>\n"
-			 "        <th>heada</th>\n"
-			 "        <th>headb</th>\n"
-			 "      </tr>\n"
-			 "      <tr>\n"
-			 "        <td>data1</td>\n"
-			 "        <td>data2</td>\n"
-			 "      </tr>\n"
-			 "    </table>", "heada headb\ndata1 data2\n"},
+			{"  <body>\n"
+			 "    <!-- escape content -->\n"
+			 "    a&nbsp;b a &gt; b a &lt; b a &amp; b &apos;a &quot;a&quot;\n"
+			 "  </body>", R"|(a b a > b a < b a & b 'a "a")|"},
 			/* XML tags */
 			{"<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>\n"
 			 " <!DOCTYPE html\n"
@@ -1975,7 +1970,17 @@ TEST_CASE("html text extraction")
 			 "  </body>\n"
 			 "</html>", "Hello, world! test\ndata<>\nstuff?"},
 			{"<p><!--comment-->test</br></hr><br>", "test\n"},
-
+			/* Tables */
+			{"<table>\n"
+			 "      <tr>\n"
+			 "        <th>heada</th>\n"
+			 "        <th>headb</th>\n"
+			 "      </tr>\n"
+			 "      <tr>\n"
+			 "        <td>data1</td>\n"
+			 "        <td>data2</td>\n"
+			 "      </tr>\n"
+			 "    </table>", "heada headb\ndata1 data2\n"},
 	};
 
 	rspamd_url_init(NULL);


More information about the Commits mailing list