commit a12f317: [Project] More fixes for closed tags
Vsevolod Stakhov
vsevolod at highsecure.ru
Thu Jul 1 17:00:04 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-07-01 17:58:59 +0100
URL: https://github.com/rspamd/rspamd/commit/a12f31775403b757c0e1bd4d96aaa5cec2a3f00a (HEAD -> master)
[Project] More fixes for closed tags
---
src/libserver/html/html.cxx | 29 +++++++++++++++++------------
1 file changed, 17 insertions(+), 12 deletions(-)
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index ae73b7413..82c5d213c 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1335,6 +1335,8 @@ html_process_input(rspamd_mempool_t *pool,
if (cur_tag->flags & FL_CLOSED) {
cur_tag->closing.end = cur_tag->content_offset;
cur_tag->closing.start = cur_tag->tag_start;
+
+ cur_tag = parent_tag;
}
};
@@ -1916,17 +1918,10 @@ TEST_CASE("html text extraction")
{
const std::vector<std::pair<std::string, std::string>> cases{
- /* Tables */
- {"<table>\n"
- " <tr>\n"
- " <th>heada</th>\n"
- " <th>headb</th>\n"
- " </tr>\n"
- " <tr>\n"
- " <td>data1</td>\n"
- " <td>data2</td>\n"
- " </tr>\n"
- " </table>", "heada headb\ndata1 data2\n"},
+ {" <body>\n"
+ " <!-- escape content -->\n"
+ " a&nbsp;b a &gt; b a &lt; b a &amp; b &apos;a &quot;a&quot;\n"
+ " </body>", R"|(a b a > b a < b a & b 'a "a")|"},
/* XML tags */
{"<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>\n"
" <!DOCTYPE html\n"
@@ -1975,7 +1970,17 @@ TEST_CASE("html text extraction")
" </body>\n"
"</html>", "Hello, world! test\ndata<>\nstuff?"},
{"<p><!--comment-->test</br></hr><br>", "test\n"},
-
+ /* Tables */
+ {"<table>\n"
+ " <tr>\n"
+ " <th>heada</th>\n"
+ " <th>headb</th>\n"
+ " </tr>\n"
+ " <tr>\n"
+ " <td>data1</td>\n"
+ " <td>data2</td>\n"
+ " </tr>\n"
+ " </table>", "heada headb\ndata1 data2\n"},
};
rspamd_url_init(NULL);
More information about the Commits mailing list