From c6e34dd0b1b50e354b07b45022984e8efac9b171 Mon Sep 17 00:00:00 2001 From: Denis Kasak Date: Thu, 22 Mar 2018 20:47:03 +0100 Subject: [PATCH 1/2] Escape each HTML ref individually. --- matrix/colors.py | 12 +++++++----- tests/http_parser_test.py | 8 ++++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/matrix/colors.py b/matrix/colors.py index fc6d3bb..e6121f7 100644 --- a/matrix/colors.py +++ b/matrix/colors.py @@ -328,10 +328,6 @@ class MatrixHtmlParser(HTMLParser): except AttributeError: return HTMLParser.unescape(self, text) - def feed(self, text): - text = self.unescape(text) - return HTMLParser.feed(self, text) - def _toggle_attribute(self, attribute): if self.text: self.substrings.append( @@ -395,7 +391,13 @@ class MatrixHtmlParser(HTMLParser): pass def handle_data(self, data): - self.text = self.text + data + self.text += data + + def handle_entityref(self, name): + self.text += self.unescape("&{};".format(name)) + + def handle_charref(self, name): + self.text += self.unescape("&{};".format(name)) def get_substrings(self): if self.text: diff --git a/tests/http_parser_test.py b/tests/http_parser_test.py index e58903a..fba8e2d 100644 --- a/tests/http_parser_test.py +++ b/tests/http_parser_test.py @@ -31,3 +31,11 @@ def test_html_numeric_reference_parsing(entitydef): num = entitydef[2] parser = MatrixHtmlParser() assert parser.unescape('&#{};'.format(num)) == character + + +def test_parsing_of_escaped_brackets(): + p = MatrixHtmlParser() + p.feed('
&lt;faketag&gt;
') + s = p.get_substrings() + print(s) + assert s[0].text == '<faketag>' and len(s) == 1 From 392698b9b562bf7887d6678c90ea71b04a972f99 Mon Sep 17 00:00:00 2001 From: Denis Kasak Date: Thu, 22 Mar 2018 20:47:10 +0100 Subject: [PATCH 2/2] MatrixHtmlParser: Factor out adding substrings into a method. --- matrix/colors.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/matrix/colors.py b/matrix/colors.py index e6121f7..231531e 100644 --- a/matrix/colors.py +++ b/matrix/colors.py @@ -328,10 +328,13 @@ class MatrixHtmlParser(HTMLParser): except AttributeError: return HTMLParser.unescape(self, text) + def add_substring(self, text, attrs): + fmt_string = FormattedString(text, attrs) + self.substrings.append(fmt_string) + def _toggle_attribute(self, attribute): if self.text: - self.substrings.append( - FormattedString(self.text, self.attributes.copy())) + self.add_substring(self.text, self.attributes.copy()) self.text = "" self.attributes[attribute] = not self.attributes[attribute] @@ -348,11 +351,9 @@ class MatrixHtmlParser(HTMLParser): self._toggle_attribute("quote") elif tag == "br": if self.text: - self.substrings.append( - FormattedString(self.text, self.attributes.copy())) + self.add_substring(self.text, self.attributes.copy()) self.text = "\n" - self.substrings.append( - FormattedString(self.text, DEFAULT_ATRIBUTES.copy())) + self.add_substring(self.text, DEFAULT_ATRIBUTES.copy()) self.text = "" elif tag == "font": for key, value in attrs: @@ -363,8 +364,7 @@ class MatrixHtmlParser(HTMLParser): continue if self.text: - self.substrings.append( - FormattedString(self.text, self.attributes.copy())) + self.add_substring(self.text, self.attributes.copy()) self.text = "" self.attributes["fgcolor"] = color else: @@ -383,8 +383,7 @@ class MatrixHtmlParser(HTMLParser): self._toggle_attribute("quote") elif tag == "font": if self.text: - self.substrings.append( - FormattedString(self.text, self.attributes.copy())) + 
self.add_substring(self.text, self.attributes.copy()) self.text = "" self.attributes["fgcolor"] = None else: @@ -401,8 +400,7 @@ class MatrixHtmlParser(HTMLParser): def get_substrings(self): if self.text: - self.substrings.append( - FormattedString(self.text, self.attributes.copy())) + self.add_substring(self.text, self.attributes.copy()) return self.substrings