Escape each HTML ref individually.

This commit is contained in:
Denis Kasak 2018-03-22 20:47:03 +01:00
parent 2915e5d288
commit c6e34dd0b1
2 changed files with 15 additions and 5 deletions

View file

@ -328,10 +328,6 @@ class MatrixHtmlParser(HTMLParser):
except AttributeError: except AttributeError:
return HTMLParser.unescape(self, text) return HTMLParser.unescape(self, text)
def feed(self, text):
    """Unescape HTML references in ``text`` and pass the result to the base parser.

    Returns whatever ``HTMLParser.feed`` returns.
    """
    # NOTE(review): unescaping happens before parsing, so an escaped
    # "&lt;tag&gt;" reaches the parser as a literal "<tag>".
    unescaped = self.unescape(text)
    return HTMLParser.feed(self, unescaped)
def _toggle_attribute(self, attribute): def _toggle_attribute(self, attribute):
if self.text: if self.text:
self.substrings.append( self.substrings.append(
@ -395,7 +391,13 @@ class MatrixHtmlParser(HTMLParser):
pass pass
def handle_data(self, data): def handle_data(self, data):
self.text = self.text + data self.text += data
def handle_entityref(self, name):
    """Append the character for the named reference ``&name;`` to the text."""
    # Rebuild the full reference so self.unescape can resolve it.
    reference = "&{};".format(name)
    self.text = self.text + self.unescape(reference)
def handle_charref(self, name):
    """Append the character for a numeric character reference to the text.

    ``name`` is the reference body without the leading ``&#`` and the
    trailing ``;`` -- for example ``"60"`` or ``"x3c"`` -- which is how
    HTMLParser hands numeric references to this callback.
    """
    # The "#" must be restored: "&60;" is not a valid reference and would
    # be left untouched by unescape, whereas "&#60;" resolves to "<".
    self.text += self.unescape("&#{};".format(name))
def get_substrings(self): def get_substrings(self):
if self.text: if self.text:

View file

@ -31,3 +31,11 @@ def test_html_numeric_reference_parsing(entitydef):
num = entitydef[2] num = entitydef[2]
parser = MatrixHtmlParser() parser = MatrixHtmlParser()
assert parser.unescape('&#{};'.format(num)) == character assert parser.unescape('&#{};'.format(num)) == character
def test_parsing_of_escaped_brackets():
    """Escaped angle brackets must come through as literal text, not as a tag."""
    parser = MatrixHtmlParser()
    parser.feed('<pre><code>&lt;faketag&gt;</code></pre>')
    substrings = parser.get_substrings()
    # Check the count first so an empty result fails as an assertion
    # instead of raising IndexError on the element access below.
    assert len(substrings) == 1
    assert substrings[0].text == '<faketag>'