Escape each HTML ref individually.
This commit is contained in:
parent
2915e5d288
commit
c6e34dd0b1
2 changed files with 15 additions and 5 deletions
|
@ -328,10 +328,6 @@ class MatrixHtmlParser(HTMLParser):
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
return HTMLParser.unescape(self, text)
|
return HTMLParser.unescape(self, text)
|
||||||
|
|
||||||
def feed(self, text):
|
|
||||||
text = self.unescape(text)
|
|
||||||
return HTMLParser.feed(self, text)
|
|
||||||
|
|
||||||
def _toggle_attribute(self, attribute):
|
def _toggle_attribute(self, attribute):
|
||||||
if self.text:
|
if self.text:
|
||||||
self.substrings.append(
|
self.substrings.append(
|
||||||
|
@ -395,7 +391,13 @@ class MatrixHtmlParser(HTMLParser):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def handle_data(self, data):
|
def handle_data(self, data):
|
||||||
self.text = self.text + data
|
self.text += data
|
||||||
|
|
||||||
|
def handle_entityref(self, name):
    """Append the text for a named entity reference to the buffer.

    ``name`` is wrapped back into ``&name;`` form and handed to
    ``self.unescape``; whatever that returns is appended to ``self.text``.
    """
    entity = "&{};".format(name)
    self.text = self.text + self.unescape(entity)
|
||||||
|
|
||||||
|
def handle_charref(self, name):
    """Append the character for a numeric character reference to the buffer.

    ``name`` is the reference without its ``&#`` prefix and ``;`` suffix,
    e.g. ``"62"`` or ``"x3E"``. The ``#`` has to be restored before
    unescaping: ``&62;`` is not a valid reference and would be appended
    verbatim, whereas ``&#62;`` resolves to ``>``.
    """
    self.text += self.unescape("&#{};".format(name))
|
||||||
|
|
||||||
def get_substrings(self):
|
def get_substrings(self):
|
||||||
if self.text:
|
if self.text:
|
||||||
|
|
|
@ -31,3 +31,11 @@ def test_html_numeric_reference_parsing(entitydef):
|
||||||
num = entitydef[2]
|
num = entitydef[2]
|
||||||
parser = MatrixHtmlParser()
|
parser = MatrixHtmlParser()
|
||||||
assert parser.unescape('&#{};'.format(num)) == character
|
assert parser.unescape('&#{};'.format(num)) == character
|
||||||
|
|
||||||
|
|
||||||
|
def test_parsing_of_escaped_brackets():
    """Escaped angle brackets inside code must come out as literal text.

    The input uses ``&lt;`` / ``&gt;`` so the parser sees entity references
    rather than a real tag; the result should be a single substring whose
    text is the unescaped ``<faketag>``.
    """
    p = MatrixHtmlParser()
    p.feed('<pre><code>&lt;faketag&gt;</code></pre>')
    s = p.get_substrings()
    # Check the length first so a wrong count fails clearly instead of
    # raising IndexError on s[0].
    assert len(s) == 1
    assert s[0].text == '<faketag>'
|
||||||
|
|
Loading…
Reference in a new issue