Unescape all HTML entities prior to parsing.

This commit is contained in:
Denis Kasak 2018-03-17 17:52:17 +01:00
parent d239eda675
commit eea8936822

View file

@ -32,6 +32,8 @@ try:
except ImportError:
from html.parser import HTMLParser
from html.entities import name2codepoint
FormattedString = namedtuple('FormattedString', ['text', 'attributes'])
quote_wrapper = textwrap.TextWrapper(
@ -314,6 +316,10 @@ class MatrixHtmlParser(HTMLParser):
self.substrings = [] # type: List[FormattedString]
self.attributes = DEFAULT_ATRIBUTES.copy()
def feed(self, text):
    """Unescape HTML entities in *text*, then feed it to the base parser.

    Args:
        text: The HTML source to parse; character references such as
            ``&amp;`` are resolved before the parser sees the text.

    Returns:
        Whatever ``HTMLParser.feed`` returns.
    """
    # ``HTMLParser.unescape`` was deprecated and later removed (Python 3.9),
    # so prefer the module-level ``html.unescape`` and only fall back to the
    # legacy method on interpreters where the import is unavailable.
    try:
        from html import unescape
    except ImportError:
        unescape = self.unescape

    # NOTE(review): unescaping before parsing also turns escaped markup such
    # as ``&lt;b&gt;`` into real tags -- confirm this is the intended result.
    text = unescape(text)
    return HTMLParser.feed(self, text)
def _toggle_attribute(self, attribute):
if self.text:
self.substrings.append(