From eea893682228299b9d90c69f52eb9075612d8708 Mon Sep 17 00:00:00 2001
From: Denis Kasak <dkasak@termina.org.uk>
Date: Sat, 17 Mar 2018 17:52:17 +0100
Subject: [PATCH 1/3] Unescape all HTML entities prior to parsing.

---
 matrix/colors.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/matrix/colors.py b/matrix/colors.py
index 0c080ff..247c8f0 100644
--- a/matrix/colors.py
+++ b/matrix/colors.py
@@ -32,6 +32,8 @@ try:
 except ImportError:
     from html.parser import HTMLParser
 
+from html.entities import name2codepoint
+
 FormattedString = namedtuple('FormattedString', ['text', 'attributes'])
 
 quote_wrapper = textwrap.TextWrapper(
@@ -314,6 +316,10 @@ class MatrixHtmlParser(HTMLParser):
         self.substrings = []  # type: List[FormattedString]
         self.attributes = DEFAULT_ATRIBUTES.copy()
 
+    def feed(self, text):
+        text = self.unescape(text)
+        return HTMLParser.feed(self, text)
+
     def _toggle_attribute(self, attribute):
         if self.text:
             self.substrings.append(

From 0a868b80bbf5cb5dc74adccf68f17b305ada6c0b Mon Sep 17 00:00:00 2001
From: Denis Kasak <dkasak@termina.org.uk>
Date: Mon, 19 Mar 2018 09:37:52 +0100
Subject: [PATCH 2/3] Add HTML character reference parsing test.

---
 tests/http_parser_test.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 tests/http_parser_test.py

diff --git a/tests/http_parser_test.py b/tests/http_parser_test.py
new file mode 100644
index 0000000..e58903a
--- /dev/null
+++ b/tests/http_parser_test.py
@@ -0,0 +1,33 @@
+import html.entities
+
+from hypothesis import given
+from hypothesis.strategies import sampled_from
+
+from matrix.colors import MatrixHtmlParser
+
+try:
+    # Python 3: keep only the legacy names listed without a trailing ';'
+    html_entities = [(key, value, ord(value))
+                     for key, value in html.entities.html5.items()
+                     if not key.endswith(';')]
+except AttributeError:
+    # Python 2: no html5 table available, build from name2codepoint
+    html_entities = [(key, unichr(number), number)
+                     for key, number
+                     in html.entities.name2codepoint.items()]
+
+
+@given(sampled_from(html_entities))
+def test_html_named_entity_parsing(entitydef):
+    """A named reference such as ``&amp;`` unescapes to its character."""
+    entity_name, expected, _ = entitydef
+    reference = '&{};'.format(entity_name)
+    assert MatrixHtmlParser().unescape(reference) == expected
+
+
+@given(sampled_from(html_entities))
+def test_html_numeric_reference_parsing(entitydef):
+    """A decimal reference such as ``&#38;`` unescapes to its character."""
+    _, expected, codepoint = entitydef
+    reference = '&#{};'.format(codepoint)
+    assert MatrixHtmlParser().unescape(reference) == expected

From 4be54d032ef93d2be4612fca5026ca6251d3040f Mon Sep 17 00:00:00 2001
From: Denis Kasak <dkasak@termina.org.uk>
Date: Mon, 19 Mar 2018 11:00:05 +0100
Subject: [PATCH 3/3] Implement MatrixHtmlParser.unescape shim.

This is for Python 2/3 compatibility, since Python 3 deprecates the
HTMLParser.unescape instance method and Python 2 doesn't have
html.unescape.
---
 matrix/colors.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/matrix/colors.py b/matrix/colors.py
index 247c8f0..fc6d3bb 100644
--- a/matrix/colors.py
+++ b/matrix/colors.py
@@ -32,6 +32,7 @@ try:
 except ImportError:
     from html.parser import HTMLParser
 
+import html
 from html.entities import name2codepoint
 
 FormattedString = namedtuple('FormattedString', ['text', 'attributes'])
@@ -316,6 +317,17 @@ class MatrixHtmlParser(HTMLParser):
         self.substrings = []  # type: List[FormattedString]
         self.attributes = DEFAULT_ATRIBUTES.copy()
 
+    def unescape(self, text):
+        """Unescape HTML character references on both Python 2 and 3.
+
+        Python 3 deprecates HTMLParser.unescape and Python 2 has no
+        html.unescape, so use whichever one is actually available.
+        """
+        unescape_fn = getattr(html, 'unescape', None)
+        if unescape_fn is None:
+            return HTMLParser.unescape(self, text)
+        return unescape_fn(text)
+
     def feed(self, text):
         text = self.unescape(text)
         return HTMLParser.feed(self, text)