Don't process markdown in URL's

2020-06-07 19:32:26 +02:00 · 2020-06-07 19:32:26 +02:00 · d37e08cce2
commit d37e08cce2
parent 172826ea6b
1 changed files with 36 additions and 8 deletions
--- a/matrix/colors.py
+++ b/matrix/colors.py
@ -88,11 +88,21 @@ class Formatted(object):
        substrings = []  # type: List[FormattedString]
        attributes = DEFAULT_ATTRIBUTES.copy()
        # Disallow backticks in URL's so that code blocks are unaffected by the
        # URL handling
        url_regex = r"\b[a-z]+://[^\s`]+"
        # Escaped things are not markdown delimiters, so substitute them away
-        # when (quickly) looking for the last delimiters in the line. Note that
+        # when (quickly) looking for the last delimiters in the line.
-        # the replacement needs to be the same length as the original for the
+        # Additionally, URL's are ignored for the purposes of markdown
-        # indices to be correct.
+        # delimiters.
-        escaped_masked = re.sub(r"\\[\\*_`]", "aa", line)
+        # Note that the replacement needs to be the same length as the original
        # for the indices to be correct.
        escaped_masked = re.sub(
            r"\\[\\*_`]|(?:" + url_regex + ")",
            lambda m: "a" * len(m[0]),
            line
        )
        def last_match_index(regex, offset_in_match):
            matches = list(re.finditer(regex, escaped_masked))
@ -140,13 +150,30 @@ class Formatted(object):
        escapable_chars = wrapper_init_chars.copy()
        escapable_chars.add("\\")
        # Collect URL spans
        url_spans = [m.span() for m in re.finditer(url_regex, line)]
        url_spans.reverse()  # we'll be popping from the end
        # Whether we are currently in a URL
        in_url = False
        i = 0
        while i < len(line):
            # Update the 'in_url' flag. The first condition is not a while loop
            # because URL's must contain '://', ensuring that we will not skip
            # 2 URL's in one iteration.
            if url_spans and i >= url_spans[-1][1]:
                in_url = False
                url_spans.pop()
            if url_spans and i >= url_spans[-1][0]:
                in_url = True
            # Markdown escape
            if i + 1 < len(line) and line[i] == "\\" \
                    and (line[i + 1] in escapable_chars
                            if not attributes["code"]
-                            else line[i + 1] == "`"):
+                            else line[i + 1] == "`") \
                    and not in_url:
                text += line[i + 1]
                i = i + 2
@ -208,7 +235,7 @@ class Formatted(object):
                    attributes["bgcolor"] = None
            # Markdown wrapper (emphasis/bold/code)
-            elif line[i] in wrapper_init_chars:
+            elif line[i] in wrapper_init_chars and not in_url:
                for l in range(wrapper_max_len, 0, -1):
                    if i + l <= len(line) and line[i : i + l] in wrappers:
                        descriptor = wrappers[line[i : i + l]]
@ -255,8 +282,9 @@ class Formatted(object):
                        break
                else:
-                    # No wrapper matched here (NOTE: cannot happen if "*" and
+                    # No wrapper matched here (NOTE: cannot happen since all
-                    # "_" are both in wrappers, but for completeness' sake)
+                    # wrapper prefixes are also wrappers, but for completeness'
                    # sake)
                    text = text + line[i]
                    i = i + 1