diff --git a/matrix/colors.py b/matrix/colors.py
index a1222f4..14f047a 100644
--- a/matrix/colors.py
+++ b/matrix/colors.py
@@ -88,23 +88,44 @@ class Formatted(object):
substrings = [] # type: List[FormattedString]
attributes = DEFAULT_ATTRIBUTES.copy()
- def last_match_index(regex, subject, offset_in_match):
- matches = list(re.finditer(regex, subject))
+ # Backslash-escaped characters are not markdown delimiters, so mask them
+ # out before (quickly) searching for the last delimiter of each kind in
+ # the line. Each two-character escape is replaced by two filler characters
+ # so that indices into the masked copy remain valid indices into `line`.
+ escaped_masked = re.sub(r"\\[\\*_`]", "aa", line)
+
+ def last_match_index(regex, offset_in_match):
+ matches = list(re.finditer(regex, escaped_masked))
return matches[-1].span()[0] + offset_in_match if matches else -1
+ # 'needs_word': whether the wrapper must directly touch the text it
+ # surrounds, for example '*italic*' and not '* not-italic *'.
+ # 'validate': predicate on the current attributes; True if the wrapper may occur
wrappers = {
"**": {
"key": "bold",
- "last_index": last_match_index(r"\S\*\*", line, 1),
+ "last_index": last_match_index(r"\S\*\*", 1),
+ "needs_word": True,
+ "validate": lambda attrs: not attrs["code"],
},
"*": {
"key": "italic",
- "last_index": last_match_index(r"\S\*($|[^*])", line, 1),
+ "last_index": last_match_index(r"\S\*($|[^*])", 1),
+ "needs_word": True,
+ "validate": lambda attrs: not attrs["code"],
},
"_": {
"key": "italic",
- "last_index": last_match_index(r"\S_", line, 1),
+ "last_index": last_match_index(r"\S_", 1),
+ "needs_word": True,
+ "validate": lambda attrs: not attrs["code"],
},
+ "`": {
+ "key": "code",
+ "last_index": last_match_index(r"`", 0),
+ "needs_word": False,
+ "validate": lambda attrs: True,
+ }
}
wrapper_init_chars = set(k[0] for k in wrappers.keys())
wrapper_max_len = max(len(k) for k in wrappers.keys())
@@ -115,14 +136,17 @@ class Formatted(object):
"\x1F": "underline",
}
- last_backtick = line.rfind("`")
+ # Characters that can be backslash-escaped outside of inline code
+ escapable_chars = wrapper_init_chars.copy()
+ escapable_chars.add("\\")
i = 0
while i < len(line):
# Markdown escape
- # NOTE: IRC-native formatting characters are not escaped
if i + 1 < len(line) and line[i] == "\\" \
- and line[i + 1] not in "\x02\x03\x0F\x1D\x1F":
+ and (line[i + 1] in escapable_chars
+ if not attributes["code"]
+ else line[i + 1] == "`"):
text += line[i + 1]
i = i + 2
@@ -183,32 +207,26 @@ class Formatted(object):
else:
attributes["bgcolor"] = None
- # Markdown inline code
- elif line[i] == "`" and (attributes["code"] or last_backtick > i):
- if text:
- # strip leading and trailing spaces and compress consecutive
- # spaces in inline code blocks
- if attributes["code"]:
- text = text.strip()
- text = re.sub(r"\s+", " ", text)
-
- substrings.append(
- FormattedString(text, attributes.copy())
- )
- text = ""
- attributes["code"] = not attributes["code"]
- i = i + 1
-
- # Markdown wrapper (emphasis/bold)
- elif line[i] in wrapper_init_chars and not attributes["code"]:
+ # Markdown wrapper (emphasis/bold/code)
+ elif line[i] in wrapper_init_chars:
for l in range(wrapper_max_len, 0, -1):
if i + l <= len(line) and line[i : i + l] in wrappers:
descriptor = wrappers[line[i : i + l]]
+ if not descriptor["validate"](attributes):
+ continue
+
if attributes[descriptor["key"]]:
- # Can only turn off if preceded by non-whitespace
- if not line[i - 1].isspace():
+ # needs_word wrappers can only be turned off if
+ # preceded by non-whitespace
+ if (i >= 1 and not line[i - 1].isspace()) \
+ or not descriptor["needs_word"]:
if text:
+ # strip leading and trailing spaces and
+ # compress consecutive spaces in inline
+ # code blocks
+ if descriptor["key"] == "code":
+ text = re.sub(r"\s+", " ", text.strip())
substrings.append(
FormattedString(text, attributes.copy()))
text = ""
@@ -218,10 +236,11 @@ class Formatted(object):
text = text + line[i : i + l]
i = i + l
- # Must have a chance of closing this, and be followed
- # by non-whitespace
+ # Must have a chance of closing this, and needs_word
+ # wrappers must be followed by non-whitespace
elif descriptor["last_index"] >= i + l and \
- not line[i + l].isspace():
+ (not line[i + l].isspace() or \
+ not descriptor["needs_word"]):
if text:
substrings.append(
FormattedString(text, attributes.copy()))
diff --git a/tests/color_test.py b/tests/color_test.py
index 9ede04b..60785c7 100644
--- a/tests/color_test.py
+++ b/tests/color_test.py
@@ -109,6 +109,16 @@ def test_input_line_markdown_various2():
assert "norm** code **code *code
norm `norm" \
== formatted.to_html()
+def test_input_line_backslash():
+ # Exercises backslash handling in Formatted.from_input_line: every case
+ # feeds an input containing a backslash and compares the to_html() output.
+ # NOTE(review): the expected strings below appear to have lost their HTML
+ # tags, and two of them are split across lines mid-literal -- confirm
+ # against the upstream test file before relying on them.
+ def convert(s): return Formatted.from_input_line(s).to_html()
+ assert "pre italic* ital norm" == convert("pre *italic\\* ital* norm")
+ assert "*norm* norm" == convert("\\*norm* norm")
+ assert "*ital" == convert("*\\*ital*")
+ assert "C:\\path
" == convert("`C:\\path`")
+ assert "with`tick
" == convert("`with\\`tick`")
+ assert "`un`matched" == convert("`un\\`matched")
+ assert "bold *bital norm" == convert("**bold *\\*bital*** norm")
+
def test_conversion():
formatted = Formatted.from_input_line("*Hello*")
formatted2 = Formatted.from_html(formatted.to_html())