Fix handling of backslashes in relation to code blocks (#203)
Backslashes are now correctly preserved inside code blocks while still allowing the user to escape a backtick. The handling of backticks and bold/italic wrappers was unified so that they share the same escaping code. Outside code blocks, a backslash only escapes the Markdown wrapper characters (*, _, `) and another backslash; inside code blocks, it only escapes a backtick. If a backslash is encountered before any other character, it is treated as a literal backslash.
This commit is contained in:
parent
3cf5cc077e
commit
a5e6434c31
2 changed files with 60 additions and 31 deletions
|
@ -88,23 +88,44 @@ class Formatted(object):
|
||||||
substrings = [] # type: List[FormattedString]
|
substrings = [] # type: List[FormattedString]
|
||||||
attributes = DEFAULT_ATTRIBUTES.copy()
|
attributes = DEFAULT_ATTRIBUTES.copy()
|
||||||
|
|
||||||
def last_match_index(regex, subject, offset_in_match):
|
# Escaped things are not markdown delimiters, so substitute them away
|
||||||
matches = list(re.finditer(regex, subject))
|
# when (quickly) looking for the last delimiters in the line. Note that
|
||||||
|
# the replacement needs to be the same length as the original for the
|
||||||
|
# indices to be correct.
|
||||||
|
escaped_masked = re.sub(r"\\[\\*_`]", "aa", line)
|
||||||
|
|
||||||
|
def last_match_index(regex, offset_in_match):
|
||||||
|
matches = list(re.finditer(regex, escaped_masked))
|
||||||
return matches[-1].span()[0] + offset_in_match if matches else -1
|
return matches[-1].span()[0] + offset_in_match if matches else -1
|
||||||
|
|
||||||
|
# 'needs_word': whether the wrapper must surround words, for example
|
||||||
|
# '*italic*' and not '* not-italic *'.
|
||||||
|
# 'validate': whether it can occur within the current attributes
|
||||||
wrappers = {
|
wrappers = {
|
||||||
"**": {
|
"**": {
|
||||||
"key": "bold",
|
"key": "bold",
|
||||||
"last_index": last_match_index(r"\S\*\*", line, 1),
|
"last_index": last_match_index(r"\S\*\*", 1),
|
||||||
|
"needs_word": True,
|
||||||
|
"validate": lambda attrs: not attrs["code"],
|
||||||
},
|
},
|
||||||
"*": {
|
"*": {
|
||||||
"key": "italic",
|
"key": "italic",
|
||||||
"last_index": last_match_index(r"\S\*($|[^*])", line, 1),
|
"last_index": last_match_index(r"\S\*($|[^*])", 1),
|
||||||
|
"needs_word": True,
|
||||||
|
"validate": lambda attrs: not attrs["code"],
|
||||||
},
|
},
|
||||||
"_": {
|
"_": {
|
||||||
"key": "italic",
|
"key": "italic",
|
||||||
"last_index": last_match_index(r"\S_", line, 1),
|
"last_index": last_match_index(r"\S_", 1),
|
||||||
|
"needs_word": True,
|
||||||
|
"validate": lambda attrs: not attrs["code"],
|
||||||
},
|
},
|
||||||
|
"`": {
|
||||||
|
"key": "code",
|
||||||
|
"last_index": last_match_index(r"`", 0),
|
||||||
|
"needs_word": False,
|
||||||
|
"validate": lambda attrs: True,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
wrapper_init_chars = set(k[0] for k in wrappers.keys())
|
wrapper_init_chars = set(k[0] for k in wrappers.keys())
|
||||||
wrapper_max_len = max(len(k) for k in wrappers.keys())
|
wrapper_max_len = max(len(k) for k in wrappers.keys())
|
||||||
|
@ -115,14 +136,17 @@ class Formatted(object):
|
||||||
"\x1F": "underline",
|
"\x1F": "underline",
|
||||||
}
|
}
|
||||||
|
|
||||||
last_backtick = line.rfind("`")
|
# Characters that consume a prefixed backslash
|
||||||
|
escapable_chars = wrapper_init_chars.copy()
|
||||||
|
escapable_chars.add("\\")
|
||||||
|
|
||||||
i = 0
|
i = 0
|
||||||
while i < len(line):
|
while i < len(line):
|
||||||
# Markdown escape
|
# Markdown escape
|
||||||
# NOTE: IRC-native formatting characters are not escaped
|
|
||||||
if i + 1 < len(line) and line[i] == "\\" \
|
if i + 1 < len(line) and line[i] == "\\" \
|
||||||
and line[i + 1] not in "\x02\x03\x0F\x1D\x1F":
|
and (line[i + 1] in escapable_chars
|
||||||
|
if not attributes["code"]
|
||||||
|
else line[i + 1] == "`"):
|
||||||
text += line[i + 1]
|
text += line[i + 1]
|
||||||
i = i + 2
|
i = i + 2
|
||||||
|
|
||||||
|
@ -183,32 +207,26 @@ class Formatted(object):
|
||||||
else:
|
else:
|
||||||
attributes["bgcolor"] = None
|
attributes["bgcolor"] = None
|
||||||
|
|
||||||
# Markdown inline code
|
# Markdown wrapper (emphasis/bold/code)
|
||||||
elif line[i] == "`" and (attributes["code"] or last_backtick > i):
|
elif line[i] in wrapper_init_chars:
|
||||||
if text:
|
|
||||||
# strip leading and trailing spaces and compress consecutive
|
|
||||||
# spaces in inline code blocks
|
|
||||||
if attributes["code"]:
|
|
||||||
text = text.strip()
|
|
||||||
text = re.sub(r"\s+", " ", text)
|
|
||||||
|
|
||||||
substrings.append(
|
|
||||||
FormattedString(text, attributes.copy())
|
|
||||||
)
|
|
||||||
text = ""
|
|
||||||
attributes["code"] = not attributes["code"]
|
|
||||||
i = i + 1
|
|
||||||
|
|
||||||
# Markdown wrapper (emphasis/bold)
|
|
||||||
elif line[i] in wrapper_init_chars and not attributes["code"]:
|
|
||||||
for l in range(wrapper_max_len, 0, -1):
|
for l in range(wrapper_max_len, 0, -1):
|
||||||
if i + l <= len(line) and line[i : i + l] in wrappers:
|
if i + l <= len(line) and line[i : i + l] in wrappers:
|
||||||
descriptor = wrappers[line[i : i + l]]
|
descriptor = wrappers[line[i : i + l]]
|
||||||
|
|
||||||
|
if not descriptor["validate"](attributes):
|
||||||
|
continue
|
||||||
|
|
||||||
if attributes[descriptor["key"]]:
|
if attributes[descriptor["key"]]:
|
||||||
# Can only turn off if preceded by non-whitespace
|
# needs_word wrappers can only be turned off if
|
||||||
if not line[i - 1].isspace():
|
# preceded by non-whitespace
|
||||||
|
if (i >= 1 and not line[i - 1].isspace()) \
|
||||||
|
or not descriptor["needs_word"]:
|
||||||
if text:
|
if text:
|
||||||
|
# strip leading and trailing spaces and
|
||||||
|
# compress consecutive spaces in inline
|
||||||
|
# code blocks
|
||||||
|
if descriptor["key"] == "code":
|
||||||
|
text = re.sub(r"\s+", " ", text.strip())
|
||||||
substrings.append(
|
substrings.append(
|
||||||
FormattedString(text, attributes.copy()))
|
FormattedString(text, attributes.copy()))
|
||||||
text = ""
|
text = ""
|
||||||
|
@ -218,10 +236,11 @@ class Formatted(object):
|
||||||
text = text + line[i : i + l]
|
text = text + line[i : i + l]
|
||||||
i = i + l
|
i = i + l
|
||||||
|
|
||||||
# Must have a chance of closing this, and be followed
|
# Must have a chance of closing this, and needs_word
|
||||||
# by non-whitespace
|
# wrappers must be followed by non-whitespace
|
||||||
elif descriptor["last_index"] >= i + l and \
|
elif descriptor["last_index"] >= i + l and \
|
||||||
not line[i + l].isspace():
|
(not line[i + l].isspace() or \
|
||||||
|
not descriptor["needs_word"]):
|
||||||
if text:
|
if text:
|
||||||
substrings.append(
|
substrings.append(
|
||||||
FormattedString(text, attributes.copy()))
|
FormattedString(text, attributes.copy()))
|
||||||
|
|
|
@ -109,6 +109,16 @@ def test_input_line_markdown_various2():
|
||||||
assert "norm** <code>code **code *code</code> norm `norm" \
|
assert "norm** <code>code **code *code</code> norm `norm" \
|
||||||
== formatted.to_html()
|
== formatted.to_html()
|
||||||
|
|
||||||
|
def test_input_line_backslash():
|
||||||
|
def convert(s): return Formatted.from_input_line(s).to_html()
|
||||||
|
assert "pre <em>italic* ital</em> norm" == convert("pre *italic\\* ital* norm")
|
||||||
|
assert "*norm* norm" == convert("\\*norm* norm")
|
||||||
|
assert "<em>*ital</em>" == convert("*\\*ital*")
|
||||||
|
assert "<code>C:\\path</code>" == convert("`C:\\path`")
|
||||||
|
assert "<code>with`tick</code>" == convert("`with\\`tick`")
|
||||||
|
assert "`un`matched" == convert("`un\\`matched")
|
||||||
|
assert "<strong>bold </strong><em><strong>*bital</strong></em> norm" == convert("**bold *\\*bital*** norm")
|
||||||
|
|
||||||
def test_conversion():
|
def test_conversion():
|
||||||
formatted = Formatted.from_input_line("*Hello*")
|
formatted = Formatted.from_input_line("*Hello*")
|
||||||
formatted2 = Formatted.from_html(formatted.to_html())
|
formatted2 = Formatted.from_html(formatted.to_html())
|
||||||
|
|
Loading…
Reference in a new issue