Better half-markdown (#202)
Better half-markdown.

This fixes some issues with the current semi-Markdown parser to make life easier until a full Markdown parser is implemented.

Changes:
- A * that would normally start italics but isn't matched by a closing * is now left alone.
- A ` that would normally start inline code but isn't matched by a closing ` is now left alone.
- Backslash escapes should now work as expected.
- Support for **bold** and the alternative _italic_ style.
This commit is contained in:
parent
0ce5b65835
commit
170c5811a3
2 changed files with 132 additions and 79 deletions
169
matrix/colors.py
169
matrix/colors.py
|
@ -88,66 +88,63 @@ class Formatted(object):
|
||||||
substrings = [] # type: List[FormattedString]
|
substrings = [] # type: List[FormattedString]
|
||||||
attributes = DEFAULT_ATTRIBUTES.copy()
|
attributes = DEFAULT_ATTRIBUTES.copy()
|
||||||
|
|
||||||
|
def last_match_index(regex, subject, offset_in_match):
|
||||||
|
matches = list(re.finditer(regex, subject))
|
||||||
|
return matches[-1].span()[0] + offset_in_match if matches else -1
|
||||||
|
|
||||||
|
wrappers = {
|
||||||
|
"**": {
|
||||||
|
"key": "bold",
|
||||||
|
"last_index": last_match_index(r"\S\*\*", line, 1),
|
||||||
|
},
|
||||||
|
"*": {
|
||||||
|
"key": "italic",
|
||||||
|
"last_index": last_match_index(r"\S\*($|[^*])", line, 1),
|
||||||
|
},
|
||||||
|
"_": {
|
||||||
|
"key": "italic",
|
||||||
|
"last_index": last_match_index(r"\S_", line, 1),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
wrapper_init_chars = set(k[0] for k in wrappers.keys())
|
||||||
|
wrapper_max_len = max(len(k) for k in wrappers.keys())
|
||||||
|
|
||||||
|
irc_toggles = {
|
||||||
|
"\x02": "bold",
|
||||||
|
"\x1D": "italic",
|
||||||
|
"\x1F": "underline",
|
||||||
|
}
|
||||||
|
|
||||||
|
last_backtick = line.rfind("`")
|
||||||
|
|
||||||
i = 0
|
i = 0
|
||||||
while i < len(line):
|
while i < len(line):
|
||||||
# Bold
|
# Markdown escape
|
||||||
if line[i] == "\x02" and not attributes["code"]:
|
# NOTE: IRC-native formatting characters are not escaped
|
||||||
|
if i + 1 < len(line) and line[i] == "\\" \
|
||||||
|
and line[i + 1] not in "\x02\x03\x0F\x1D\x1F":
|
||||||
|
text += line[i + 1]
|
||||||
|
i = i + 2
|
||||||
|
|
||||||
|
# IRC bold/italic/underline
|
||||||
|
elif line[i] in irc_toggles and not attributes["code"]:
|
||||||
if text:
|
if text:
|
||||||
substrings.append(FormattedString(text, attributes.copy()))
|
substrings.append(FormattedString(text, attributes.copy()))
|
||||||
text = ""
|
text = ""
|
||||||
attributes["bold"] = not attributes["bold"]
|
key = irc_toggles[line[i]]
|
||||||
|
attributes[key] = not attributes[key]
|
||||||
i = i + 1
|
i = i + 1
|
||||||
|
|
||||||
# Markdown inline code
|
# IRC reset
|
||||||
elif line[i] == "`":
|
elif line[i] == "\x0F" and not attributes["code"]:
|
||||||
if text:
|
|
||||||
# strip leading and trailing spaces and compress consecutive
|
|
||||||
# spaces in inline code blocks
|
|
||||||
if attributes["code"]:
|
|
||||||
text = text.strip()
|
|
||||||
text = re.sub(r"\s+", " ", text)
|
|
||||||
|
|
||||||
substrings.append(
|
|
||||||
FormattedString(text, attributes.copy())
|
|
||||||
)
|
|
||||||
text = ""
|
|
||||||
attributes["code"] = not attributes["code"]
|
|
||||||
i = i + 1
|
|
||||||
|
|
||||||
# Markdown emphasis
|
|
||||||
elif line[i] == "*" and not attributes["code"]:
|
|
||||||
if attributes["italic"] and not line[i - 1].isspace():
|
|
||||||
if text:
|
|
||||||
substrings.append(
|
|
||||||
FormattedString(text, attributes.copy())
|
|
||||||
)
|
|
||||||
text = ""
|
|
||||||
attributes["italic"] = not attributes["italic"]
|
|
||||||
i = i + 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
elif attributes["italic"] and line[i - 1].isspace():
|
|
||||||
text = text + line[i]
|
|
||||||
i = i + 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
elif i + 1 < len(line) and line[i + 1].isspace():
|
|
||||||
text = text + line[i]
|
|
||||||
i = i + 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
elif i == len(line) - 1:
|
|
||||||
text = text + line[i]
|
|
||||||
i = i + 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
if text:
|
if text:
|
||||||
substrings.append(FormattedString(text, attributes.copy()))
|
substrings.append(FormattedString(text, attributes.copy()))
|
||||||
text = ""
|
text = ""
|
||||||
attributes["italic"] = not attributes["italic"]
|
# Reset all the attributes
|
||||||
|
attributes = DEFAULT_ATTRIBUTES.copy()
|
||||||
i = i + 1
|
i = i + 1
|
||||||
|
|
||||||
# Color
|
# IRC color
|
||||||
elif line[i] == "\x03" and not attributes["code"]:
|
elif line[i] == "\x03" and not attributes["code"]:
|
||||||
if text:
|
if text:
|
||||||
substrings.append(FormattedString(text, attributes.copy()))
|
substrings.append(FormattedString(text, attributes.copy()))
|
||||||
|
@ -185,37 +182,73 @@ class Formatted(object):
|
||||||
attributes["bgcolor"] = color_line_to_weechat(color_string)
|
attributes["bgcolor"] = color_line_to_weechat(color_string)
|
||||||
else:
|
else:
|
||||||
attributes["bgcolor"] = None
|
attributes["bgcolor"] = None
|
||||||
# Reset
|
|
||||||
elif line[i] == "\x0F" and not attributes["code"]:
|
# Markdown inline code
|
||||||
|
elif line[i] == "`" and (attributes["code"] or last_backtick > i):
|
||||||
if text:
|
if text:
|
||||||
substrings.append(FormattedString(text, attributes.copy()))
|
# strip leading and trailing spaces and compress consecutive
|
||||||
|
# spaces in inline code blocks
|
||||||
|
if attributes["code"]:
|
||||||
|
text = text.strip()
|
||||||
|
text = re.sub(r"\s+", " ", text)
|
||||||
|
|
||||||
|
substrings.append(
|
||||||
|
FormattedString(text, attributes.copy())
|
||||||
|
)
|
||||||
text = ""
|
text = ""
|
||||||
# Reset all the attributes
|
attributes["code"] = not attributes["code"]
|
||||||
attributes = DEFAULT_ATTRIBUTES.copy()
|
|
||||||
i = i + 1
|
i = i + 1
|
||||||
|
|
||||||
# Italic
|
# Markdown wrapper (emphasis/bold)
|
||||||
elif line[i] == "\x1D" and not attributes["code"]:
|
elif line[i] in wrapper_init_chars and not attributes["code"]:
|
||||||
if text:
|
for l in range(wrapper_max_len, 0, -1):
|
||||||
substrings.append(FormattedString(text, attributes.copy()))
|
if i + l <= len(line) and line[i : i + l] in wrappers:
|
||||||
text = ""
|
descriptor = wrappers[line[i : i + l]]
|
||||||
attributes["italic"] = not attributes["italic"]
|
|
||||||
i = i + 1
|
|
||||||
|
|
||||||
# Underline
|
if attributes[descriptor["key"]]:
|
||||||
elif line[i] == "\x1F" and not attributes["code"]:
|
# Can only turn off if preceded by non-whitespace
|
||||||
if text:
|
if not line[i - 1].isspace():
|
||||||
substrings.append(FormattedString(text, attributes.copy()))
|
if text:
|
||||||
text = ""
|
substrings.append(
|
||||||
attributes["underline"] = not attributes["underline"]
|
FormattedString(text, attributes.copy()))
|
||||||
i = i + 1
|
text = ""
|
||||||
|
attributes[descriptor["key"]] = False
|
||||||
|
i = i + l
|
||||||
|
else:
|
||||||
|
text = text + line[i : i + l]
|
||||||
|
i = i + l
|
||||||
|
|
||||||
|
# Must have a chance of closing this, and be followed
|
||||||
|
# by non-whitespace
|
||||||
|
elif descriptor["last_index"] >= i + l and \
|
||||||
|
not line[i + l].isspace():
|
||||||
|
if text:
|
||||||
|
substrings.append(
|
||||||
|
FormattedString(text, attributes.copy()))
|
||||||
|
text = ""
|
||||||
|
attributes[descriptor["key"]] = True
|
||||||
|
i = i + l
|
||||||
|
|
||||||
|
else:
|
||||||
|
text = text + line[i : i + l]
|
||||||
|
i = i + l
|
||||||
|
|
||||||
|
break
|
||||||
|
|
||||||
|
else:
|
||||||
|
# No wrapper matched here (NOTE: cannot happen if "*" and
|
||||||
|
# "_" are both in wrappers, but for completeness' sake)
|
||||||
|
text = text + line[i]
|
||||||
|
i = i + 1
|
||||||
|
|
||||||
# Normal text
|
# Normal text
|
||||||
else:
|
else:
|
||||||
text = text + line[i]
|
text = text + line[i]
|
||||||
i = i + 1
|
i = i + 1
|
||||||
|
|
||||||
substrings.append(FormattedString(text, attributes))
|
if text:
|
||||||
|
substrings.append(FormattedString(text, attributes))
|
||||||
|
|
||||||
return cls(substrings)
|
return cls(substrings)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
|
@ -5,7 +5,7 @@ from __future__ import unicode_literals
|
||||||
import webcolors
|
import webcolors
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from hypothesis import given
|
from hypothesis import given
|
||||||
from hypothesis.strategies import sampled_from, text
|
from hypothesis.strategies import sampled_from, text, characters
|
||||||
|
|
||||||
from matrix.colors import (G, Formatted, FormattedString,
|
from matrix.colors import (G, Formatted, FormattedString,
|
||||||
color_html_to_weechat, color_weechat_to_html)
|
color_html_to_weechat, color_weechat_to_html)
|
||||||
|
@ -58,15 +58,16 @@ def test_normalize_spaces_in_inline_code():
|
||||||
assert formatted.to_weechat() == valid_result
|
assert formatted.to_weechat() == valid_result
|
||||||
|
|
||||||
|
|
||||||
# FIXME: this case doesn't and can't work yet (until a proper Markdown parser
|
@given(
|
||||||
# is integrated)
|
text(alphabet=characters(min_codepoint=32,
|
||||||
# @given(text().map(lambda s: '*' + s)
|
blacklist_characters="*_"))
|
||||||
# def test_unpaired_prefix_asterisk_without_space_is_literal(text):
|
.map(lambda s: '*' + s))
|
||||||
# """An unpaired asterisk at the beginning of the line, without a space
|
def test_unpaired_prefix_asterisk_without_space_is_literal(text):
|
||||||
# after it, is considered literal.
|
"""An unpaired asterisk at the beginning of the line, without a space
|
||||||
# """
|
after it, is considered literal.
|
||||||
# formatted = Formatted.from_input_line(text)
|
"""
|
||||||
# assert text == formatted.to_weechat()
|
formatted = Formatted.from_input_line(text)
|
||||||
|
assert text.strip() == formatted.to_weechat()
|
||||||
|
|
||||||
|
|
||||||
def test_input_line_color():
|
def test_input_line_color():
|
||||||
|
@ -79,7 +80,7 @@ def test_input_line_bold():
|
||||||
assert "\x1b[01mHello\x1b[021m" == formatted.to_weechat()
|
assert "\x1b[01mHello\x1b[021m" == formatted.to_weechat()
|
||||||
assert "<strong>Hello</strong>" == formatted.to_html()
|
assert "<strong>Hello</strong>" == formatted.to_html()
|
||||||
|
|
||||||
def test_input_line_bold():
|
def test_input_line_underline():
|
||||||
formatted = Formatted.from_input_line("\x1FHello")
|
formatted = Formatted.from_input_line("\x1FHello")
|
||||||
assert "\x1b[04mHello\x1b[024m" == formatted.to_weechat()
|
assert "\x1b[04mHello\x1b[024m" == formatted.to_weechat()
|
||||||
assert "<u>Hello</u>" == formatted.to_html()
|
assert "<u>Hello</u>" == formatted.to_html()
|
||||||
|
@ -89,6 +90,25 @@ def test_input_line_markdown_emph():
|
||||||
assert "\x1b[03mHello\x1b[023m" == formatted.to_weechat()
|
assert "\x1b[03mHello\x1b[023m" == formatted.to_weechat()
|
||||||
assert "<em>Hello</em>" == formatted.to_html()
|
assert "<em>Hello</em>" == formatted.to_html()
|
||||||
|
|
||||||
|
def test_input_line_markdown_bold():
|
||||||
|
formatted = Formatted.from_input_line("**Hello**")
|
||||||
|
assert "\x1b[01mHello\x1b[021m" == formatted.to_weechat()
|
||||||
|
assert "<strong>Hello</strong>" == formatted.to_html()
|
||||||
|
|
||||||
|
def test_input_line_markdown_various():
|
||||||
|
inp = "**bold* bold *bital etc* bold **bold** * *italic*"
|
||||||
|
formatted = Formatted.from_input_line(inp)
|
||||||
|
assert "<strong>bold* bold </strong>" \
|
||||||
|
"<em><strong>bital etc</strong></em><strong> bold **bold</strong>" \
|
||||||
|
" * <em>italic</em>" \
|
||||||
|
== formatted.to_html()
|
||||||
|
|
||||||
|
def test_input_line_markdown_various2():
|
||||||
|
inp = "norm** `code **code *code` norm `norm"
|
||||||
|
formatted = Formatted.from_input_line(inp)
|
||||||
|
assert "norm** <code>code **code *code</code> norm `norm" \
|
||||||
|
== formatted.to_html()
|
||||||
|
|
||||||
def test_conversion():
|
def test_conversion():
|
||||||
formatted = Formatted.from_input_line("*Hello*")
|
formatted = Formatted.from_input_line("*Hello*")
|
||||||
formatted2 = Formatted.from_html(formatted.to_html())
|
formatted2 = Formatted.from_html(formatted.to_html())
|
||||||
|
|
Loading…
Add table
Reference in a new issue