Split up the utf encoding handling into a separate file.

This commit is contained in:
poljar (Damir Jelić) 2018-01-26 13:12:57 +01:00
parent 62adae5aa3
commit d5c1538db9
2 changed files with 94 additions and 74 deletions

91
matrix/utf.py Normal file
View file

@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import sys
# pylint: disable=redefined-builtin
from builtins import bytes, str
from collections import Mapping, Iterable
from functools import wraps
# These functions were written by Trygve Aaberge for wee-slack and are
# licensed under the MIT License.
# More information can be found in the wee-slack repository at commit
# 5e1c7e593d70972afb9a55f29d13adaf145d0166. The repository is hosted at:
# https://github.com/wee-slack/wee-slack
class WeechatWrapper(object):
    """Proxy that converts text at the boundary with the wrapped object.

    Attribute access is forwarded to ``wrapped_class``. Callable attributes
    come back wrapped so that their arguments go through ``encode_to_utf8``
    and their results through ``decode_from_utf8``; any other attribute value
    is passed through ``decode_from_utf8`` before being returned.
    """

    def __init__(self, wrapped_class):
        self.wrapped_class = wrapped_class

    # Helper method used to encode/decode method calls.
    def wrap_for_utf8(self, method):
        """Return a callable that encodes arguments and decodes the result.

        Args:
            method: The callable to invoke with the encoded arguments.

        Returns:
            A function accepting arbitrary positional and keyword arguments.
            It returns this wrapper when ``method`` hands back the wrapped
            object itself, and the decoded result otherwise.
        """
        def hooked(*args, **kwargs):
            result = method(*encode_to_utf8(args), **encode_to_utf8(kwargs))
            # Prevent wrapped_class from becoming unwrapped. Identity is what
            # matters here, so compare with ``is`` rather than ``==``.
            if result is self.wrapped_class:
                return self
            return decode_from_utf8(result)
        return hooked

    # Encode and decode everything sent to/received from weechat. Text is
    # kept as unicode internally, but has to be sent to weechat as utf8.
    def __getattr__(self, attr):
        """Look up ``attr`` on the wrapped object, converting as needed.

        Raises:
            AttributeError: If the wrapped object has no such attribute, or
                if ``wrapped_class`` has not been set on this instance.
        """
        # __getattr__ only runs when normal lookup fails, so getting here
        # with "wrapped_class" means it was never assigned (e.g. an instance
        # created without __init__). Reading it again below would recurse
        # without end, so fail with a regular AttributeError instead.
        if attr == "wrapped_class":
            raise AttributeError(attr)
        orig_attr = getattr(self.wrapped_class, attr)
        if callable(orig_attr):
            return self.wrap_for_utf8(orig_attr)
        return decode_from_utf8(orig_attr)

    # Ensure all lines sent to weechat specify a prefix. For lines after the
    # first, we want to disable the prefix, which is done by specifying a
    # space.
    def prnt_date_tags(self, buffer, date, tags, message):
        """Forward to the wrapped ``prnt_date_tags`` with continuation lines
        prefixed by a space and a tab."""
        message = message.replace("\n", "\n \t")
        return self.wrap_for_utf8(self.wrapped_class.prnt_date_tags)(
            buffer,
            date,
            tags,
            message
        )
def utf8_decode(function):
    """
    Wrap a callback so that its arguments arrive as unicode strings.

    Meant for functions called from outside of this script, e.g. callbacks
    from weechat. Under python 3 the arguments are forwarded untouched; under
    python 2 the positional and keyword arguments are run through
    decode_from_utf8 first.
    """
    @wraps(function)
    def wrapper(*args, **kwargs):
        running_python2 = sys.hexversion < 0x3000000
        if running_python2:
            # Only python 2 hands us byte strings that need decoding.
            args = decode_from_utf8(args)
            kwargs = decode_from_utf8(kwargs)
        return function(*args, **kwargs)
    return wrapper
def decode_from_utf8(data):
    """Recursively decode utf-8 byte strings inside ``data`` to unicode.

    Byte strings are decoded, unicode strings are returned unchanged,
    mappings and other iterables are rebuilt as the same container type with
    every element decoded, and anything else is returned as is.

    Args:
        data: A string, a container, or any other object.

    Returns:
        ``data`` with every contained byte string decoded.
    """
    if isinstance(data, bytes):
        return data.decode('utf-8')
    if isinstance(data, str):
        return data
    if isinstance(data, Mapping):
        # Each (key, value) item is a tuple and is decoded element-wise.
        return type(data)(map(decode_from_utf8, data.items()))
    if isinstance(data, tuple) and hasattr(data, '_fields'):
        # namedtuple constructors take one positional argument per field
        # rather than a single iterable, so the decoded items are unpacked.
        return type(data)(*map(decode_from_utf8, data))
    if isinstance(data, Iterable):
        return type(data)(map(decode_from_utf8, data))
    return data
def encode_to_utf8(data):
    """Recursively encode unicode strings inside ``data`` to utf-8 bytes.

    Unicode strings are encoded, byte strings are returned unchanged,
    mappings and other iterables are rebuilt as the same container type with
    every element encoded, and anything else is returned as is.

    Args:
        data: A string, a container, or any other object.

    Returns:
        ``data`` with every contained unicode string encoded.
    """
    if isinstance(data, str):
        return data.encode('utf-8')
    if isinstance(data, bytes):
        return data
    if isinstance(data, Mapping):
        # Each (key, value) item is a tuple and is encoded element-wise.
        return type(data)(map(encode_to_utf8, data.items()))
    if isinstance(data, tuple) and hasattr(data, '_fields'):
        # namedtuple constructors take one positional argument per field
        # rather than a single iterable, so the encoded items are unpacked.
        return type(data)(*map(encode_to_utf8, data))
    if isinstance(data, Iterable):
        return type(data)(map(encode_to_utf8, data))
    return data

View file

@ -14,17 +14,17 @@ import sys
# pylint: disable=redefined-builtin
from builtins import bytes, str
from collections import deque, Mapping, Iterable, namedtuple
from collections import deque, namedtuple
from operator import itemgetter
from enum import Enum, unique
from functools import wraps
# pylint: disable=unused-import
from typing import (List, Set, Dict, Tuple, Text, Optional, AnyStr, Deque, Any)
from http_parser.pyparser import HttpParser
from matrix import colors
from matrix import colors
from matrix.utf import WeechatWrapper, utf8_decode
# pylint: disable=import-error
import weechat
@ -42,77 +42,6 @@ CONFIG = None # type: weechat.config
GLOBAL_OPTIONS = None # type: PluginOptions
# Unicode handling
def encode_to_utf8(data):
    """Recursively encode unicode strings inside ``data`` to utf-8 bytes.

    Unicode strings are encoded, byte strings are returned unchanged,
    mappings and other iterables are rebuilt as the same container type with
    every element encoded, and anything else is returned as is.

    Args:
        data: A string, a container, or any other object.

    Returns:
        ``data`` with every contained unicode string encoded.
    """
    if isinstance(data, str):
        return data.encode('utf-8')
    if isinstance(data, bytes):
        return data
    if isinstance(data, Mapping):
        # Each (key, value) item is a tuple and is encoded element-wise.
        return type(data)(map(encode_to_utf8, data.items()))
    if isinstance(data, tuple) and hasattr(data, '_fields'):
        # namedtuple constructors take one positional argument per field
        # rather than a single iterable, so the encoded items are unpacked.
        return type(data)(*map(encode_to_utf8, data))
    if isinstance(data, Iterable):
        return type(data)(map(encode_to_utf8, data))
    return data
def decode_from_utf8(data):
    """Recursively decode utf-8 byte strings inside ``data`` to unicode.

    Byte strings are decoded, unicode strings are returned unchanged,
    mappings and other iterables are rebuilt as the same container type with
    every element decoded, and anything else is returned as is.

    Args:
        data: A string, a container, or any other object.

    Returns:
        ``data`` with every contained byte string decoded.
    """
    if isinstance(data, bytes):
        return data.decode('utf-8')
    if isinstance(data, str):
        return data
    if isinstance(data, Mapping):
        # Each (key, value) item is a tuple and is decoded element-wise.
        return type(data)(map(decode_from_utf8, data.items()))
    if isinstance(data, tuple) and hasattr(data, '_fields'):
        # namedtuple constructors take one positional argument per field
        # rather than a single iterable, so the decoded items are unpacked.
        return type(data)(*map(decode_from_utf8, data))
    if isinstance(data, Iterable):
        return type(data)(map(decode_from_utf8, data))
    return data
def utf8_decode(function):
    """
    Decode all arguments from byte strings to unicode strings. Use this for
    functions called from outside of this script, e.g. callbacks from weechat.

    Under python 3 the arguments are passed through unchanged; decoding is
    only applied under python 2.
    """
    @wraps(function)
    def wrapper(*args, **kwargs):
        # Don't do anything if we're python 3
        if sys.hexversion >= 0x3000000:
            return function(*args, **kwargs)
        return function(*decode_from_utf8(args), **decode_from_utf8(kwargs))
    return wrapper
class WeechatWrapper(object):
    """Proxy that converts text at the boundary with the wrapped object.

    Attribute access is forwarded to ``wrapped_class``. Callable attributes
    come back wrapped so that their arguments go through ``encode_to_utf8``
    and their results through ``decode_from_utf8``; any other attribute value
    is passed through ``decode_from_utf8`` before being returned.
    """

    def __init__(self, wrapped_class):
        self.wrapped_class = wrapped_class

    # Helper method used to encode/decode method calls.
    def wrap_for_utf8(self, method):
        """Return a callable that encodes arguments and decodes the result.

        Args:
            method: The callable to invoke with the encoded arguments.

        Returns:
            A function accepting arbitrary positional and keyword arguments.
            It returns this wrapper when ``method`` hands back the wrapped
            object itself, and the decoded result otherwise.
        """
        def hooked(*args, **kwargs):
            result = method(*encode_to_utf8(args), **encode_to_utf8(kwargs))
            # Prevent wrapped_class from becoming unwrapped. Identity is what
            # matters here, so compare with ``is`` rather than ``==``.
            if result is self.wrapped_class:
                return self
            return decode_from_utf8(result)
        return hooked

    # Encode and decode everything sent to/received from weechat. Text is
    # kept as unicode internally, but has to be sent to weechat as utf8.
    def __getattr__(self, attr):
        """Look up ``attr`` on the wrapped object, converting as needed.

        Raises:
            AttributeError: If the wrapped object has no such attribute, or
                if ``wrapped_class`` has not been set on this instance.
        """
        # __getattr__ only runs when normal lookup fails, so getting here
        # with "wrapped_class" means it was never assigned (e.g. an instance
        # created without __init__). Reading it again below would recurse
        # without end, so fail with a regular AttributeError instead.
        if attr == "wrapped_class":
            raise AttributeError(attr)
        orig_attr = getattr(self.wrapped_class, attr)
        if callable(orig_attr):
            return self.wrap_for_utf8(orig_attr)
        return decode_from_utf8(orig_attr)

    # Ensure all lines sent to weechat specify a prefix. For lines after the
    # first, we want to disable the prefix, which is done by specifying a
    # space.
    def prnt_date_tags(self, buffer, date, tags, message):
        """Forward to the wrapped ``prnt_date_tags`` with continuation lines
        prefixed by a space and a tab."""
        message = message.replace("\n", "\n \t")
        return self.wrap_for_utf8(self.wrapped_class.prnt_date_tags)(
            buffer,
            date,
            tags,
            message
        )
@unique
class MessageType(Enum):
LOGIN = 0