333 lines
5.6 KiB
Go
333 lines
5.6 KiB
Go
package pgn
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"fmt"
|
|
)
|
|
|
|
// EOF signals end of input; Lexer.next returns it whenever the underlying
// reader yields any error.
const EOF = -1

// TokenType defines the type of a token.
type TokenType uint64

// The following TokenTypes exist:
const (
	TokenInvalid TokenType = iota // zero value; never emitted by the lexer
	TokenEOF                      // end of input
	TokenDiv                      // NOTE(review): purpose unclear from this file — confirm
	TokenNewline                  // run of one or more '\n'
	TokenWhitespace               // run of one or more ' '
	TokenComment                  // ';' rest-of-line or '{...}' comment body (delimiters stripped)
	TokenString                   // contents of a '"'-quoted string (quotes stripped)

	TokenBracketLeft      // '['
	TokenBracketRight     // ']'
	TokenParenthesisLeft  // '(' — declared but not emitted anywhere in this file
	TokenParenthesisRight // ')' — declared but not emitted anywhere in this file
	TokenAngleLeft        // '<' — declared but not emitted anywhere in this file
	TokenAngleRight       // '>' — declared but not emitted anywhere in this file
	TokenSymbol           // any other run of runes, terminated by '\n', ' ' or '"'

	TokenEscapeMechanism // '%' escape line; the line is consumed and discarded
)
|
|
|
|
var tokenName = map[TokenType]string{
|
|
TokenInvalid: "INVALID",
|
|
TokenEOF: "EOF",
|
|
TokenDiv: "Div",
|
|
TokenNewline: "Newline",
|
|
TokenWhitespace: "Whitespace",
|
|
TokenComment: "Comment",
|
|
TokenString: "String",
|
|
TokenBracketLeft: "BracketLeft",
|
|
TokenBracketRight: "BracketRight",
|
|
TokenParenthesisLeft: "ParenthesisLeft",
|
|
TokenParenthesisRight: "ParenthesisRight",
|
|
TokenAngleLeft: "AngleLeft",
|
|
TokenAngleRight: "AngleRight",
|
|
TokenSymbol: "Symbol",
|
|
}
|
|
|
|
// Token represents a PGN token.
type Token struct {
	Line  int       // 1-based line number at which the token was emitted
	Col   int       // 1-based column at which the token starts
	Type  TokenType // kind of token; see the TokenType constants
	Value string    // raw token text (comments/strings: without their delimiters)
}
|
|
|
|
func (t Token) String() string {
|
|
return fmt.Sprintf(
|
|
"<Token%s(Line: %d, Col: %d, Value: %q)>",
|
|
tokenName[t.Type],
|
|
t.Line,
|
|
t.Col,
|
|
t.Value,
|
|
)
|
|
}
|
|
|
|
// LexFn defines the signature of a lexer function: each state function
// consumes input and returns the next state, or nil to terminate the lexer.
type LexFn func(*Lexer) LexFn

// Lexer implements a PGN tokenizer.
type Lexer struct {
	input  *bufio.Reader
	output chan *Token // emitted tokens (buffered, capacity 1)
	err    chan error  // lexing errors (buffered, capacity 1)
	line   int         // current 1-based line number
	start  int         // 1-based column at which the current token started
	pos    int         // 1-based column of the next unread rune
}
|
|
|
|
// NewLexer returns an initialized Lexer.
|
|
func NewLexer(input *bufio.Reader) *Lexer {
|
|
l := &Lexer{
|
|
input: input,
|
|
output: make(chan *Token, 1),
|
|
err: make(chan error, 1),
|
|
line: 1,
|
|
start: 1,
|
|
pos: 1,
|
|
}
|
|
go l.run()
|
|
return l
|
|
}
|
|
|
|
func (l *Lexer) run() *Lexer {
|
|
go func() {
|
|
defer close(l.output)
|
|
defer close(l.err)
|
|
for fn := lexMain; fn != nil; {
|
|
fn = fn(l)
|
|
}
|
|
}()
|
|
return l
|
|
}
|
|
|
|
// Next returns the next Token from the input stream or EOF once the input stream has ended.
|
|
func (l *Lexer) Next() (*Token, error) {
|
|
select {
|
|
case err := <-l.err:
|
|
return nil, err
|
|
case t := <-l.output:
|
|
return t, nil
|
|
}
|
|
}
|
|
|
|
// All returns all parsed tokens as []*Token.
|
|
func (l *Lexer) All() ([]*Token, error) {
|
|
out := []*Token{}
|
|
for {
|
|
t, err := l.Next()
|
|
if err != nil {
|
|
return out, err
|
|
}
|
|
if t == nil || t.Type == TokenEOF {
|
|
return out, nil
|
|
}
|
|
out = append(out, t)
|
|
}
|
|
}
|
|
|
|
func (l *Lexer) next() rune {
|
|
r, _, err := l.input.ReadRune()
|
|
if err != nil {
|
|
return EOF
|
|
}
|
|
l.pos++
|
|
return r
|
|
}
|
|
|
|
func (l *Lexer) undo() {
|
|
l.input.UnreadRune()
|
|
l.pos--
|
|
}
|
|
|
|
func (l *Lexer) peek() rune {
|
|
defer l.undo()
|
|
return l.next()
|
|
}
|
|
|
|
func (l *Lexer) newToken(t TokenType, v string) *Token {
|
|
return &Token{
|
|
Line: l.line,
|
|
Col: l.start,
|
|
Type: t,
|
|
Value: v,
|
|
}
|
|
}
|
|
|
|
func (l *Lexer) emit(t *Token) {
|
|
l.output <- t
|
|
l.start = l.pos
|
|
}
|
|
|
|
func (l *Lexer) emitUnexpected(r rune) LexFn {
|
|
l.err <- fmt.Errorf(
|
|
"unexpected character in line %d at col %d: %v",
|
|
l.line,
|
|
l.pos,
|
|
r,
|
|
)
|
|
return nil
|
|
}
|
|
|
|
////////////////
|
|
//// LEXERS ////
|
|
////////////////
|
|
|
|
func lexMain(l *Lexer) LexFn {
|
|
for {
|
|
r := l.next()
|
|
switch r {
|
|
case EOF:
|
|
l.emit(l.newToken(TokenEOF, "EOF"))
|
|
return nil
|
|
case '\n':
|
|
return lexNewline
|
|
case ' ':
|
|
return lexWhitespace
|
|
case '%':
|
|
if l.pos == 2 {
|
|
return lexEscape
|
|
}
|
|
return l.emitUnexpected(r)
|
|
case ';':
|
|
return lexCommentUntilNewline
|
|
case '{':
|
|
return lexComment
|
|
case '[':
|
|
l.emit(l.newToken(TokenBracketLeft, "["))
|
|
case ']':
|
|
l.emit(l.newToken(TokenBracketRight, "]"))
|
|
case '"':
|
|
return lexString
|
|
default:
|
|
l.undo()
|
|
return lexSymbol
|
|
}
|
|
}
|
|
}
|
|
|
|
func lexNewline(l *Lexer) LexFn {
|
|
out := bytes.NewBuffer(make([]byte, 0, 255))
|
|
out.WriteRune('\n')
|
|
for {
|
|
r := l.next()
|
|
switch r {
|
|
case '\n':
|
|
out.WriteRune('\n')
|
|
default:
|
|
l.undo()
|
|
l.emit(l.newToken(TokenNewline, out.String()))
|
|
l.line += out.Len()
|
|
l.start = 1
|
|
l.pos = 1
|
|
return lexMain
|
|
}
|
|
}
|
|
}
|
|
|
|
func lexWhitespace(l *Lexer) LexFn {
|
|
out := bytes.NewBuffer(make([]byte, 0, 255))
|
|
out.WriteRune(' ')
|
|
for {
|
|
r := l.next()
|
|
switch r {
|
|
case ' ':
|
|
out.WriteRune(' ')
|
|
default:
|
|
l.undo()
|
|
l.emit(l.newToken(TokenWhitespace, out.String()))
|
|
return lexMain
|
|
}
|
|
}
|
|
}
|
|
|
|
func lexEscape(l *Lexer) LexFn {
|
|
for {
|
|
r := l.next()
|
|
switch r {
|
|
case EOF, '\n':
|
|
return lexMain
|
|
}
|
|
}
|
|
}
|
|
|
|
func lexCommentUntilNewline(l *Lexer) LexFn {
|
|
out := bytes.NewBuffer(make([]byte, 0, 8192))
|
|
for {
|
|
r := l.next()
|
|
switch r {
|
|
case EOF, '\n':
|
|
if out.Len() > 0 {
|
|
l.emit(l.newToken(TokenComment, out.String()))
|
|
}
|
|
return lexMain
|
|
default:
|
|
_, err := out.WriteRune(r)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func lexComment(l *Lexer) LexFn {
|
|
out := bytes.NewBuffer(make([]byte, 0, 8192))
|
|
for {
|
|
r := l.next()
|
|
switch r {
|
|
case EOF:
|
|
l.emit(l.newToken(TokenComment, out.String()))
|
|
return lexMain
|
|
case '\\':
|
|
out.WriteRune(l.next())
|
|
case '}':
|
|
l.emit(l.newToken(TokenComment, out.String()))
|
|
return lexMain
|
|
default:
|
|
out.WriteRune(r)
|
|
}
|
|
}
|
|
}
|
|
|
|
func lexString(l *Lexer) LexFn {
|
|
out := bytes.NewBuffer(make([]byte, 0, 4096))
|
|
for {
|
|
r := l.next()
|
|
switch r {
|
|
case EOF:
|
|
return l.emitUnexpected(r)
|
|
case '\\':
|
|
out.WriteRune(l.next())
|
|
case '"':
|
|
l.emit(l.newToken(TokenString, out.String()))
|
|
return lexMain
|
|
default:
|
|
out.WriteRune(r)
|
|
}
|
|
}
|
|
}
|
|
|
|
func lexSymbol(l *Lexer) LexFn {
|
|
out := bytes.NewBuffer(make([]byte, 0, 255))
|
|
for {
|
|
r := l.next()
|
|
switch r {
|
|
case EOF:
|
|
l.emit(l.newToken(TokenSymbol, out.String()))
|
|
l.undo()
|
|
return lexMain
|
|
case '\n', ' ', '"':
|
|
l.undo()
|
|
l.emit(l.newToken(TokenSymbol, out.String()))
|
|
return lexMain
|
|
default:
|
|
out.WriteRune(r)
|
|
}
|
|
}
|
|
}
|