libchess/pkg/pgn/move/lexer.go

package move

import (
	"bufio"
	"bytes"
	"fmt"
	"strings"

	"code.c-base.org/gochess/libchess/pkg/board"
)

// tokenFactory hands out fresh Token values. A background goroutine keeps the
// buffered channel topped up so newToken never has to allocate inline.
var tokenFactory = make(chan *Token, 128)

func init() {
	go func() {
		for {
			tokenFactory <- &Token{}
		}
	}()
}

// TokenType defines the type of a token.
type TokenType uint8

// The following TokenTypes exist.
const (
	TokenError   TokenType = iota // unexpected input
	TokenEOF                      // end of the move
	TokenPiece                    // piece letter: K, Q, B, N or R
	TokenFile                     // file letter: a-h
	TokenRank                     // rank digit: 1-8
	TokenCapture                  // capture marker: x
	TokenSquare                   // combined file and rank, e.g. e4
	TokenCheck                    // check marker: +
	TokenMate                     // mate marker: #
	TokenCastles                  // castling notation: O-O or O-O-O
)
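
// For example, the SAN move "Nxe4+" lexes into the token stream
// TokenPiece "N", TokenCapture "x", TokenSquare "e4", TokenCheck "+",
// TokenEOF.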

// eof signals the end of a move.
const eof = -1

// Token represents a move token.
type Token struct {
	Pos   int       // character column of this token
	Type  TokenType // type (see above)
	Value string    // literal value
}

// Lexer implements a lexer for tokenizing PGN formatted moves.
type Lexer struct {
	input  *bufio.Reader // buffered io for streaming the input
	tokens chan Token    // output channel
	start  int           // starting position of the current token
	pos    int           // current scanning position
	buf    *Token        // buffered TokenFile, pending a possible TokenRank
}

// NewLexer returns an initialized Lexer and starts scanning the input in a
// background goroutine.
func NewLexer(input string) *Lexer {
	l := &Lexer{
		input:  bufio.NewReader(strings.NewReader(input)),
		start:  1,
		pos:    1,
		tokens: make(chan Token, 1),
	}
	go l.scan()
	return l
}

// NextToken returns the next token from the input string.
func (l *Lexer) NextToken() Token {
	return <-l.tokens
}
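
// A minimal usage sketch: drain the lexer until TokenEOF or TokenError.
//
//	l := NewLexer("Nxe4+")
//	for {
//		t := l.NextToken()
//		fmt.Printf("%v %q\n", t.Type, t.Value)
//		if t.Type == TokenEOF || t.Type == TokenError {
//			break
//		}
//	}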

// emit sends the given token to the output channel.
//
// A token of type TokenFile ([a-h]) is buffered and compared to the next
// token: if that one is of type TokenRank ([1-8]), the two are combined into
// a single token of type TokenSquare.
func (l *Lexer) emit(t Token) {
	if l.buf == nil {
		// Buffer a TokenFile so it can be combined with a following TokenRank.
		if t.Type == TokenFile {
			l.buf = &t
			return
		}
		l.tokens <- t
		l.start = l.pos
		return
	}
	// Grab the buffered token.
	prev := l.buf
	l.buf = nil
	switch t.Type {
	case TokenRank:
		// TokenFile followed by TokenRank combines into TokenSquare.
		strSq := fmt.Sprintf("%s%s", prev.Value, t.Value)
		if _, ok := board.StrToSquareMap[strSq]; ok {
			l.tokens <- Token{
				Pos:   prev.Pos,
				Type:  TokenSquare,
				Value: strSq,
			}
		} else {
			// Technically unreachable, but handled anyway, just in case.
			l.tokens <- *prev
			l.tokens <- t
		}
	case TokenFile:
		// Two files in a row (e.g. the disambiguating file in "Rae1"): emit
		// the buffered one and buffer the new one.
		l.tokens <- *prev
		l.buf = &t
	default:
		// Any other token (e.g. the capture in "exd4"): emit both, in order,
		// instead of silently dropping them.
		l.tokens <- *prev
		l.tokens <- t
	}
	l.start = l.pos
}
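
// Buffering trace for "exd4": "e" (TokenFile) is held back; when "x"
// (TokenCapture) arrives, both are emitted in order; "d" is then buffered and
// combined with "4" into TokenSquare "d4".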

// next reads the next rune from the buffered input stream, returning eof once
// the input is exhausted.
func (l *Lexer) next() rune {
	r, _, err := l.input.ReadRune()
	if err != nil {
		return eof
	}
	l.pos++
	return r
}

// undo puts the most recently read rune back onto the input stream.
func (l *Lexer) undo() {
	// A successful ReadRune always precedes undo, so UnreadRune cannot fail.
	_ = l.input.UnreadRune()
	l.pos--
}

// newToken is a helper for easily initializing Tokens with the correct values.
func (l *Lexer) newToken(tokType TokenType, v string) Token {
	t := <-tokenFactory
	t.Pos = l.start
	t.Type = tokType
	t.Value = v
	return *t
}

// scan scans for tokens and emits them to the output channel until the end of
// the input stream is reached.
func (l *Lexer) scan() {
	defer close(l.tokens)
	for {
		r := l.next()
		switch r {
		case eof:
			l.emit(l.newToken(TokenEOF, "eof"))
			return
		case 'O', '0':
			l.undo()
			m := lexCastles(l)
			if m == "" {
				l.emit(l.newToken(TokenError, "invalid castling notation"))
				return
			}
			l.emit(l.newToken(TokenCastles, m))
		case 'K', 'Q', 'B', 'N', 'R':
			l.emit(l.newToken(TokenPiece, string(r)))
		case 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h':
			l.emit(l.newToken(TokenFile, string(r)))
		case '1', '2', '3', '4', '5', '6', '7', '8':
			l.emit(l.newToken(TokenRank, string(r)))
		case '+':
			l.emit(l.newToken(TokenCheck, string(r)))
		case '#':
			l.emit(l.newToken(TokenMate, string(r)))
		case 'x':
			l.emit(l.newToken(TokenCapture, string(r)))
		case '=':
			// Promotion marker: skip it; the promoted piece that follows is
			// emitted as a regular TokenPiece.
		default:
			l.emit(l.newToken(TokenError, string(r)))
			return
		}
	}
}

// lexCastles consumes castling notation and returns it normalized to the
// letter form: "O-O" for kingside, "O-O-O" for queenside. The zero-based
// variants "0-0" and "0-0-0" are accepted as input. It returns "" if the
// consumed input is not valid castling notation.
func lexCastles(l *Lexer) string {
	var out bytes.Buffer
	for out.Len() < 5 {
		r := l.next()
		if r != 'O' && r != '0' && r != '-' {
			// Not part of the castling notation; hand the rune back to scan.
			if r != eof {
				l.undo()
			}
			break
		}
		out.WriteRune(r)
	}
	switch out.String() {
	case "O-O", "0-0":
		return "O-O"
	case "O-O-O", "0-0-0":
		return "O-O-O"
	default:
		return ""
	}
}
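
// A quick sanity trace (this would normally live in lexer_test.go; shown here
// as an illustrative sketch):
//
//	l := NewLexer("O-O-O#")
//	l.NextToken() // TokenCastles "O-O-O"
//	l.NextToken() // TokenMate "#"
//	l.NextToken() // TokenEOF "eof"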