package move

import (
    "bufio"
    "bytes"
    "fmt"
    "strings"

    "code.c-base.org/gochess/libchess/pkg/board"
)

// tokenFactory is a buffered channel of Token pointers that a background
// goroutine keeps filled; newToken draws pre-allocated tokens from it instead
// of allocating inline.
var tokenFactory = make(chan *Token, 128)

func init() {
    // start the goroutine that keeps tokenFactory filled
    go func() {
        for {
            tokenFactory <- &Token{}
        }
    }()
}

// TokenType defines the type of a token.
type TokenType uint8

// The following TokenTypes exist.
const (
    TokenError TokenType = iota
    TokenEOF
    TokenPiece
    TokenFile
    TokenRank
    TokenCapture
    TokenSquare
    TokenCheck
    TokenMate
    TokenCastles
)

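// As an illustration (assuming the TokenFile/TokenRank combining performed by
// emit below), the SAN move "exd5+" is tokenized as:
//
//	TokenFile("e") TokenCapture("x") TokenSquare("d5") TokenCheck("+") TokenEOF("eof")
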
// eof signals the end of a move.
const eof = -1

// Token represents a move token.
type Token struct {
    Pos   int       // character column of this token
    Type  TokenType // type (see above)
    Value string    // literal value
}

// Lexer implements a lexer for tokenizing PGN formatted moves.
type Lexer struct {
    input  *bufio.Reader // buffered io for streaming the input
    tokens chan Token    // output channel
    start  int           // starting position of the current token
    pos    int           // current scanning position
    buf    *Token        // buffered TokenFile awaiting a possible TokenRank (see emit)
}

// NewLexer returns an initialized Lexer.
func NewLexer(input string) *Lexer {
    l := &Lexer{
        input:  bufio.NewReader(strings.NewReader(input)),
        start:  1,
        pos:    1,
        tokens: make(chan Token, 1),
    }
    go l.scan()
    return l
}

// NextToken returns the next token from the input string.
func (l *Lexer) NextToken() Token {
    return <-l.tokens
}

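// A minimal usage sketch: the caller keeps calling NextToken until it sees
// TokenEOF or TokenError. Tokenizing the SAN move "Nxf3+", for example, yields
// a piece, a capture, a square and a check token, followed by TokenEOF.
//
//	l := NewLexer("Nxf3+")
//	for {
//		t := l.NextToken()
//		fmt.Printf("%d %d %q\n", t.Pos, t.Type, t.Value)
//		if t.Type == TokenEOF || t.Type == TokenError {
//			break
//		}
//	}
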
// emit emits the given token to the output channel.
func (l *Lexer) emit(t Token) {
    // When encountering a token of type TokenFile *[a-h]*, it needs to be buffered and compared to
    // the next token, which may be of type TokenRank *[1-8]*, combining them into a token of type
    // TokenSquare (e.g. "d" followed by "5" becomes the square "d5").
    if l.buf == nil {
        // check for TokenFile and buffer it
        if t.Type == TokenFile {
            l.buf = &t
            return
        }
        l.tokens <- t
        l.start = l.pos
        return
    }

    // grab the last token off the buffer
    prev := l.buf
    l.buf = nil

    // TokenFile followed by TokenRank combines to TokenSquare
    if t.Type == TokenRank {
        strSq := fmt.Sprintf("%s%s", prev.Value, t.Value)
        if _, ok := board.StrToSquareMap[strSq]; ok {
            // emit TokenSquare instead of individual TokenFile & TokenRank
            l.tokens <- Token{
                Pos:   l.start,
                Type:  TokenSquare,
                Value: strSq,
            }
        } else {
            // technically this should not be reached, but handle it anyway, just in case
            l.tokens <- *prev
            l.tokens <- t
        }
        l.start = l.pos
        return
    }

    // The buffered TokenFile is not followed by a TokenRank (e.g. the file
    // disambiguation in "Nbd2" or the capture in "exd5"): flush the buffered
    // token first, then handle the current one on its own.
    l.tokens <- *prev
    if t.Type == TokenFile {
        l.buf = &t
        return
    }
    l.tokens <- t
    l.start = l.pos
}

// next reads the next rune from the buffered input stream.
func (l *Lexer) next() rune {
    r, _, err := l.input.ReadRune()
    if err != nil {
        return eof
    }
    l.pos++
    return r
}

// undo pushes the most recently read rune back onto the input stream and
// rewinds the scanning position.
func (l *Lexer) undo() {
    if err := l.input.UnreadRune(); err != nil {
        // nothing was read since the last unread; leave the position untouched
        return
    }
    l.pos--
}

// newToken is a helper for easily initializing Tokens with the correct values.
func (l *Lexer) newToken(tokType TokenType, v string) Token {
    t := <-tokenFactory
    t.Pos = l.start
    t.Type = tokType
    t.Value = v
    return *t
}

// scan scans for tokens and emits them to the output channel until the end of the input stream is
// reached.
func (l *Lexer) scan() {
    defer close(l.tokens)
    for {
        r := l.next()
        switch r {
        case eof:
            l.emit(l.newToken(TokenEOF, "eof"))
            return
        case 'O', '0':
            l.undo()
            m := lexCastles(l)
            if m == "" {
                l.emit(l.newToken(TokenError, m))
            } else {
                l.emit(l.newToken(TokenCastles, m))
            }
        case 'K', 'Q', 'B', 'N', 'R':
            l.emit(l.newToken(TokenPiece, string(r)))
        case 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h':
            l.emit(l.newToken(TokenFile, string(r)))
        case '1', '2', '3', '4', '5', '6', '7', '8':
            l.emit(l.newToken(TokenRank, string(r)))
        case '+':
            l.emit(l.newToken(TokenCheck, string(r)))
        case '#':
            l.emit(l.newToken(TokenMate, string(r)))
        case 'x':
            l.emit(l.newToken(TokenCapture, string(r)))
        case '=':
            // promotion marker: skipped, the promoted piece that follows is emitted as a TokenPiece
        default:
            l.emit(l.newToken(TokenError, string(r)))
            return
        }
    }
}

// lexCastles scans a castling move and returns it normalized to the letter-O
// notation ("O-O" for king side, "O-O-O" for queen side), accepting the
// zero-notation "0-0" / "0-0-0" as well. It returns an empty string if the
// input is not a valid castling move.
func lexCastles(l *Lexer) string {
    out := bytes.NewBuffer(make([]byte, 0, 5))
    for {
        r := l.next()
        switch r {
        case 'O', '0', '-':
            out.WriteRune(r)
        default:
            // the first rune that is not part of the castling notation (e.g. '+', '#'
            // or eof) ends the move: push it back and evaluate what was collected
            if r != eof {
                l.undo()
            }
            switch out.String() {
            case "O-O", "0-0":
                return "O-O"
            case "O-O-O", "0-0-0":
                return "O-O-O"
            default:
                return ""
            }
        }
    }
}