Simple PGN parser

This commit is contained in:
baccenfutter 2020-05-08 22:55:25 +02:00
parent 80e9a0d890
commit 5025d29d9e
Signed by: baccenfutter
GPG key ID: 9EF0A3998363DBC9
6 changed files with 1064 additions and 0 deletions

199
pkg/pgn/move/lexer.go Normal file
View file

@@ -0,0 +1,199 @@
package move
import (
"bufio"
"bytes"
"fmt"
"strings"
"code.c-base.org/gochess/libchess/pkg/board"
)
// tokenFactory hands pre-allocated Tokens to newToken.
//
// NOTE(review): newToken copies the pooled *Token by value on return, so the
// pooled struct itself is never actually reused — the pool adds a channel
// round-trip per token without saving any allocations. Verify whether it can
// be removed (newToken would construct the Token directly).
var tokenFactory = make(chan *Token, 128)

// init starts a background goroutine that keeps tokenFactory topped up.
// The send blocks once the 128-slot buffer is full, so the goroutine parks
// rather than allocating unboundedly; it runs for the lifetime of the
// process.
func init() {
	go func() {
		for {
			tokenFactory <- &Token{}
		}
	}()
}
// TokenType defines the type of a token emitted by the Lexer.
type TokenType uint8

// The following TokenTypes exist.
// Note that TokenError is the zero value, so an uninitialized Token reads as
// an error token.
const (
	TokenError TokenType = iota // unrecognized input (also the zero value)
	TokenEOF                    // end of the move string
	TokenPiece                  // piece letter: K, Q, B, N or R
	TokenFile                   // file letter: a-h
	TokenRank                   // rank digit: 1-8
	TokenCapture                // capture marker "x"
	TokenSquare                 // combined file+rank, e.g. "e4"
	TokenCheck                  // check marker "+"
	TokenMate                   // mate marker "#"
	TokenCastles                // normalized castling move "O-O" / "O-O-O"
)
// eof signals the end of a move; returned by Lexer.next when the input
// stream is exhausted.
const eof = -1

// Token represents a single lexical unit of a PGN move.
type Token struct {
	Pos   int       // character column of this token (1-based; see NewLexer)
	Type  TokenType // type (see the TokenType constants above)
	Value string    // literal value
}
// Lexer implements a lexer for tokenizing PGN formatted moves.
//
// Tokens are produced by a background scanning goroutine (started by
// NewLexer) and consumed via NextToken.
type Lexer struct {
	input  *bufio.Reader // buffered io for streaming the input
	tokens chan Token    // output channel; closed when scanning stops
	start  int           // starting position of the current token
	pos    int           // current scanning position
	buf    *Token        // pending TokenFile awaiting a possible TokenRank (see emit)
}
// NewLexer returns a Lexer initialized with the given move string and
// starts its scanning goroutine; tokens become available via NextToken.
func NewLexer(input string) *Lexer {
	lex := new(Lexer)
	lex.input = bufio.NewReader(strings.NewReader(input))
	lex.start = 1
	lex.pos = 1
	lex.tokens = make(chan Token, 1)
	go lex.scan()
	return lex
}
// NextToken returns the next token scanned from the input string, blocking
// until one is available.
func (l *Lexer) NextToken() Token {
	t := <-l.tokens
	return t
}
// emit sends the given token to the output channel.
//
// A TokenFile ([a-h]) is not sent immediately: it is buffered and combined
// with an immediately following TokenRank ([1-8]) into a single TokenSquare
// (e.g. "e" + "4" -> "e4").
//
// Fix over the original: when the buffered file was followed by anything
// other than a rank (a disambiguating file as in "Nbd2", or a capture as in
// "exd5"), BOTH tokens were silently dropped. Now the buffered file is
// flushed first and the current token is then processed normally. l.start
// is also advanced on the combine path, which the original missed.
func (l *Lexer) emit(t Token) {
	if l.buf != nil {
		// Grab the pending TokenFile off the buffer.
		prev := l.buf
		l.buf = nil
		if t.Type == TokenRank {
			strSq := fmt.Sprintf("%s%s", prev.Value, t.Value)
			if _, ok := board.StrToSquareMap[strSq]; ok {
				// Emit TokenSquare instead of individual TokenFile & TokenRank.
				l.tokens <- Token{
					Pos:   l.start,
					Type:  TokenSquare,
					Value: strSq,
				}
			} else {
				// Technically unreachable (every file/rank pair is a valid
				// square), but handled defensively.
				l.tokens <- *prev
				l.tokens <- t
			}
			l.start = l.pos
			return
		}
		// The buffered file was not followed by a rank: flush it so it is
		// not lost, then fall through to process the current token.
		l.tokens <- *prev
	}
	if t.Type == TokenFile {
		// Buffer the file and wait for a potential rank.
		l.buf = &t
		return
	}
	l.tokens <- t
	l.start = l.pos
}
// next consumes and returns the next rune from the buffered input stream,
// advancing the scan position; it returns eof once the stream is exhausted.
func (l *Lexer) next() rune {
	if r, _, err := l.input.ReadRune(); err == nil {
		l.pos++
		return r
	}
	return eof
}
// undo pushes the most recently read rune back onto the input stream and
// rewinds the scan position by one.
//
// NOTE(review): the bufio.Reader.UnreadRune error is ignored; it only fails
// when the previous operation was not a successful ReadRune. Current callers
// invoke undo directly after next() returned a real rune, where it cannot
// fail — verify this invariant holds for any new call site (calling undo
// after next() returned eof would wrongly decrement pos).
func (l *Lexer) undo() {
	l.input.UnreadRune()
	l.pos--
}
// newToken is a helper for initializing a Token carrying the lexer's current
// start position.
//
// Fix over the original: it pulled a pooled *Token from the package-level
// tokenFactory channel only to copy it by value on return, so the pooled
// struct was never reused — the channel round-trip bought nothing.
// Constructing the value directly is equivalent and cheaper.
func (l *Lexer) newToken(tokType TokenType, v string) Token {
	return Token{
		Pos:   l.start,
		Type:  tokType,
		Value: v,
	}
}
// scan reads runes from the input and emits tokens to the output channel
// until the end of the input stream (or an unrecognized rune) is reached.
// The tokens channel is closed when scanning stops.
func (l *Lexer) scan() {
	defer close(l.tokens)
	for {
		r := l.next()
		switch r {
		case eof:
			l.emit(l.newToken(TokenEOF, "eof"))
			return
		case 'O', '0':
			// Possible castling move ("O-O" / "O-O-O", also "0-0" notation).
			l.undo()
			m := lexCastles(l)
			if m == "" {
				// Fix: the original emitted TokenError with an empty value
				// and kept scanning; report a useful value and stop,
				// consistent with the default error case below.
				l.emit(l.newToken(TokenError, "invalid castling"))
				return
			}
			l.emit(l.newToken(TokenCastles, m))
		case 'K', 'Q', 'B', 'N', 'R':
			l.emit(l.newToken(TokenPiece, string(r)))
		case 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h':
			l.emit(l.newToken(TokenFile, string(r)))
		case '1', '2', '3', '4', '5', '6', '7', '8':
			l.emit(l.newToken(TokenRank, string(r)))
		case '+':
			l.emit(l.newToken(TokenCheck, string(r)))
		case '#':
			l.emit(l.newToken(TokenMate, string(r)))
		case 'x':
			l.emit(l.newToken(TokenCapture, string(r)))
		case '=':
			// Promotion marker is dropped; the promoted piece letter that
			// follows is emitted as a regular TokenPiece.
		default:
			l.emit(l.newToken(TokenError, string(r)))
			return
		}
	}
}
// lexCastles reads a castling move ("O-O" or "O-O-O", also the "0-0"
// notation) from the lexer's input and returns it normalized to O-notation,
// or "" if the consumed runes do not form a valid castling move.
//
// Fixes over the original: it always read one rune PAST the castle string
// (so a trailing '+' or '#' was swallowed and its token lost), and it
// silently skipped runes that were not 'O'/'0'/'-' instead of stopping.
// Now reading stops at the first non-castling rune, which is pushed back
// for scan to tokenize.
func lexCastles(l *Lexer) string {
	out := bytes.NewBuffer(make([]byte, 0, 5))
	// A castling move is at most five runes long ("O-O-O").
	for i := 0; i < 5; i++ {
		r := l.next()
		if r == 'O' || r == '0' || r == '-' {
			out.WriteRune(r)
			continue
		}
		if r != eof {
			// Leave trailing runes such as '+' or '#' for scan.
			l.undo()
		}
		break
	}
	switch out.String() {
	case "O-O", "0-0":
		return "O-O"
	case "O-O-O", "0-0-0":
		return "O-O-O"
	default:
		return ""
	}
}

224
pkg/pgn/move/parser.go Normal file
View file

@@ -0,0 +1,224 @@
package move
import (
"fmt"
"code.c-base.org/gochess/libchess/pkg/board"
)
// Parser implements a parser for PGN moves.
// It consumes tokens from an underlying Lexer and assembles a board.Move.
type Parser struct {
	lexer *Lexer
}
// NewParser returns a parser ready to parse the given move string.
func NewParser(m string) *Parser {
	p := new(Parser)
	p.lexer = NewLexer(m)
	return p
}
// Move parses the move and returns it or an error.
//
// Each parse step (castles, piece, disambiguity, capture, square,
// check/mate) is attempted at most once, in PGN order; a token claimed by a
// step restarts the loop with the next token.
//
// Fixes over the original: a TokenError from the lexer is now reported
// instead of being ignored, and a token that no remaining step accepts is an
// error rather than being silently dropped. The original could spin forever
// once every step had been exhausted, because zero-value Tokens read from
// the closed tokens channel have Type TokenError (== 0), never TokenEOF.
func (p *Parser) Move() (*board.Move, error) {
	var (
		stateCastles      bool
		statePiece        bool
		stateDisambiguity bool
		stateCaptures     bool
		stateSquare       bool
		stateCheck        bool
		move              = &board.Move{}
	)
parsing:
	for {
		t := p.lexer.NextToken()
		switch t.Type {
		case TokenError:
			return nil, p.throwToken(t)
		case TokenEOF:
			// A complete move needs either a destination square or a
			// castling property.
			if move.To == board.NoSquare &&
				!move.HasProp(board.KingSideCastle) &&
				!move.HasProp(board.QueenSideCastle) {
				return nil, p.throwToken(t)
			}
			return move, nil
		}
		if !stateCastles {
			stateCastles = true
			if parseCastles(t, move) {
				continue parsing
			}
		}
		if !statePiece {
			statePiece = true
			if parsePiece(t, move) {
				continue parsing
			}
		}
		if !stateDisambiguity {
			stateDisambiguity = true
			if parseDisambiguity(t, move) {
				continue parsing
			}
		}
		if !stateCaptures {
			stateCaptures = true
			if parseCaptures(t, move) {
				continue parsing
			}
		}
		if !stateSquare {
			stateSquare = true
			if parseSquare(t, move) {
				continue parsing
			}
		}
		if !stateCheck {
			stateCheck = true
			if parseCheckMate(t, move) {
				continue parsing
			}
		}
		// No step accepted this token: malformed move.
		return nil, p.throwToken(t)
	}
}
// throwToken builds the error returned for a token that cannot be parsed,
// reporting the token's position and literal value.
func (p Parser) throwToken(t Token) error {
	return fmt.Errorf("invalid token at pos %d: %s", t.Pos, t.Value)
}
///////////////////////
//// PARSE CASTLES ////
///////////////////////

// parseCastles consumes a TokenCastles token, recording the corresponding
// castling property on the move. It reports whether the token was consumed.
func parseCastles(t Token, m *board.Move) bool {
	if t.Type != TokenCastles {
		return false
	}
	switch t.Value {
	case "O-O", "0-0":
		m.AddProp(board.KingSideCastle)
	case "O-O-O", "0-0-0":
		m.AddProp(board.QueenSideCastle)
	default:
		return false
	}
	return true
}
/////////////////////
//// PARSE PIECE ////
/////////////////////

// legalPieces maps PGN piece letters to their board piece types.
// Pawns carry no letter in PGN (the lexer treats a-h as files), so they do
// not appear here.
var legalPieces = map[string]board.PieceType{
	"K": board.King,
	"Q": board.Queen,
	"B": board.Bishop,
	"N": board.Knight,
	"R": board.Rook,
}
// parsePiece consumes a TokenPiece token, recording the moved piece type on
// the move. It reports whether the token was consumed.
func parsePiece(t Token, m *board.Move) bool {
	if t.Type != TokenPiece {
		return false
	}
	piece, found := legalPieces[t.Value]
	if !found {
		return false
	}
	m.Piece = piece
	return true
}
///////////////////////
//// PARSE SQUARES ////
///////////////////////

var (
	// legalFiles maps PGN file letters (a-h) to board files.
	legalFiles = map[string]board.File{
		"a": board.FileA,
		"b": board.FileB,
		"c": board.FileC,
		"d": board.FileD,
		"e": board.FileE,
		"f": board.FileF,
		"g": board.FileG,
		"h": board.FileH,
	}
	// legalRanks maps PGN rank digits (1-8) to board ranks.
	legalRanks = map[string]board.Rank{
		"1": board.Rank1,
		"2": board.Rank2,
		"3": board.Rank3,
		"4": board.Rank4,
		"5": board.Rank5,
		"6": board.Rank6,
		"7": board.Rank7,
		"8": board.Rank8,
	}
)
// parseDisambiguity consumes a lone TokenFile or TokenRank that
// disambiguates the origin square (e.g. the "b" in "Nbd2"), recording it as
// the move's from-file or from-rank. It reports whether the token was
// consumed.
func parseDisambiguity(t Token, m *board.Move) bool {
	switch t.Type {
	case TokenFile:
		if f, ok := legalFiles[t.Value]; ok {
			m.FromFile = &f
			return true
		}
	case TokenRank:
		if r, ok := legalRanks[t.Value]; ok {
			m.FromRank = &r
			return true
		}
	}
	return false
}
// parseSquare consumes a TokenSquare token, recording the destination square
// on the move. It reports whether the token was consumed.
func parseSquare(t Token, m *board.Move) bool {
	if t.Type != TokenSquare {
		return false
	}
	m.To = board.StrToSquareMap[t.Value]
	return true
}
///////////////////////
//// PARSE CAPTURE ////
///////////////////////

// legalCapture is the set of literals recognized as a capture marker.
// NOTE(review): a map for a single constant key is heavier than a direct
// string comparison (t.Value == "x"); consider dropping it.
var legalCapture = map[string]struct{}{
	"x": {},
}
// parseCaptures consumes a TokenCapture token ("x"), recording the capture
// property on the move. It reports whether the token was consumed.
//
// The single-key map lookup (legalCapture) of the original is replaced by a
// direct comparison, which is simpler and cheaper for one constant key.
func parseCaptures(t Token, m *board.Move) bool {
	if t.Type == TokenCapture && t.Value == "x" {
		m.AddProp(board.Capture)
		return true
	}
	return false
}
//////////////////////////
//// PARSE CHECK/MATE ////
//////////////////////////

// parseCheckMate consumes a TokenCheck ("+") or TokenMate ("#") token,
// recording the corresponding property on the move. It reports whether the
// token was consumed.
func parseCheckMate(t Token, m *board.Move) bool {
	switch {
	case t.Type == TokenCheck && t.Value == "+":
		m.AddProp(board.Check)
		return true
	case t.Type == TokenMate && t.Value == "#":
		m.AddProp(board.Mate)
		return true
	default:
		return false
	}
}