Simple PGN parser

This commit is contained in:
baccenfutter 2020-05-08 22:55:25 +02:00
parent 80e9a0d890
commit 5025d29d9e
Signed by: baccenfutter
GPG key ID: 9EF0A3998363DBC9
6 changed files with 1064 additions and 0 deletions

24
pkg/pgn/doc.go Normal file
View file

@ -0,0 +1,24 @@
// Package pgn implements an importer and exporter for the Portable Game Notation (PGN).
// It provides an interface for efficiently reading and writing PGN files using buffered IO.
//
// PGN is the de facto standard format for storing and exchanging chess games. It is a cleartext
// format that is both human- and machine-readable. Most chess libraries and frameworks have some
// kind of built-in support for reading and/or writing PGN files.
//
// Spec: https://www.chessclub.com/help/pgn-spec
//
// Usage:
//
// f, _ := os.Open("file.pgn")
// parser := pgn.NewParser(bufio.NewReader(f))
// for {
// game, err := parser.Next()
// if err != nil {
// log.Fatal(err)
// }
// if game == nil {
// return
// }
// fmt.Println(game)
// }
package pgn

44
pkg/pgn/game.go Normal file
View file

@ -0,0 +1,44 @@
package pgn
import (
"fmt"
"code.c-base.org/gochess/libchess/pkg/board"
"code.c-base.org/gochess/libchess/pkg/game"
"code.c-base.org/gochess/libchess/pkg/pgn/move"
)
// Game represents a PGN game: the tag pairs and the movetext of one game, with the moves still
// in their textual form.
type Game struct {
// Tags holds the tag pairs of the game.
Tags []game.Tag
// Moves holds the move symbols as strings; Game() runs them through the move parser.
Moves []string
}
// newGame allocates a Game whose Tags and Moves are empty, non-nil slices.
func newGame() *Game {
	g := new(Game)
	g.Tags = []game.Tag{}
	g.Moves = []string{}
	return g
}
// Game returns a *game.Game representation of this PGN game.
//
// Every entry of g.Moves is run through the move parser. The first move that fails to parse
// aborts the conversion: its error is returned together with a nil game. The tags are handed
// over unchanged.
func (g *Game) Game() (*game.Game, error) {
	parsed := make([]*board.Move, 0, len(g.Moves))
	for _, raw := range g.Moves {
		mv, err := move.NewParser(raw).Move()
		if err != nil {
			return nil, err
		}
		parsed = append(parsed, mv)
	}

	out := &game.Game{Tags: g.Tags, Moves: parsed}
	return out, nil
}
// String renders the game's tags and moves, each formatted with %q, for debugging output.
func (g Game) String() string {
	const layout = "<Game(Tags: %q, Moves: %q)>"
	return fmt.Sprintf(layout, g.Tags, g.Moves)
}

332
pkg/pgn/lexer.go Normal file
View file

@ -0,0 +1,332 @@
package pgn
import (
"bufio"
"bytes"
"fmt"
)
// EOF signals end of input. The lexer's rune reader returns it whenever reading from the input
// fails, which includes reaching the end of the input.
const EOF = -1
// TokenType defines the type of a token.
type TokenType uint64

// The following TokenTypes exist. TokenInvalid is the zero value.
const (
	TokenInvalid TokenType = iota
	TokenEOF
	TokenDiv
	TokenNewline
	TokenWhitespace
	TokenComment
	TokenString
	TokenBracketLeft
	TokenBracketRight
	TokenParenthesisLeft
	TokenParenthesisRight
	TokenAngleLeft
	TokenAngleRight
	TokenSymbol
	TokenEscapeMechanism
)

// tokenName maps every TokenType to the name used when a Token is printed. It must have an
// entry for each constant above; a missing entry prints as an empty type name.
var tokenName = map[TokenType]string{
	TokenInvalid:          "INVALID",
	TokenEOF:              "EOF",
	TokenDiv:              "Div",
	TokenNewline:          "Newline",
	TokenWhitespace:       "Whitespace",
	TokenComment:          "Comment",
	TokenString:           "String",
	TokenBracketLeft:      "BracketLeft",
	TokenBracketRight:     "BracketRight",
	TokenParenthesisLeft:  "ParenthesisLeft",
	TokenParenthesisRight: "ParenthesisRight",
	TokenAngleLeft:        "AngleLeft",
	TokenAngleRight:       "AngleRight",
	TokenSymbol:           "Symbol",
	TokenEscapeMechanism:  "EscapeMechanism",
}
// Token represents a PGN token as produced by the Lexer.
type Token struct {
// Line is the lexer's line counter at the time the token was created.
Line int
// Col is the lexer's start-of-token position at the time the token was created.
Col int
// Type is the kind of token.
Type TokenType
// Value is the text carried by the token.
Value string
}
// String renders the token for debugging. The type name is looked up in tokenName and glued to
// the "<Token" prefix, followed by line, column and the quoted value.
func (t Token) String() string {
	const layout = "<Token%s(Line: %d, Col: %d, Value: %q)>"
	name := tokenName[t.Type]
	return fmt.Sprintf(layout, name, t.Line, t.Col, t.Value)
}
// LexFn defines the signature of a lexer function. A lexer function consumes input from the
// Lexer and returns the function to run next; returning nil stops the lexer.
type LexFn func(*Lexer) LexFn
// Lexer implements a PGN tokenizer. Tokens and errors are produced by a goroutine and handed to
// the consumer through channels.
type Lexer struct {
// input is the rune source being tokenized.
input *bufio.Reader
// output carries the emitted tokens.
output chan *Token
// err carries lexing errors.
err chan error
// line is the current line counter; it starts at 1.
line int
// start is the position at which the token currently being read began.
start int
// pos is the current reading position; it is incremented for every rune read.
pos int
}
// NewLexer returns an initialized Lexer reading from input.
//
// Lexing starts immediately in the background; tokens are retrieved with Next or All. Both
// channels have a buffer of one, and line and column counting start at 1.
func NewLexer(input *bufio.Reader) *Lexer {
	l := &Lexer{
		input:  input,
		output: make(chan *Token, 1),
		err:    make(chan error, 1),
		line:   1,
		start:  1,
		pos:    1,
	}
	// run starts the lexing goroutine itself and returns right away, so it is called directly
	// rather than from yet another goroutine.
	l.run()
	return l
}
// run starts the lexing goroutine and returns the lexer without waiting for it.
//
// The goroutine executes the lexer functions, beginning with lexMain, until one of them returns
// nil. On exit it closes the err channel first and the output channel second.
func (l *Lexer) run() *Lexer {
	go func() {
		defer close(l.output)
		defer close(l.err)

		state := LexFn(lexMain)
		for state != nil {
			state = state(l)
		}
	}()
	return l
}
// Next returns the next Token from the input stream or the error that stopped the lexer.
//
// The last token of a successfully lexed input is of type TokenEOF. Once the lexer has stopped
// and everything has been delivered, Next returns (nil, nil).
func (l *Lexer) Next() (*Token, error) {
	// Both channels are closed when the lexer stops, and a closed channel is always ready to
	// receive from. A bare select would therefore pick at random and could report (nil, nil)
	// while an error or the final token is still sitting in the other channel's buffer. Each
	// branch below checks the other channel before reporting the end of the stream.
	select {
	case err := <-l.err:
		if err != nil {
			return nil, err
		}
		// err is closed without an error; output is closed right after it, so this yields a
		// still-buffered token or nil without blocking for long.
		return <-l.output, nil
	case t := <-l.output:
		if t != nil {
			return t, nil
		}
		// output is closed, which happens after err was closed: this yields a still-buffered
		// error or nil and never blocks.
		return nil, <-l.err
	}
}
// All drains the lexer and returns every token up to, but not including, the EOF token.
//
// If the lexer reports an error, the tokens collected so far are returned together with it.
func (l *Lexer) All() ([]*Token, error) {
	tokens := make([]*Token, 0)
	for {
		tok, err := l.Next()
		switch {
		case err != nil:
			return tokens, err
		case tok == nil || tok.Type == TokenEOF:
			return tokens, nil
		}
		tokens = append(tokens, tok)
	}
}
// next reads one rune from the input and advances the position counter. Any read error,
// including the end of the input, is reported as EOF and leaves the position untouched.
func (l *Lexer) next() rune {
	if r, _, err := l.input.ReadRune(); err == nil {
		l.pos++
		return r
	}
	return EOF
}
// undo puts the most recently read rune back into the input and steps the position counter
// back accordingly.
//
// If the last read did not yield a rune (next returned EOF), there is nothing to put back:
// UnreadRune fails and the position is left alone, because next did not advance it either.
func (l *Lexer) undo() {
	if err := l.input.UnreadRune(); err != nil {
		return
	}
	l.pos--
}
// peek returns the upcoming rune without consuming it: it reads one rune and immediately
// undoes the read.
func (l *Lexer) peek() rune {
	r := l.next()
	l.undo()
	return r
}
// newToken builds a token of type t with value v, stamped with the lexer's current line and
// the start position of the token being read.
func (l *Lexer) newToken(t TokenType, v string) *Token {
	tok := new(Token)
	tok.Type, tok.Value = t, v
	tok.Line, tok.Col = l.line, l.start
	return tok
}
// emit sends t on the output channel and then marks the current position as the start of the
// next token.
func (l *Lexer) emit(t *Token) {
l.output <- t
l.start = l.pos
}
// emitUnexpected reports r as an unexpected character on the err channel and returns nil,
// which stops the lexer. When r is EOF an unexpected end of input is reported instead.
func (l *Lexer) emitUnexpected(r rune) LexFn {
	if r == EOF {
		// Reading EOF does not advance pos, so pos is the column at which input was missing.
		l.err <- fmt.Errorf(
			"unexpected end of input in line %d at col %d",
			l.line,
			l.pos,
		)
		return nil
	}
	// pos was advanced past r when it was read, so r itself sits one column earlier. %q shows
	// the character rather than its numeric code point.
	l.err <- fmt.Errorf(
		"unexpected character in line %d at col %d: %q",
		l.line,
		l.pos-1,
		r,
	)
	return nil
}
////////////////
//// LEXERS ////
////////////////
// lexMain is the top-level lexer function. It emits bracket tokens itself and hands every
// other kind of input to the specialised lexer function for it.
//
// End of input emits the EOF token and stops the lexer. A '%' is only accepted as the first
// character of a line; anywhere else it is reported as unexpected. Any rune without a case of
// its own is put back and lexed as a symbol.
func lexMain(l *Lexer) LexFn {
	for {
		r := l.next()

		// Brackets are complete tokens on their own; keep scanning afterwards.
		if r == '[' {
			l.emit(l.newToken(TokenBracketLeft, "["))
			continue
		}
		if r == ']' {
			l.emit(l.newToken(TokenBracketRight, "]"))
			continue
		}

		switch r {
		case EOF:
			l.emit(l.newToken(TokenEOF, "EOF"))
			return nil
		case '\n':
			return lexNewline
		case ' ':
			return lexWhitespace
		case ';':
			return lexCommentUntilNewline
		case '{':
			return lexComment
		case '"':
			return lexString
		case '%':
			// pos is 2 right after the first rune of a line has been read
			if l.pos != 2 {
				return l.emitUnexpected(r)
			}
			return lexEscape
		}

		l.undo()
		return lexSymbol
	}
}
// lexNewline is entered after one '\n' has been read. It swallows all directly following
// newlines, emits them as a single Newline token, advances the line counter by the number of
// newlines and resets the column counters to 1.
func lexNewline(l *Lexer) LexFn {
	count := 1
	for l.next() == '\n' {
		count++
	}
	// the rune that ended the run belongs to the next token
	l.undo()

	l.emit(l.newToken(TokenNewline, string(bytes.Repeat([]byte{'\n'}, count))))
	l.line += count
	l.start, l.pos = 1, 1
	return lexMain
}
// lexWhitespace is entered after one space has been read. It swallows all directly following
// spaces and emits them as a single Whitespace token.
func lexWhitespace(l *Lexer) LexFn {
	width := 1
	for l.next() == ' ' {
		width++
	}
	// the rune that ended the run belongs to the next token
	l.undo()

	l.emit(l.newToken(TokenWhitespace, string(bytes.Repeat([]byte{' '}, width))))
	return lexMain
}
// lexEscape discards the rest of a line that started with '%'. No token is emitted for it.
//
// The terminating newline is consumed as well, so the line counter is advanced and the column
// counters are reset here. Otherwise every later token would carry a wrong position and a '%'
// on the following line would no longer be recognised as being in the first column.
func lexEscape(l *Lexer) LexFn {
	for {
		switch l.next() {
		case EOF:
			return lexMain
		case '\n':
			l.line++
			l.start, l.pos = 1, 1
			return lexMain
		}
	}
}
// lexCommentUntilNewline reads a ';' comment, which runs to the end of the line, and emits it
// as a Comment token. An empty comment emits nothing.
//
// The terminating newline is consumed without a token of its own, so the line counter is
// advanced and the column counters are reset here to keep later token positions correct.
func lexCommentUntilNewline(l *Lexer) LexFn {
	var out bytes.Buffer
	for {
		r := l.next()
		switch r {
		case EOF, '\n':
			if out.Len() > 0 {
				l.emit(l.newToken(TokenComment, out.String()))
			}
			if r == '\n' {
				l.line++
				l.start, l.pos = 1, 1
			}
			return lexMain
		default:
			// bytes.Buffer.WriteRune always returns a nil error
			out.WriteRune(r)
		}
	}
}
// lexComment reads a '{' comment up to the closing '}' and emits it as a Comment token. A
// backslash takes the following rune literally, so "\}" does not end the comment. A comment
// that is still open at the end of the input is emitted with what was read so far.
//
// Brace comments may span several lines. Newlines inside the comment are counted so that the
// line counter and column of later tokens stay correct; the Comment token itself carries the
// position at which the comment started.
func lexComment(l *Lexer) LexFn {
	var (
		out      bytes.Buffer
		newlines int
	)
	finish := func() LexFn {
		l.emit(l.newToken(TokenComment, out.String()))
		l.line += newlines
		return lexMain
	}

	for {
		r := l.next()
		if r == '\\' {
			// A backslash at the very end of the input escapes nothing.
			if r = l.next(); r == EOF {
				return finish()
			}
		} else if r == EOF || r == '}' {
			return finish()
		}

		if r == '\n' {
			newlines++
			// columns are counted from the start of the new line
			l.pos = 1
		}
		out.WriteRune(r)
	}
}
// lexString reads a quoted string up to the closing '"' and emits its content, without the
// quotes, as a String token. A backslash takes the following rune literally. Reaching the end
// of the input before the closing quote is reported as an error.
func lexString(l *Lexer) LexFn {
	var content bytes.Buffer
	for {
		r := l.next()
		if r == EOF {
			return l.emitUnexpected(r)
		}
		if r == '"' {
			l.emit(l.newToken(TokenString, content.String()))
			return lexMain
		}
		if r == '\\' {
			// whatever follows the backslash is written as it is
			r = l.next()
		}
		content.WriteRune(r)
	}
}
// lexSymbol collects runes into a Symbol token until it meets a newline, a space, a double
// quote or the end of the input. The delimiter is put back so that lexMain sees it again.
func lexSymbol(l *Lexer) LexFn {
	var sym bytes.Buffer
	for {
		r := l.next()

		if r == EOF {
			l.emit(l.newToken(TokenSymbol, sym.String()))
			l.undo()
			return lexMain
		}

		delimiter := r == '\n' || r == ' ' || r == '"'
		if delimiter {
			l.undo()
			l.emit(l.newToken(TokenSymbol, sym.String()))
			return lexMain
		}

		sym.WriteRune(r)
	}
}

199
pkg/pgn/move/lexer.go Normal file
View file

@ -0,0 +1,199 @@
package move
import (
"bufio"
"bytes"
"fmt"
"strings"
"code.c-base.org/gochess/libchess/pkg/board"
)
// tokenFactory is a buffered channel (capacity 128) of freshly allocated, zero-valued Tokens.
var tokenFactory = make(chan *Token, 128)
// init starts a goroutine that sends new Tokens into tokenFactory forever; the goroutine
// blocks whenever the channel is full and is never stopped.
func init() {
go func() {
for {
tokenFactory <- &Token{}
}
}()
}
// TokenType defines the type of a move token.
type TokenType uint8
// The following TokenTypes exist. TokenError is the zero value, so a zero Token is an error
// token.
const (
TokenError TokenType = iota
TokenEOF
TokenPiece
TokenFile
TokenRank
TokenCapture
TokenSquare
TokenCheck
TokenMate
TokenCastles
)
// eof signals the end of a move. The lexer's rune reader returns it whenever reading from the
// input fails.
const eof = -1
// Token represents a move token, i.e. one lexical element of a single move in PGN notation.
type Token struct {
Pos int // character column of this token
Type TokenType // type (see above)
Value string // literal value
}
// Lexer implements a lexer for tokenizing PGN formatted moves. Tokens are produced by a
// goroutine and handed over through the tokens channel.
type Lexer struct {
input *bufio.Reader // buffered io for streaming the input
tokens chan Token // output channel
start int // starting position of the current token
pos int // current scanning position
// buf holds a TokenFile that has been held back by emit until the following token is known.
buf *Token
}
// NewLexer returns an initialized Lexer for the given move text. Scanning starts immediately
// in its own goroutine; tokens are retrieved with NextToken. Positions are counted from 1.
func NewLexer(input string) *Lexer {
	l := new(Lexer)
	l.input = bufio.NewReader(strings.NewReader(input))
	l.tokens = make(chan Token, 1)
	l.start, l.pos = 1, 1

	go l.scan()
	return l
}
// NextToken returns the next token from the input string. It blocks until a token is
// available. Once the scanner has finished and closed the channel, it returns the zero Token,
// whose Type is TokenError.
func (l *Lexer) NextToken() Token {
return <-l.tokens
}
// emit emits the given token to the output channel.
//
// A token of type TokenFile *[a-h]* is not sent right away. It is held back and compared to the
// next token: if that one is of type TokenRank *[1-8]* and both form a valid square, a single
// token of type TokenSquare is emitted in their place. In every other case the held-back file
// is emitted unchanged ahead of the new token, so no token is ever lost (this matters for e.g.
// "exd5", "Nbd7", or a file directly followed by the end of the input).
func (l *Lexer) emit(t Token) {
	if prev := l.buf; prev != nil {
		l.buf = nil

		// TokenFile followed by TokenRank combines to TokenSquare
		if t.Type == TokenRank {
			strSq := fmt.Sprintf("%s%s", prev.Value, t.Value)
			if _, ok := board.StrToSquareMap[strSq]; ok {
				l.tokens <- Token{
					Pos:   prev.Pos,
					Type:  TokenSquare,
					Value: strSq,
				}
				l.start = l.pos
				return
			}
		}

		// not a square: release the file that was held back, then handle t as usual
		l.tokens <- *prev
	}

	if t.Type == TokenFile {
		// hold the file back until the next token is known
		l.buf = &t
		l.start = l.pos
		return
	}

	l.tokens <- t
	l.start = l.pos
}
// next reads the next rune from the buffered input stream and advances the scanning position.
// Any read error, including the end of the input, is reported as eof and leaves the position
// untouched.
func (l *Lexer) next() rune {
	if r, _, err := l.input.ReadRune(); err == nil {
		l.pos++
		return r
	}
	return eof
}
// undo puts the most recently read rune back into the input and steps the scanning position
// back by one. The result of UnreadRune is not checked.
func (l *Lexer) undo() {
l.input.UnreadRune()
l.pos--
}
// newToken is a helper for easily initializing Tokens with the correct values: the token is
// taken from tokenFactory, positioned at the start of the current token and returned by value.
func (l *Lexer) newToken(tokType TokenType, v string) Token {
	tok := <-tokenFactory
	*tok = Token{Pos: l.start, Type: tokType, Value: v}
	return *tok
}
// scan scans for tokens and emits them to the output channel until the end of the input stream
// is reached, at which point it emits TokenEOF. The output channel is closed when scan returns.
//
// A rune that belongs to no token is emitted as TokenError and ends the scan right away. A
// castling sequence that cannot be read is emitted as TokenError too, but scanning goes on.
// The promotion sign '=' is skipped.
func (l *Lexer) scan() {
	defer close(l.tokens)

	for {
		r := l.next()
		if r == eof {
			l.emit(l.newToken(TokenEOF, "eof"))
			return
		}

		var kind TokenType
		switch {
		case r == 'O' || r == '0':
			// let lexCastles see the whole sequence, including its first rune
			l.undo()
			if m := lexCastles(l); m != "" {
				l.emit(l.newToken(TokenCastles, m))
			} else {
				l.emit(l.newToken(TokenError, m))
			}
			continue
		case r == '=':
			continue
		case strings.ContainsRune("KQBNR", r):
			kind = TokenPiece
		case strings.ContainsRune("abcdefgh", r):
			kind = TokenFile
		case strings.ContainsRune("12345678", r):
			kind = TokenRank
		case r == '+':
			kind = TokenCheck
		case r == '#':
			kind = TokenMate
		case r == 'x':
			kind = TokenCapture
		default:
			l.emit(l.newToken(TokenError, string(r)))
			return
		}

		l.emit(l.newToken(kind, string(r)))
	}
}
// lexCastles reads a castling sequence from the input and returns it in normalised form,
// "O-O" or "O-O-O". It returns the empty string if the runes read do not spell a castling move.
//
// Both the letter 'O' and the digit '0' are accepted. Reading stops at the first rune that is
// neither of them nor '-'; that rune is put back so that a following check or mate sign is
// still tokenized by the caller.
func lexCastles(l *Lexer) string {
	var out bytes.Buffer
	for {
		r := l.next()
		switch r {
		case 'O', '0':
			out.WriteByte('O')
		case '-':
			out.WriteByte('-')
		default:
			if r != eof {
				// nothing was read at eof, so there is nothing to put back
				l.undo()
			}
			switch m := out.String(); m {
			case "O-O", "O-O-O":
				return m
			default:
				return ""
			}
		}
	}
}

224
pkg/pgn/move/parser.go Normal file
View file

@ -0,0 +1,224 @@
package move
import (
"fmt"
"code.c-base.org/gochess/libchess/pkg/board"
)
// Parser implements a parser for PGN moves. One Parser handles exactly one move.
type Parser struct {
// lexer supplies the tokens of the move being parsed.
lexer *Lexer
}
// NewParser returns an initialized parser for the given move. A lexer for m is created and
// attached to the parser.
func NewParser(m string) *Parser {
	p := new(Parser)
	p.lexer = NewLexer(m)
	return p
}
// Move parses the move and returns it or an error.
//
// Tokens are matched against a fixed sequence of stages: castles, piece, disambiguation,
// capture, target square, check/mate. Each stage is tried at most once; a token is offered to
// every stage that has not been tried yet, in that order, until one accepts it. A token that
// no remaining stage accepts is skipped.
//
// An error is returned if the lexer reports an invalid token, or if the input ends before
// either a target square or a castling move has been seen.
func (p *Parser) Move() (*board.Move, error) {
	var (
		stateCastles      bool
		statePiece        bool
		stateDisambiguity bool
		stateCaptures     bool
		stateSquare       bool
		stateCheck        bool
		move              = &board.Move{}
	)

parsing:
	for {
		t := p.lexer.NextToken()

		switch t.Type {
		case TokenEOF:
			if move.To == board.NoSquare {
				if !move.HasProp(board.KingSideCastle) && !move.HasProp(board.QueenSideCastle) {
					return nil, p.throwToken(t)
				}
			}
			return move, nil
		case TokenError:
			// The lexer emits TokenError for input it cannot tokenize. After it has closed its
			// channel, NextToken keeps returning zero Tokens, which are of this type as well.
			// Without this case the loop would spin forever on such input.
			return nil, p.throwToken(t)
		}

		if !stateCastles {
			stateCastles = true
			if parseCastles(t, move) {
				continue parsing
			}
		}
		if !statePiece {
			statePiece = true
			if parsePiece(t, move) {
				continue parsing
			}
		}
		if !stateDisambiguity {
			stateDisambiguity = true
			if parseDisambiguity(t, move) {
				continue parsing
			}
		}
		if !stateCaptures {
			stateCaptures = true
			if parseCaptures(t, move) {
				continue parsing
			}
		}
		if !stateSquare {
			stateSquare = true
			if parseSquare(t, move) {
				continue parsing
			}
		}
		if !stateCheck {
			stateCheck = true
			if parseCheckMate(t, move) {
				continue parsing
			}
		}
	}
}
// throwToken builds the error for an invalid token; the message carries the token's position
// and value. Despite its name it only returns the error.
func (p Parser) throwToken(t Token) error {
return fmt.Errorf("invalid token at pos %d: %s", t.Pos, t.Value)
}
///////////////////////
//// PARSE CASTLES ////
///////////////////////
// parseCastles marks m as king-side or queen-side castling if t is a castles token with a
// matching value, and reports whether it did so.
func parseCastles(t Token, m *board.Move) bool {
	if t.Type != TokenCastles {
		return false
	}

	switch t.Value {
	case "O-O", "0-0":
		m.AddProp(board.KingSideCastle)
	case "O-O-O", "0-0-0":
		m.AddProp(board.QueenSideCastle)
	default:
		return false
	}
	return true
}
/////////////////////
//// PARSE PIECE ////
/////////////////////
// legalPieces maps the piece letters that may start a move to their piece type. There is no
// entry for pawns.
var legalPieces = map[string]board.PieceType{
"K": board.King,
"Q": board.Queen,
"B": board.Bishop,
"N": board.Knight,
"R": board.Rook,
}
// parsePiece sets the piece of m if t is a piece token naming a known piece, and reports
// whether it did so.
func parsePiece(t Token, m *board.Move) bool {
	if t.Type == TokenPiece {
		if piece, known := legalPieces[t.Value]; known {
			m.Piece = piece
			return true
		}
	}
	return false
}
///////////////////////
//// PARSE SQUARES ////
///////////////////////
// legalFiles and legalRanks map the file letters a-h and the rank digits 1-8 to the
// corresponding board values.
var (
legalFiles = map[string]board.File{
"a": board.FileA,
"b": board.FileB,
"c": board.FileC,
"d": board.FileD,
"e": board.FileE,
"f": board.FileF,
"g": board.FileG,
"h": board.FileH,
}
legalRanks = map[string]board.Rank{
"1": board.Rank1,
"2": board.Rank2,
"3": board.Rank3,
"4": board.Rank4,
"5": board.Rank5,
"6": board.Rank6,
"7": board.Rank7,
"8": board.Rank8,
}
)
// parseDisambiguity records a lone file or rank token as the origin file or origin rank of m,
// and reports whether it did so.
func parseDisambiguity(t Token, m *board.Move) bool {
	switch t.Type {
	case TokenFile:
		if file, known := legalFiles[t.Value]; known {
			m.FromFile = &file
			return true
		}
	case TokenRank:
		if rank, known := legalRanks[t.Value]; known {
			m.FromRank = &rank
			return true
		}
	}
	return false
}
// parseSquare sets the target square of m if t is a square token, and reports whether it did
// so. The square is looked up by the token's value.
func parseSquare(t Token, m *board.Move) bool {
	if t.Type != TokenSquare {
		return false
	}
	m.To = board.StrToSquareMap[t.Value]
	return true
}
///////////////////////
//// PARSE CAPTURE ////
///////////////////////
// legalCapture is the set of token values accepted as a capture sign.
var legalCapture = map[string]struct{}{
"x": {},
}
// parseCaptures marks m as a capture if t is a capture token with an accepted value, and
// reports whether it did so.
func parseCaptures(t Token, m *board.Move) bool {
	if t.Type != TokenCapture {
		return false
	}
	if _, known := legalCapture[t.Value]; !known {
		return false
	}
	m.AddProp(board.Capture)
	return true
}
//////////////////////////
//// PARSE CHECK/MATE ////
//////////////////////////
// parseCheckMate marks m as giving check ("+") or mate ("#") if t is the matching token, and
// reports whether it did so.
func parseCheckMate(t Token, m *board.Move) bool {
	switch {
	case t.Type == TokenCheck && t.Value == "+":
		m.AddProp(board.Check)
	case t.Type == TokenMate && t.Value == "#":
		m.AddProp(board.Mate)
	default:
		return false
	}
	return true
}

241
pkg/pgn/parser.go Normal file
View file

@ -0,0 +1,241 @@
package pgn
import (
"bufio"
"fmt"
"strings"
"code.c-base.org/gochess/libchess/pkg/game"
)
// PoolParsers defines how many parsers are pre-initialized; it is the capacity of the channel
// that holds them.
const PoolParsers = 8
// ParseFn defines the signature of a parser function. A parser function consumes tokens from
// the Parser and returns the function to run next; returning nil stops the parser.
type ParseFn func(*Parser) ParseFn
// Parser implements a PGN parser. Games and errors are produced by a goroutine and handed to
// the consumer through channels.
type Parser struct {
// lexer supplies the tokens.
lexer *Lexer
// errors carries parsing errors to Next.
errors chan error
// games carries completed games to Next.
games chan *Game
// game is the game currently being assembled.
game *Game
// tokenBuf remembers the token most recently read from the lexer.
tokenBuf *Token
// useBuf makes the next read return tokenBuf again instead of asking the lexer.
useBuf bool
}
// parserFactory is a buffered channel of pre-initialized Parsers, sized by PoolParsers.
var parserFactory = make(chan *Parser, PoolParsers)
// init starts a goroutine that keeps parserFactory filled forever. Each Parser it creates has
// unbuffered errors and games channels and a fresh, empty game; the lexer is left unset.
func init() {
go func() {
for {
parserFactory <- &Parser{
errors: make(chan error),
games: make(chan *Game),
game: newGame(),
}
}
}()
}
// NewParser returns an initialized parser reading from input. It takes a pre-initialized
// Parser from parserFactory, attaches a new Lexer for input and starts parsing in the
// background; games are retrieved with Next.
func NewParser(input *bufio.Reader) *Parser {
	parser := <-parserFactory
	parser.lexer = NewLexer(input)

	go parser.run()
	return parser
}
// run executes the parser functions, beginning with parseTagSection, until one of them returns
// nil. On return it closes the games channel first and the errors channel second.
func (p *Parser) run() {
	defer close(p.errors)
	defer close(p.games)

	state := ParseFn(parseTagSection)
	for state != nil {
		state = state(p)
	}
}
// Next returns the next parsed game from the input stream or an error. It blocks until either
// is available. After the parser has stopped and closed its channels, Next returns (nil, nil).
func (p *Parser) Next() (*Game, error) {
	var (
		g   *Game
		err error
	)
	select {
	case err = <-p.errors:
	case g = <-p.games:
	}
	return g, err
}
// next returns the next token. After a call to undo it hands out the previously read token
// once more; otherwise it reads a new token from the lexer and remembers it.
func (p *Parser) next() (*Token, error) {
	if !p.useBuf {
		tok, err := p.lexer.Next()
		if err != nil {
			return nil, err
		}
		p.tokenBuf = tok
		return tok, nil
	}

	p.useBuf = false
	return p.tokenBuf, nil
}
// undo makes the following call to next return the most recently read token again. Only one
// token can be pushed back this way.
func (p *Parser) undo() {
p.useBuf = true
}
// throwUnexpected sends an "unexpected token" error for t, carrying its line, column and
// value, on the errors channel. It blocks until the error has been received.
func (p *Parser) throwUnexpected(t *Token) {
	const layout = "parsing error: unexpected token in line %d at %d: %q"
	p.errors <- fmt.Errorf(layout, t.Line, t.Col, t.Value)
}
// throwUnexpectedEOF is a parser function that sends an "unexpected EOF" error on the errors
// channel and then stops the parser by returning nil.
func throwUnexpectedEOF(p *Parser) ParseFn {
	err := fmt.Errorf("parsing error: unexpected EOF")
	p.errors <- err
	return nil
}
// emit sends the game assembled so far on the games channel, blocking until it has been
// received, and then starts a fresh, empty game.
func (p *Parser) emit() {
p.games <- p.game
p.game = newGame()
}
// parseTagSection is the initial parser function. It skips newlines, whitespace and comments
// and decides from the next significant token what follows:
//
//   - a left bracket starts a tag pair and continues with parseTag,
//   - a symbol starts the movetext; it is pushed back and parsing continues with parseMovetext,
//   - the end of the input emits the current game and stops the parser.
//
// Any other token, or a lexer error, is reported on the errors channel and stops the parser.
func parseTagSection(p *Parser) ParseFn {
	for {
		// grab next token
		t, err := p.next()

		// bail out on error
		if err != nil {
			p.errors <- err
			return nil
		}

		// handle EOF
		if t == nil || t.Type == TokenEOF {
			p.emit()
			return nil
		}

		switch t.Type {
		case TokenNewline, TokenWhitespace, TokenComment:
			// noop; comments are skipped here just as they are inside tags and movetext
		case TokenBracketLeft:
			return parseTag
		case TokenSymbol:
			p.undo()
			return parseMovetext
		default:
			p.throwUnexpected(t)
			return nil
		}
	}
}
// expectTagToken skips newlines, whitespace and comments and returns the first other token if
// it is of type want; ok is then true.
//
// Otherwise ok is false and abort is the parser function the caller has to return: nil after a
// lexer error or an unexpected token (both have already been reported on the errors channel),
// throwUnexpectedEOF if the input ended.
func expectTagToken(p *Parser, want TokenType) (tok *Token, abort ParseFn, ok bool) {
	for {
		t, err := p.next()
		if err != nil {
			p.errors <- err
			return nil, nil, false
		}
		if t == nil || t.Type == TokenEOF {
			return nil, throwUnexpectedEOF, false
		}

		switch t.Type {
		case TokenNewline, TokenWhitespace, TokenComment:
			continue
		case want:
			return t, nil, true
		default:
			p.throwUnexpected(t)
			return nil, nil, false
		}
	}
}

// parseTag parses one tag pair. It is entered after the opening bracket has been consumed and
// expects, in this order, a symbol (the key), a string (the value) and the closing bracket,
// each optionally preceded by newlines, whitespace or comments. The finished tag is appended
// to the current game and parsing continues with parseTagSection.
func parseTag(p *Parser) ParseFn {
	var tag game.Tag

	key, abort, ok := expectTagToken(p, TokenSymbol)
	if !ok {
		return abort
	}
	tag.Key = key.Value

	value, abort, ok := expectTagToken(p, TokenString)
	if !ok {
		return abort
	}
	tag.Value = value.Value

	if _, abort, ok = expectTagToken(p, TokenBracketRight); !ok {
		return abort
	}
	p.game.Tags = append(p.game.Tags, tag)
	return parseTagSection
}
// parseMovetext collects the moves of the current game.
//
// Newlines, whitespace and comments are skipped. Of every symbol, a leading move number
// indication (everything up to and including the last '.') is stripped, so "1.", "1..." and
// "1.e4" are all handled; what remains is appended to the game's moves unless it is empty or a
// game termination marker.
//
// A left bracket starts the tag section of the next game: the current game is emitted, the
// bracket is pushed back and parsing continues with parseTagSection. At the end of the input
// the EOF token is pushed back for parseTagSection, which emits the game. Any other token, or
// a lexer error, is reported on the errors channel and stops the parser.
func parseMovetext(p *Parser) ParseFn {
	isTermination := func(s string) bool {
		switch s {
		case "0-1", "1-0", "1/2", "1/2-1/2", "*":
			return true
		default:
			return false
		}
	}

	for {
		t, err := p.next()
		if err != nil {
			p.errors <- err
			return nil
		}
		if t == nil || t.Type == TokenEOF {
			p.undo()
			return parseTagSection
		}

		switch t.Type {
		case TokenNewline, TokenWhitespace, TokenComment:
			// noop
		case TokenSymbol:
			sym := t.Value
			// The move number may be glued to the move ("1.e4"); dropping the whole symbol
			// would lose the move, so only the number part is cut off.
			if i := strings.LastIndex(sym, "."); i >= 0 {
				sym = sym[i+1:]
			}
			if sym != "" && !isTermination(sym) {
				p.game.Moves = append(p.game.Moves, sym)
			}
		case TokenBracketLeft:
			p.emit()
			p.undo()
			return parseTagSection
		default:
			p.throwUnexpected(t)
			return nil
		}
	}
}