diff --git a/pkg/pgn/doc.go b/pkg/pgn/doc.go
new file mode 100644
index 0000000..e5f9d1d
--- /dev/null
+++ b/pkg/pgn/doc.go
@@ -0,0 +1,24 @@
// Package pgn implements an importer and exporter for the Portable Game Notation (PGN).
// It provides an interface for efficiently reading and writing PGN files using buffered IO.
//
// PGN is the de facto standard format for storing and exchanging chess games. It is a cleartext
// format that is both human- and machine-readable. Most chess libraries and frameworks have some
// kind of built-in support for reading and/or writing PGN files.
//
// Spec: https://www.chessclub.com/help/pgn-spec
//
// Usage:
//
//	f, _ := os.Open("file.pgn")
//	parser := pgn.NewParser(bufio.NewReader(f))
//	for {
//		game, err := parser.Next()
//		if err != nil {
//			log.Fatal(err)
//		}
//		fmt.Println(game)
//		if game == nil {
//			return
//		}
//	}
package pgn
diff --git a/pkg/pgn/game.go b/pkg/pgn/game.go
new file mode 100644
index 0000000..1976ef2
--- /dev/null
+++ b/pkg/pgn/game.go
@@ -0,0 +1,44 @@
package pgn

import (
	"fmt"

	"code.c-base.org/gochess/libchess/pkg/board"
	"code.c-base.org/gochess/libchess/pkg/game"
	"code.c-base.org/gochess/libchess/pkg/pgn/move"
)

// Game represents a single PGN game: its tag pairs (metadata such as Event,
// White, Black, Result) and its movetext as raw SAN move strings.
type Game struct {
	Tags  []game.Tag
	Moves []string
}

// newGame returns an empty Game with non-nil Tags and Moves slices.
func newGame() *Game {
	return &Game{
		Tags:  []game.Tag{},
		Moves: []string{},
	}
}

// Game returns a *game.Game representation of this PGN game.
+func (g *Game) Game() (*game.Game, error) { + // parse all moves + moves := make([]*board.Move, len(g.Moves)) + for i, _m := range g.Moves { + m, err := move.NewParser(_m).Move() + if err != nil { + return nil, err + } + moves[i] = m + } + // return initialized *game.Game + return &game.Game{ + Tags: g.Tags, + Moves: moves, + }, nil +} + +func (g Game) String() string { + return fmt.Sprintf("", g.Tags, g.Moves) +} diff --git a/pkg/pgn/lexer.go b/pkg/pgn/lexer.go new file mode 100644 index 0000000..3252bf4 --- /dev/null +++ b/pkg/pgn/lexer.go @@ -0,0 +1,332 @@ +package pgn + +import ( + "bufio" + "bytes" + "fmt" +) + +// EOF signals end of input +const EOF = -1 + +// TokenType defines the type of a token +type TokenType uint64 + +// The following TokenTypes exist: +const ( + TokenInvalid TokenType = iota + TokenEOF + TokenDiv + TokenNewline + TokenWhitespace + TokenComment + TokenString + + TokenBracketLeft + TokenBracketRight + TokenParenthesisLeft + TokenParenthesisRight + TokenAngleLeft + TokenAngleRight + TokenSymbol + + TokenEscapeMechanism +) + +var tokenName = map[TokenType]string{ + TokenInvalid: "INVALID", + TokenEOF: "EOF", + TokenDiv: "Div", + TokenNewline: "Newline", + TokenWhitespace: "Whitespace", + TokenComment: "Comment", + TokenString: "String", + TokenBracketLeft: "BracketLeft", + TokenBracketRight: "BracketRight", + TokenParenthesisLeft: "ParenthesisLeft", + TokenParenthesisRight: "ParenthesisRight", + TokenAngleLeft: "AngleLeft", + TokenAngleRight: "AngleRight", + TokenSymbol: "Symbol", +} + +// Token represents a PGN token. +type Token struct { + Line int + Col int + Type TokenType + Value string +} + +func (t Token) String() string { + return fmt.Sprintf( + "", + tokenName[t.Type], + t.Line, + t.Col, + t.Value, + ) +} + +// LexFn defines the signature of a lexer function. +type LexFn func(*Lexer) LexFn + +// Lexer implements a PGN tokenizer. 
type Lexer struct {
	input  *bufio.Reader // buffered input stream
	output chan *Token   // emitted tokens
	err    chan error    // lexing errors
	line   int           // current line (1-based)
	start  int           // column where the current token started
	pos    int           // current column (1-based)
}

// NewLexer returns an initialized Lexer and starts tokenizing in the
// background.
func NewLexer(input *bufio.Reader) *Lexer {
	l := &Lexer{
		input:  input,
		output: make(chan *Token, 1),
		err:    make(chan error, 1),
		line:   1,
		start:  1,
		pos:    1,
	}
	go l.run()
	return l
}

// run drives the state-machine loop until a lexer function returns nil,
// then closes both channels.
// NOTE(review): NewLexer already calls `go l.run()` AND run spawns another
// goroutine internally; one of the two `go`s is redundant (harmless, but
// confusing) — confirm which was intended.
func (l *Lexer) run() *Lexer {
	go func() {
		defer close(l.output)
		defer close(l.err)
		for fn := lexMain; fn != nil; {
			fn = fn(l)
		}
	}()
	return l
}

// Next returns the next Token from the input stream or EOF once the input stream has ended.
// NOTE(review): once both channels are closed, the select receives zero
// values and Next returns (nil, nil) — callers must handle a nil token.
func (l *Lexer) Next() (*Token, error) {
	select {
	case err := <-l.err:
		return nil, err
	case t := <-l.output:
		return t, nil
	}
}

// All returns all parsed tokens as []*Token.
func (l *Lexer) All() ([]*Token, error) {
	out := []*Token{}
	for {
		t, err := l.Next()
		if err != nil {
			return out, err
		}
		if t == nil || t.Type == TokenEOF {
			return out, nil
		}
		out = append(out, t)
	}
}

// next reads one rune, returning EOF on any read error.
func (l *Lexer) next() rune {
	r, _, err := l.input.ReadRune()
	if err != nil {
		return EOF
	}
	l.pos++
	return r
}

// undo pushes the most recently read rune back onto the input stream.
func (l *Lexer) undo() {
	l.input.UnreadRune()
	l.pos--
}

// peek returns the next rune without consuming it.
func (l *Lexer) peek() rune {
	defer l.undo()
	return l.next()
}

// newToken builds a Token positioned at the current token start.
func (l *Lexer) newToken(t TokenType, v string) *Token {
	return &Token{
		Line:  l.line,
		Col:   l.start,
		Type:  t,
		Value: v,
	}
}

// emit sends a finished token and marks the start of the next one.
func (l *Lexer) emit(t *Token) {
	l.output <- t
	l.start = l.pos
}

// emitUnexpected reports an unexpected rune on the error channel and
// terminates the state machine.
func (l *Lexer) emitUnexpected(r rune) LexFn {
	l.err <- fmt.Errorf(
		"unexpected character in line %d at col %d: %v",
		l.line,
		l.pos,
		r,
	)
	return nil
}

////////////////
//// LEXERS ////
////////////////

// lexMain is the entry state: it dispatches on the next rune.
func lexMain(l *Lexer) LexFn {
	for {
		r := l.next()
		switch r {
		case EOF:
			l.emit(l.newToken(TokenEOF, "EOF"))
			return nil
		case '\n':
			return lexNewline
		case ' ':
			return lexWhitespace
		case '%':
			// escape mechanism: '%' is only valid as the first character
			// of a line (pos==2 because the rune was already consumed)
			if l.pos == 2 {
				return lexEscape
			}
			return l.emitUnexpected(r)
		case ';':
			return lexCommentUntilNewline
		case '{':
			return lexComment
		case '[':
			l.emit(l.newToken(TokenBracketLeft, "["))
		case ']':
			l.emit(l.newToken(TokenBracketRight, "]"))
		case '"':
			return lexString
		default:
			// NOTE(review): tabs and '\r' fall through here and end up
			// inside symbol tokens — confirm whether they should be
			// treated as whitespace instead.
			l.undo()
			return lexSymbol
		}
	}
}

// lexNewline consumes a run of newlines and emits one TokenNewline.
func lexNewline(l *Lexer) LexFn {
	out := bytes.NewBuffer(make([]byte, 0, 255))
	out.WriteRune('\n')
	for {
		r := l.next()
		switch r {
		case '\n':
			out.WriteRune('\n')
		default:
			l.undo()
			l.emit(l.newToken(TokenNewline, out.String()))
			// advance the line counter by the number of newlines read
			// (out contains only single-byte '\n' runes, so Len() == count)
			l.line += out.Len()
			l.start = 1
			l.pos = 1
			return lexMain
		}
	}
}

// lexWhitespace consumes a run of spaces and emits one TokenWhitespace.
func lexWhitespace(l *Lexer) LexFn {
	out := bytes.NewBuffer(make([]byte, 0, 255))
	out.WriteRune(' ')
	for {
		r := l.next()
		switch r {
		case ' ':
			out.WriteRune(' ')
		default:
			l.undo()
			l.emit(l.newToken(TokenWhitespace, out.String()))
			return lexMain
		}
	}
}

// lexEscape silently discards the rest of an escaped ('%'-prefixed) line.
func lexEscape(l *Lexer) LexFn {
	for {
		r := l.next()
		switch r {
		case EOF, '\n':
			return lexMain
		}
	}
}

// lexCommentUntilNewline consumes a ';' rest-of-line comment.
func lexCommentUntilNewline(l *Lexer) LexFn {
	out := bytes.NewBuffer(make([]byte, 0, 8192))
	for {
		r := l.next()
		switch r {
		case EOF, '\n':
			if out.Len() > 0 {
				l.emit(l.newToken(TokenComment, out.String()))
			}
			return lexMain
		default:
			_, err := out.WriteRune(r)
			if err != nil {
				panic(err)
			}
		}
	}
}

// lexComment consumes a braced '{...}' comment; '\\' escapes the next rune.
func lexComment(l *Lexer) LexFn {
	out := bytes.NewBuffer(make([]byte, 0, 8192))
	for {
		r := l.next()
		switch r {
		case EOF:
			l.emit(l.newToken(TokenComment, out.String()))
			return lexMain
		case '\\':
			out.WriteRune(l.next())
		case '}':
			l.emit(l.newToken(TokenComment, out.String()))
			return lexMain
		default:
			out.WriteRune(r)
		}
	}
}

// lexString consumes a double-quoted string; '\\' escapes the next rune.
// An EOF before the closing quote is an error (unterminated string).
func lexString(l *Lexer) LexFn {
	out := bytes.NewBuffer(make([]byte, 0, 4096))
	for {
		r := l.next()
		switch r {
		case EOF:
			return l.emitUnexpected(r)
		case '\\':
			out.WriteRune(l.next())
		case '"':
			l.emit(l.newToken(TokenString, out.String()))
			return lexMain
		default:
			out.WriteRune(r)
		}
	}
}

// lexSymbol consumes a symbol token terminated by newline, space or quote.
func lexSymbol(l *Lexer) LexFn {
	out := bytes.NewBuffer(make([]byte, 0, 255))
	for {
		r := l.next()
		switch r {
		case EOF:
			l.emit(l.newToken(TokenSymbol, out.String()))
			// NOTE(review): undo after a failed read is a no-op at best —
			// confirm it was intended.
			l.undo()
			return lexMain
		case '\n', ' ', '"':
			l.undo()
			l.emit(l.newToken(TokenSymbol, out.String()))
			return lexMain
		default:
			out.WriteRune(r)
		}
	}
}
diff --git a/pkg/pgn/move/lexer.go b/pkg/pgn/move/lexer.go
new file mode 100644
index 0000000..9c05042
--- /dev/null
+++ b/pkg/pgn/move/lexer.go
@@ -0,0 +1,199 @@
package move

import (
	"bufio"
	"bytes"
	"fmt"
	"strings"

	"code.c-base.org/gochess/libchess/pkg/board"
)

// tokenFactory pre-allocates Token values on a background goroutine.
// NOTE(review): this goroutine never terminates, and the pattern is unlikely
// to beat plain allocation — confirm it is intentional.
var tokenFactory = make(chan *Token, 128)

func init() {
	go func() {
		for {
			tokenFactory <- &Token{}
		}
	}()
}

// TokenType defines the type of a token
type TokenType uint8

// The following TokenTypes exist
const (
	TokenError TokenType = iota
	TokenEOF
	TokenPiece
	TokenFile
	TokenRank
	TokenCapture
	TokenSquare
	TokenCheck
	TokenMate
	TokenCastles
)

// eof signals the end of a move
const eof = -1

// Token represents a move token.
type Token struct {
	Pos   int       // character column of this token
	Type  TokenType // type (see above)
	Value string    // literal value
}

// Lexer implements a lexer for tokenizing PGN formatted moves.
type Lexer struct {
	input  *bufio.Reader // buffered io for streaming the input
	tokens chan Token    // output channel
	start  int           // starting position of the current token
	pos    int           // current scanning position
	buf    *Token        // holds a TokenFile awaiting a possible TokenRank
}

// NewLexer returns an initialized Lexer and starts scanning in the
// background.
func NewLexer(input string) *Lexer {
	l := &Lexer{
		input:  bufio.NewReader(strings.NewReader(input)),
		start:  1,
		pos:    1,
		tokens: make(chan Token, 1),
	}
	go l.scan()
	return l
}

// NextToken returns the next token from the input string.
func (l *Lexer) NextToken() Token {
	return <-l.tokens
}

// emit emits the given token to the output channel.
+func (l *Lexer) emit(t Token) { + // When encountering a token of type TokenFile *[a-h]*, it needs to be buffered and compared to + // the next token, which may be of type TokenRank *[1-8]* combining them into a token of type + // TokenSquare. + if l.buf == nil { + // check for TokenFile and buffer it + if t.Type == TokenFile { + l.buf = &t + } else { + l.tokens <- t + l.start = l.pos + } + } else { + // grab the last token off the buffer + prev := l.buf + l.buf = nil + // TokenFile followed by TokenRank combines to TokenSquare + if t.Type == TokenRank { + strSq := fmt.Sprintf("%s%s", prev.Value, t.Value) + _, ok := board.StrToSquareMap[strSq] + if !ok { + // technically this should not be reached, but I'm handling it anyways, just in case + l.tokens <- *prev + l.tokens <- t + } else { + // emit TokenSquare instead of individual TokenFile & TokenRank + l.tokens <- Token{ + Pos: l.start, + Type: TokenSquare, + Value: strSq, + } + } + } + } +} + +// next reads the next rune from the buffered input stream +func (l *Lexer) next() rune { + r, _, err := l.input.ReadRune() + if err != nil { + return eof + } + l.pos++ + return r +} + +func (l *Lexer) undo() { + l.input.UnreadRune() + l.pos-- +} + +// newToken is a helper for easily initializing Tokens with the correct values. +func (l *Lexer) newToken(tokType TokenType, v string) Token { + t := <-tokenFactory + t.Pos = l.start + t.Type = tokType + t.Value = v + return *t +} + +// scan scans for tokens and emits them to the output channel until the end of the input stream is +// reached. 
+func (l *Lexer) scan() { + defer close(l.tokens) + for { + r := l.next() + switch r { + case eof: + l.emit(l.newToken(TokenEOF, "eof")) + return + case 'O', '0': + l.undo() + m := lexCastles(l) + if m == "" { + l.emit(l.newToken(TokenError, m)) + } else { + l.emit(l.newToken(TokenCastles, m)) + } + case 'K', 'Q', 'B', 'N', 'R': + l.emit(l.newToken(TokenPiece, string(r))) + case 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h': + l.emit(l.newToken(TokenFile, string(r))) + case '1', '2', '3', '4', '5', '6', '7', '8': + l.emit(l.newToken(TokenRank, string(r))) + case '+': + l.emit(l.newToken(TokenCheck, string(r))) + case '#': + l.emit(l.newToken(TokenMate, string(r))) + case 'x': + l.emit(l.newToken(TokenCapture, string(r))) + case '=': + // noop + default: + l.emit(l.newToken(TokenError, string(r))) + return + } + } +} + +func lexCastles(l *Lexer) string { + var ( + buf = make([]byte, 0, 5) + out = bytes.NewBuffer(buf) + c = 0 + ) + for { + r := l.next() + switch { + case c == 5: + m := out.String() + switch m { + case "O-O", "0-0": + return "O-O" + case "O-O-O", "0-0-0": + return "O-O-O" + default: + return "" + } + case r == 'O', r == '-': + out.WriteRune(r) + } + c++ + } +} diff --git a/pkg/pgn/move/parser.go b/pkg/pgn/move/parser.go new file mode 100644 index 0000000..76a3c6f --- /dev/null +++ b/pkg/pgn/move/parser.go @@ -0,0 +1,224 @@ +package move + +import ( + "fmt" + + "code.c-base.org/gochess/libchess/pkg/board" +) + +// Parser implements a parser for PGN moves. +type Parser struct { + lexer *Lexer +} + +// NewParser returns an initialized parser for the given move. +func NewParser(m string) *Parser { + return &Parser{ + lexer: NewLexer(m), + } +} + +// Move parses the move and returns it or an error. 
// Move parses the move and returns it or an error.
//
// The parser walks the token stream exactly once. Each parse category
// (castles, piece, disambiguation, capture, destination square, check/mate)
// is guarded by a state flag and attempted at most once per move, in SAN
// order; a token consumed by a category advances the loop to the next token.
// NOTE(review): a token that matches no remaining category is silently
// ignored rather than reported — confirm this leniency is intended.
func (p *Parser) Move() (*board.Move, error) {
	var (
		stateCastles      bool
		statePiece        bool
		stateDisambiguity bool
		stateCaptures     bool
		stateSquare       bool
		stateCheck        bool
		move              = &board.Move{}
	)
parsing:
	for {
		t := p.lexer.NextToken()
		if t.Type == TokenEOF {
			// a move without a destination square is only legal when it
			// is a castling move
			if move.To == board.NoSquare {
				if !move.HasProp(board.KingSideCastle) && !move.HasProp(board.QueenSideCastle) {
					return nil, p.throwToken(t)
				}
			}
			return move, nil
		}

		if !stateCastles {
			stateCastles = true
			if parseCastles(t, move) {
				continue parsing
			}
		}

		if !statePiece {
			statePiece = true
			if parsePiece(t, move) {
				continue parsing
			}
		}

		if !stateDisambiguity {
			stateDisambiguity = true
			if parseDisambiguity(t, move) {
				continue parsing
			}
		}

		if !stateCaptures {
			stateCaptures = true
			if parseCaptures(t, move) {
				continue parsing
			}
		}

		if !stateSquare {
			stateSquare = true
			if parseSquare(t, move) {
				continue parsing
			}
		}

		if !stateCheck {
			stateCheck = true
			if parseCheckMate(t, move) {
				continue parsing
			}
		}
	}
}

// throwToken formats a parse error for an unexpected token.
func (p Parser) throwToken(t Token) error {
	return fmt.Errorf("invalid token at pos %d: %s", t.Pos, t.Value)
}

///////////////////////
//// PARSE CASTLES ////
///////////////////////

// parseCastles records king-/queen-side castling on m. It reports whether
// the token was consumed.
func parseCastles(t Token, m *board.Move) bool {
	if t.Type == TokenCastles {
		switch t.Value {
		case "O-O", "0-0":
			m.AddProp(board.KingSideCastle)
			return true
		case "O-O-O", "0-0-0":
			m.AddProp(board.QueenSideCastle)
			return true
		}
	}
	return false
}

/////////////////////
//// PARSE PIECE ////
/////////////////////

// legalPieces maps SAN piece letters to their board.PieceType.
var legalPieces = map[string]board.PieceType{
	"K": board.King,
	"Q": board.Queen,
	"B": board.Bishop,
	"N": board.Knight,
	"R": board.Rook,
}

// parsePiece records the moving piece on m. It reports whether the token
// was consumed. Pawn moves carry no piece letter and leave m.Piece unset.
func parsePiece(t Token, m *board.Move) bool {
	if t.Type != TokenPiece {
		return false
	}
	p, ok := legalPieces[t.Value]
	if ok {
		m.Piece = p
		return true
	}
	return false
}

///////////////////////
//// PARSE SQUARES ////
+/////////////////////// + +var ( + legalFiles = map[string]board.File{ + "a": board.FileA, + "b": board.FileB, + "c": board.FileC, + "d": board.FileD, + "e": board.FileE, + "f": board.FileF, + "g": board.FileG, + "h": board.FileH, + } + legalRanks = map[string]board.Rank{ + "1": board.Rank1, + "2": board.Rank2, + "3": board.Rank3, + "4": board.Rank4, + "5": board.Rank5, + "6": board.Rank6, + "7": board.Rank7, + "8": board.Rank8, + } +) + +func parseDisambiguity(t Token, m *board.Move) bool { + if t.Type == TokenFile { + f, ok := legalFiles[t.Value] + if ok { + m.FromFile = &f + return true + } + } + if t.Type == TokenRank { + r, ok := legalRanks[t.Value] + if ok { + m.FromRank = &r + return true + } + } + return false +} + +func parseSquare(t Token, m *board.Move) bool { + if t.Type == TokenSquare { + m.To = board.StrToSquareMap[t.Value] + return true + } + return false +} + +/////////////////////// +//// PARSE CAPTURE //// +/////////////////////// + +var legalCapture = map[string]struct{}{ + "x": {}, +} + +func parseCaptures(t Token, m *board.Move) bool { + if t.Type == TokenCapture { + _, ok := legalCapture[t.Value] + if ok { + m.AddProp(board.Capture) + return true + } + } + return false +} + +////////////////////////// +//// PARSE CHECK/MATE //// +////////////////////////// + +func parseCheckMate(t Token, m *board.Move) bool { + if t.Type == TokenCheck { + if t.Value == "+" { + m.AddProp(board.Check) + return true + } + } + if t.Type == TokenMate { + if t.Value == "#" { + m.AddProp(board.Mate) + return true + } + } + return false +} diff --git a/pkg/pgn/parser.go b/pkg/pgn/parser.go new file mode 100644 index 0000000..007e818 --- /dev/null +++ b/pkg/pgn/parser.go @@ -0,0 +1,241 @@ +package pgn + +import ( + "bufio" + "fmt" + "strings" + + "code.c-base.org/gochess/libchess/pkg/game" +) + +// PoolParsers defines how may parsers are prenitialized. +const PoolParsers = 8 + +// ParseFn defines the signature of a parser function. 
type ParseFn func(*Parser) ParseFn

// Parser implements a PGN parser.
type Parser struct {
	lexer  *Lexer
	errors chan error
	games  chan *Game
	game   *Game // game currently being assembled

	tokenBuf *Token // last token read, for one-token lookahead
	useBuf   bool   // when true, next() re-delivers tokenBuf
}

// parserFactory pre-initializes Parsers on a background goroutine.
// NOTE(review): the goroutine never terminates — confirm the pooling is
// worth it over allocating directly in NewParser.
var parserFactory = make(chan *Parser, PoolParsers)

func init() {
	go func() {
		for {
			parserFactory <- &Parser{
				errors: make(chan error),
				games:  make(chan *Game),
				game:   newGame(),
			}
		}
	}()
}

// NewParser returns an initialized parser
func NewParser(input *bufio.Reader) *Parser {
	p := <-parserFactory
	p.lexer = NewLexer(input)
	go p.run()
	return p
}

// run drives the parser state machine until a ParseFn returns nil, then
// closes both output channels.
func (p *Parser) run() {
	defer close(p.errors)
	defer close(p.games)
	for fn := parseTagSection; fn != nil; {
		fn = fn(p)
	}
}

// Next returns the next parsed game from the input stream or an error.
// NOTE(review): after both channels close, Next returns (nil, nil); callers
// must treat a nil game as end of input (see the package example).
func (p *Parser) Next() (*Game, error) {
	select {
	case err := <-p.errors:
		return nil, err
	case g := <-p.games:
		return g, nil
	}
}

// next returns the buffered token (after an undo) or reads a fresh one from
// the lexer, remembering it for a possible undo.
func (p *Parser) next() (*Token, error) {
	if p.useBuf {
		p.useBuf = false
		return p.tokenBuf, nil
	}
	t, err := p.lexer.Next()
	if err != nil {
		return nil, err
	}
	p.tokenBuf = t
	return t, nil
}

// undo makes the next call to next() return the last token again.
func (p *Parser) undo() {
	p.useBuf = true
}

// throwUnexpected reports an unexpected token on the error channel.
func (p *Parser) throwUnexpected(t *Token) {
	p.errors <- fmt.Errorf(
		"parsing error: unexpected token in line %d at %d: %q",
		t.Line,
		t.Col,
		t.Value,
	)
}

// throwUnexpectedEOF reports a premature end of input and stops parsing.
func throwUnexpectedEOF(p *Parser) ParseFn {
	p.errors <- fmt.Errorf(
		"parsing error: unexpected EOF",
	)
	return nil
}

// emit hands the finished game to the consumer and starts a fresh one.
func (p *Parser) emit() {
	p.games <- p.game
	p.game = newGame()
}

// parseTagSection skips layout tokens until either a tag pair ('[') or the
// movetext begins.
// NOTE(review): on EOF it emits the current game even when it is empty —
// confirm consumers tolerate a trailing empty game.
func parseTagSection(p *Parser) ParseFn {
	for {
		// grab next token
		t, err := p.next()
		// bail out on error
		if err != nil {
			p.errors <- err
			return nil
		}
		// handle for EOF
		if t == nil || t.Type == TokenEOF {
			p.emit()
			return nil
		}
		switch t.Type {
		case TokenNewline, TokenWhitespace:
			// noop
		case TokenBracketLeft:
			return parseTag
		case TokenSymbol:
			// movetext reached; replay this token for parseMovetext
			p.undo()
			return parseMovetext
		default:
			p.throwUnexpected(t)
			return nil
		}
	}
}

// parseTag parses one `[Key "Value"]` tag pair: the key symbol, then the
// string value, then the closing bracket. The tag is only committed once
// the closing ']' is seen.
func parseTag(p *Parser) ParseFn {
	tag := game.Tag{}
findSymbol:
	for {
		t, err := p.next()
		if err != nil {
			p.errors <- err
			return nil
		}
		if t == nil || t.Type == TokenEOF {
			return throwUnexpectedEOF
		}
		switch t.Type {
		case TokenNewline, TokenWhitespace, TokenComment:
			// noop
		case TokenSymbol:
			tag.Key = t.Value
			break findSymbol
		default:
			p.throwUnexpected(t)
			return nil
		}
	}

findValue:
	for {
		t, err := p.next()
		if err != nil {
			p.errors <- err
			return nil
		}
		if t == nil || t.Type == TokenEOF {
			return throwUnexpectedEOF
		}
		switch t.Type {
		case TokenNewline, TokenWhitespace, TokenComment:
			// noop
		case TokenString:
			tag.Value = t.Value
			break findValue
		default:
			p.throwUnexpected(t)
			return nil
		}
	}

	for {
		t, err := p.next()
		if err != nil {
			p.errors <- err
			return nil
		}
		if t == nil || t.Type == TokenEOF {
			return throwUnexpectedEOF
		}
		switch t.Type {
		case TokenNewline, TokenWhitespace, TokenComment:
			// noop
		case TokenBracketRight:
			p.game.Tags = append(p.game.Tags, tag)
			return parseTagSection
		default:
			p.throwUnexpected(t)
			return nil
		}
	}
}

// parseMovetext collects SAN move symbols until a game-termination marker,
// EOF, or the '[' that opens the next game's tag section.
func parseMovetext(p *Parser) ParseFn {
	// isTermination reports whether s is a game-termination marker.
	// NOTE(review): "1/2" is not in the PGN spec ("1/2-1/2" is) —
	// presumably kept for lenient parsing; confirm.
	isTermination := func(s string) bool {
		switch s {
		case "0-1", "1-0", "1/2", "1/2-1/2", "*":
			return true
		default:
			return false
		}
	}
	for {
		t, err := p.next()
		if err != nil {
			p.errors <- err
			return nil
		}
		if t == nil || t.Type == TokenEOF {
			// push EOF back so parseTagSection emits the finished game
			p.undo()
			return parseTagSection
		}
		switch t.Type {
		case TokenNewline, TokenWhitespace, TokenComment:
			// noop
		case TokenSymbol:
			// move numbers ("1.", "2...") contain a dot and are skipped
			if strings.Contains(t.Value, ".") {
				continue
			}
			if !isTermination(t.Value) {
				p.game.Moves = append(p.game.Moves, t.Value)
			}
		case TokenBracketLeft:
			// next game begins: emit this one and replay the '['
			p.emit()
			p.undo()
			return parseTagSection
		default:
			p.throwUnexpected(t)
			return nil
		}
	}
}