package move

import (
	"bufio"
	"bytes"
	"fmt"
	"strings"

	"code.c-base.org/gochess/libchess/pkg/board"
)

// tokenFactory hands out pre-allocated Tokens; a background goroutine keeps
// the buffered channel topped up so newToken never allocates inline.
var tokenFactory = make(chan *Token, 128)

func init() {
	go func() {
		for {
			tokenFactory <- &Token{}
		}
	}()
}

// TokenType defines the type of a token.
type TokenType uint8

// The following TokenTypes exist.
const (
	TokenError TokenType = iota
	TokenEOF
	TokenPiece
	TokenFile
	TokenRank
	TokenCapture
	TokenSquare
	TokenCheck
	TokenMate
	TokenCastles
)

// eof signals the end of a move.
const eof = -1

// Token represents a move token.
type Token struct {
	Pos   int       // character column of this token
	Type  TokenType // type (see above)
	Value string    // literal value
}

// Lexer implements a lexer for tokenizing PGN-formatted moves.
type Lexer struct {
	input  *bufio.Reader // buffered io for streaming the input
	tokens chan Token    // output channel
	start  int           // starting position of the current token
	pos    int           // current scanning position
	buf    *Token        // buffered TokenFile awaiting a possible TokenRank
}

// NewLexer returns an initialized Lexer.
func NewLexer(input string) *Lexer {
	l := &Lexer{
		input:  bufio.NewReader(strings.NewReader(input)),
		start:  1,
		pos:    1,
		tokens: make(chan Token, 1),
	}
	go l.scan()
	return l
}

// NextToken returns the next token from the input string.
func (l *Lexer) NextToken() Token {
	return <-l.tokens
}

// emit emits the given token to the output channel.
//
// A token of type TokenFile [a-h] is buffered and compared against the next
// token: if that token is of type TokenRank [1-8], the two are combined into a
// single token of type TokenSquare.
func (l *Lexer) emit(t Token) {
	if l.buf == nil {
		// buffer a TokenFile so it can be combined with a following TokenRank
		if t.Type == TokenFile {
			l.buf = &t
			return
		}
		l.tokens <- t
		l.start = l.pos
		return
	}

	// grab the buffered token
	prev := l.buf
	l.buf = nil

	if t.Type != TokenRank {
		// the buffered TokenFile is not followed by a TokenRank: flush it
		// as-is and handle the current token from scratch, since it may
		// itself be a TokenFile (e.g. the file disambiguator in "Rae1")
		l.tokens <- *prev
		l.emit(t)
		return
	}

	// TokenFile followed by TokenRank combines into TokenSquare
	strSq := fmt.Sprintf("%s%s", prev.Value, t.Value)
	if _, ok := board.StrToSquareMap[strSq]; !ok {
		// technically unreachable, since every [a-h][1-8] pair is a valid
		// square, but handled anyway just in case
		l.tokens <- *prev
		l.tokens <- t
	} else {
		// emit a TokenSquare instead of the individual TokenFile & TokenRank
		l.tokens <- Token{
			Pos:   l.start,
			Type:  TokenSquare,
			Value: strSq,
		}
	}
	l.start = l.pos
}

// next reads the next rune from the buffered input stream.
func (l *Lexer) next() rune {
	r, _, err := l.input.ReadRune()
	if err != nil {
		return eof
	}
	l.pos++
	return r
}

// undo puts the most recently read rune back onto the input stream. The error
// from UnreadRune is ignored; it can only fail if no rune was read before.
func (l *Lexer) undo() {
	l.input.UnreadRune()
	l.pos--
}

// newToken is a helper for easily initializing Tokens with the correct values.
func (l *Lexer) newToken(tokType TokenType, v string) Token {
	t := <-tokenFactory
	t.Pos = l.start
	t.Type = tokType
	t.Value = v
	return *t
}
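// ExampleLexer is an illustrative sketch of how a consumer drives the Lexer;
// it is not part of the original API and would normally live in a _test.go
// file so `go test` verifies the Output comment. It assumes that
// board.StrToSquareMap contains every algebraic square name such as "e4", so
// the 'e' and '4' runes below combine into a single TokenSquare.
func ExampleLexer() {
	l := NewLexer("Nxe4+")
	for {
		t := l.NextToken()
		if t.Type == TokenEOF || t.Type == TokenError {
			break
		}
		fmt.Println(t.Value)
	}
	// Output:
	// N
	// x
	// e4
	// +
}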
// scan scans for tokens and emits them to the output channel until the end of
// the input stream is reached.
func (l *Lexer) scan() {
	defer close(l.tokens)
	for {
		r := l.next()
		switch r {
		case eof:
			l.emit(l.newToken(TokenEOF, "eof"))
			return
		case 'O', '0':
			l.undo()
			m := lexCastles(l)
			if m == "" {
				l.emit(l.newToken(TokenError, m))
			} else {
				l.emit(l.newToken(TokenCastles, m))
			}
		case 'K', 'Q', 'B', 'N', 'R':
			l.emit(l.newToken(TokenPiece, string(r)))
		case 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h':
			l.emit(l.newToken(TokenFile, string(r)))
		case '1', '2', '3', '4', '5', '6', '7', '8':
			l.emit(l.newToken(TokenRank, string(r)))
		case '+':
			l.emit(l.newToken(TokenCheck, string(r)))
		case '#':
			l.emit(l.newToken(TokenMate, string(r)))
		case 'x':
			l.emit(l.newToken(TokenCapture, string(r)))
		case '=':
			// noop: promotion markers are skipped
		default:
			l.emit(l.newToken(TokenError, string(r)))
			return
		}
	}
}

// lexCastles consumes a castling move from the input, accepting both the
// letter-O and the digit-0 spelling, and normalizes it to "O-O" (kingside)
// or "O-O-O" (queenside). It returns "" if the consumed runes do not form a
// valid castling move.
func lexCastles(l *Lexer) string {
	var out bytes.Buffer
loop:
	for out.Len() < 5 { // "O-O-O" is the longest possible notation
		switch r := l.next(); r {
		case 'O', '0', '-':
			out.WriteRune(r)
		case eof:
			break loop
		default:
			// put the rune back so a trailing '+' or '#' is lexed normally
			l.undo()
			break loop
		}
	}
	switch out.String() {
	case "O-O", "0-0":
		return "O-O"
	case "O-O-O", "0-0-0":
		return "O-O-O"
	default:
		return ""
	}
}
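// ExampleLexer_castling sketches how both castling spellings are handled:
// lexCastles accepts letter-O and digit-0 notation and always emits the
// normalized letter-O form, leaving a trailing check or mate marker to be
// lexed as its own token. Like ExampleLexer above, this is hypothetical
// usage that would normally live in a _test.go file.
func ExampleLexer_castling() {
	l := NewLexer("0-0-0#")
	for t := l.NextToken(); t.Type != TokenEOF; t = l.NextToken() {
		fmt.Println(t.Value)
	}
	// Output:
	// O-O-O
	// #
}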