// Package pgn implements a streaming tokenizer for PGN (Portable Game
// Notation) chess data. Tokens are produced by a background goroutine
// started by NewLexer and consumed via Next or All.
package pgn

import (
	"bufio"
	"bytes"
	"fmt"
)

// EOF signals end of input.
const EOF = -1

// TokenType defines the type of a token.
type TokenType uint64

// The following TokenTypes exist:
const (
	TokenInvalid TokenType = iota
	TokenEOF
	TokenDiv
	TokenNewline
	TokenWhitespace
	TokenComment
	TokenString
	TokenBracketLeft
	TokenBracketRight
	TokenParenthesisLeft
	TokenParenthesisRight
	TokenAngleLeft
	TokenAngleRight
	TokenSymbol
	TokenEscapeMechanism
)

// tokenName maps each TokenType to a human-readable name for debug output.
// NOTE(review): Div, ParenthesisLeft/Right, AngleLeft/Right and
// EscapeMechanism are declared but never emitted by the lexers below.
var tokenName = map[TokenType]string{
	TokenInvalid:          "INVALID",
	TokenEOF:              "EOF",
	TokenDiv:              "Div",
	TokenNewline:          "Newline",
	TokenWhitespace:       "Whitespace",
	TokenComment:          "Comment",
	TokenString:           "String",
	TokenBracketLeft:      "BracketLeft",
	TokenBracketRight:     "BracketRight",
	TokenParenthesisLeft:  "ParenthesisLeft",
	TokenParenthesisRight: "ParenthesisRight",
	TokenAngleLeft:        "AngleLeft",
	TokenAngleRight:       "AngleRight",
	TokenSymbol:           "Symbol",
	// BUG FIX: entry was missing, so escape-mechanism tokens printed "".
	TokenEscapeMechanism: "EscapeMechanism",
}

// Token represents a PGN token. Line and Col are 1-based and refer to the
// position where the token started.
type Token struct {
	Line  int
	Col   int
	Type  TokenType
	Value string
}

// String returns a debug representation of the token.
// BUG FIX: the format string was empty (""), so Sprintf produced only
// "%!(EXTRA string=..., int=..., ...)" noise instead of the token fields.
func (t Token) String() string {
	return fmt.Sprintf(
		"<%s line=%d col=%d value=%q>",
		tokenName[t.Type], t.Line, t.Col, t.Value,
	)
}

// LexFn defines the signature of a lexer function. Each lexer returns the
// next state, or nil to stop the state machine.
type LexFn func(*Lexer) LexFn

// Lexer implements a PGN tokenizer.
type Lexer struct {
	input  *bufio.Reader
	output chan *Token
	err    chan error
	line   int // 1-based line of the token currently being scanned
	start  int // 1-based column where the current token started
	pos    int // 1-based column of the next rune to be read
}

// NewLexer returns an initialized Lexer and starts tokenizing input in a
// background goroutine.
func NewLexer(input *bufio.Reader) *Lexer {
	l := &Lexer{
		input:  input,
		output: make(chan *Token, 1),
		err:    make(chan error, 1),
		line:   1,
		start:  1,
		pos:    1,
	}
	go l.run()
	return l
}

// run drives the lexer state machine until a state function returns nil,
// then closes both channels so readers observe end-of-stream.
// BUG FIX: run previously spawned a second goroutine even though NewLexer
// already invoked it with `go`, and returned an unused *Lexer.
func (l *Lexer) run() {
	defer close(l.output)
	defer close(l.err)
	for fn := lexMain; fn != nil; {
		fn = fn(l)
	}
}

// Next returns the next Token from the input stream, or an error if the
// lexer encountered invalid input. The final token has Type TokenEOF.
func (l *Lexer) Next() (*Token, error) {
	select {
	case err := <-l.err:
		return nil, err
	case t := <-l.output:
		return t, nil
	}
}

// All drains the lexer and returns all parsed tokens as []*Token. The
// trailing EOF token is not included in the result.
func (l *Lexer) All() ([]*Token, error) {
	out := []*Token{}
	for {
		t, err := l.Next()
		if err != nil {
			return out, err
		}
		if t == nil || t.Type == TokenEOF {
			return out, nil
		}
		out = append(out, t)
	}
}

// next consumes and returns the next rune, or EOF when the input is
// exhausted (any read error is treated as end of input).
func (l *Lexer) next() rune {
	r, _, err := l.input.ReadRune()
	if err != nil {
		return EOF
	}
	l.pos++
	return r
}

// undo pushes the most recently read rune back onto the input and rewinds
// the column counter. Must only be called directly after a successful next.
func (l *Lexer) undo() {
	// UnreadRune can only fail if the last op was not ReadRune; callers
	// uphold that invariant, so the error is deliberately ignored.
	l.input.UnreadRune()
	l.pos--
}

// peek returns the next rune without consuming it.
func (l *Lexer) peek() rune {
	defer l.undo()
	return l.next()
}

// newToken builds a Token anchored at the current token start position.
func (l *Lexer) newToken(t TokenType, v string) *Token {
	return &Token{
		Line:  l.line,
		Col:   l.start,
		Type:  t,
		Value: v,
	}
}

// emit sends a token to the consumer and marks the start of the next one.
func (l *Lexer) emit(t *Token) {
	l.output <- t
	l.start = l.pos
}

// emitUnexpected reports an invalid rune on the error channel and
// terminates the state machine.
// BUG FIX: the rune is now formatted with %q so the message shows the
// offending character instead of its numeric code point (%v on a rune).
func (l *Lexer) emitUnexpected(r rune) LexFn {
	l.err <- fmt.Errorf(
		"unexpected character in line %d at col %d: %q",
		l.line, l.pos, r,
	)
	return nil
}

////////////////
//// LEXERS ////
////////////////

// lexMain is the entry state: it dispatches on the next rune to the
// specialized lexer functions and emits single-rune tokens directly.
func lexMain(l *Lexer) LexFn {
	for {
		r := l.next()
		switch r {
		case EOF:
			l.emit(l.newToken(TokenEOF, "EOF"))
			return nil
		case '\n':
			return lexNewline
		case ' ':
			return lexWhitespace
		case '%':
			// The escape mechanism is only valid in the first column
			// (pos is 2 right after consuming the first rune of a line).
			if l.pos == 2 {
				return lexEscape
			}
			return l.emitUnexpected(r)
		case ';':
			return lexCommentUntilNewline
		case '{':
			return lexComment
		case '[':
			l.emit(l.newToken(TokenBracketLeft, "["))
		case ']':
			l.emit(l.newToken(TokenBracketRight, "]"))
		case '"':
			return lexString
		default:
			l.undo()
			return lexSymbol
		}
	}
}

// lexNewline collapses a run of newlines into a single Newline token and
// advances the line counter by the number of newlines consumed.
func lexNewline(l *Lexer) LexFn {
	out := bytes.NewBuffer(make([]byte, 0, 255))
	out.WriteRune('\n')
	for {
		r := l.next()
		switch r {
		case '\n':
			out.WriteRune('\n')
		default:
			l.undo()
			l.emit(l.newToken(TokenNewline, out.String()))
			l.line += out.Len() // each '\n' is exactly one byte
			l.start = 1
			l.pos = 1
			return lexMain
		}
	}
}

// lexWhitespace collapses a run of spaces into a single Whitespace token.
// NOTE(review): tabs are not treated as whitespace here — confirm intended.
func lexWhitespace(l *Lexer) LexFn {
	out := bytes.NewBuffer(make([]byte, 0, 255))
	out.WriteRune(' ')
	for {
		r := l.next()
		switch r {
		case ' ':
			out.WriteRune(' ')
		default:
			l.undo()
			l.emit(l.newToken(TokenWhitespace, out.String()))
			return lexMain
		}
	}
}

// lexEscape discards the remainder of a "%"-escaped line. No token is
// emitted for escaped data.
// BUG FIX: the line/column counters are now updated when the terminating
// newline is consumed, so positions of all subsequent tokens stay correct.
func lexEscape(l *Lexer) LexFn {
	for {
		switch l.next() {
		case '\n':
			l.line++
			l.start = 1
			l.pos = 1
			return lexMain
		case EOF:
			return lexMain
		}
	}
}

// lexCommentUntilNewline scans a ";" rest-of-line comment and emits it as
// a Comment token (empty comments are suppressed).
// BUG FIX: the consumed terminating newline now advances the line counter.
// Also removed a dead error check: bytes.Buffer.WriteRune never fails.
func lexCommentUntilNewline(l *Lexer) LexFn {
	out := bytes.NewBuffer(make([]byte, 0, 8192))
	for {
		r := l.next()
		switch r {
		case EOF, '\n':
			if out.Len() > 0 {
				l.emit(l.newToken(TokenComment, out.String()))
			}
			if r == '\n' {
				l.line++
				l.start = 1
				l.pos = 1
			}
			return lexMain
		default:
			out.WriteRune(r)
		}
	}
}

// lexComment scans a brace-delimited "{...}" comment, honoring backslash
// escapes, and emits its contents as a Comment token.
// NOTE(review): newlines inside the braces do not advance the line counter.
func lexComment(l *Lexer) LexFn {
	out := bytes.NewBuffer(make([]byte, 0, 8192))
	for {
		r := l.next()
		switch r {
		case EOF:
			l.emit(l.newToken(TokenComment, out.String()))
			return lexMain
		case '\\':
			// BUG FIX: guard against EOF right after a backslash; the old
			// code wrote rune(-1) (U+FFFD) into the buffer.
			if esc := l.next(); esc != EOF {
				out.WriteRune(esc)
			}
		case '}':
			l.emit(l.newToken(TokenComment, out.String()))
			return lexMain
		default:
			out.WriteRune(r)
		}
	}
}

// lexString scans a quote-delimited string, honoring backslash escapes,
// and emits its contents (without the quotes) as a String token. An
// unterminated string is reported as an error.
func lexString(l *Lexer) LexFn {
	out := bytes.NewBuffer(make([]byte, 0, 4096))
	for {
		r := l.next()
		switch r {
		case EOF:
			return l.emitUnexpected(r)
		case '\\':
			// BUG FIX: guard against EOF right after a backslash; the old
			// code wrote rune(-1) (U+FFFD) before reporting the error.
			if esc := l.next(); esc != EOF {
				out.WriteRune(esc)
			}
		case '"':
			l.emit(l.newToken(TokenString, out.String()))
			return lexMain
		default:
			out.WriteRune(r)
		}
	}
}

// lexSymbol scans a run of runes up to the next delimiter and emits it as
// a Symbol token. The first rune is re-read here (lexMain undoes it).
// NOTE(review): only '\n', ' ' and '"' terminate a symbol; characters such
// as ']' or '{' are swallowed into the symbol — confirm against the grammar.
func lexSymbol(l *Lexer) LexFn {
	out := bytes.NewBuffer(make([]byte, 0, 255))
	for {
		r := l.next()
		switch r {
		case EOF:
			// BUG FIX: the old code called l.undo() after a failed read,
			// which decremented pos that was never incremented.
			l.emit(l.newToken(TokenSymbol, out.String()))
			return lexMain
		case '\n', ' ', '"':
			l.undo()
			l.emit(l.newToken(TokenSymbol, out.String()))
			return lexMain
		default:
			out.WriteRune(r)
		}
	}
}