diff options
| author | Felix Hanley <felix@userspace.com.au> | 2018-11-20 13:24:55 +0000 |
|---|---|---|
| committer | Felix Hanley <felix@userspace.com.au> | 2018-11-20 13:24:55 +0000 |
| commit | 49860f0960cb4f61567b308501c355c6e1cbd2a2 (patch) | |
| tree | a17bebe05ff6488c1b3994274b3ea34cce5e7c17 /lexer.go | |
| parent | b36cae1e6b724ad75c98a6b69a6235686d910633 (diff) | |
| download | lexer-49860f0960cb4f61567b308501c355c6e1cbd2a2.tar.gz lexer-49860f0960cb4f61567b308501c355c6e1cbd2a2.tar.bz2 | |
Add extra tests and line counting
Diffstat (limited to 'lexer.go')
| -rw-r--r-- | lexer.go | 66 |
1 files changed, 43 insertions, 23 deletions
@@ -1,22 +1,31 @@ package lexer import ( + "bytes" "fmt" "strings" "unicode" "unicode/utf8" ) +// StateFunc captures the movement from one state to the next. type StateFunc func(*Lexer) StateFunc +// TokenType identifies the tokens emitted. type TokenType int const ( - EOFRune rune = -1 + // EOFRune is a convenience for EOF + EOFRune rune = -1 + // ErrorToken is returned on error ErrorToken TokenType = -1 - EOFToken TokenType = 0 + // EOFToken is return on EOF + EOFToken TokenType = 0 ) +var lineSep = []byte{'\n'} + +// Token is returned by the lexer. type Token struct { Type TokenType Value string @@ -24,13 +33,16 @@ type Token struct { Line int } +// String implements Stringer func (t Token) String() string { return fmt.Sprintf("[%d] %s", t.Type, t.Value) } +// Lexer represents the lexer machine. type Lexer struct { source string start int + line int position int lastWidth int startState StateFunc @@ -44,6 +56,7 @@ func New(src string, start StateFunc) *Lexer { source: src, startState: start, start: 0, + line: 1, position: 0, history: newStack(), } @@ -60,6 +73,7 @@ func (l *Lexer) Start() { go l.run() } +// StartSync starts the lexer synchronously. func (l *Lexer) StartSync() { // Take half the string length as a buffer size. buffSize := len(l.source) / 2 @@ -75,7 +89,6 @@ func (l *Lexer) run() { for state != nil { state = state(l) } - //fmt.Println("nil state") close(l.tokens) } @@ -91,18 +104,42 @@ func (l *Lexer) Emit(t TokenType) { Type: t, Value: l.Current(), Position: l.position, + Line: l.line, } - //fmt.Printf("emitting: %v\n", tok) l.tokens <- tok + l.checkLines() l.start = l.position l.history.clear() } +func (l *Lexer) checkLines() { + val := l.Current() + l.line += bytes.Count([]byte(val), lineSep) +} + +// Next pulls the next rune from the Lexer and returns it, moving the position +// forward in the source. +func (l *Lexer) Next() rune { + var r rune + var s int + str := l.source[l.position:] + if len(str) == 0 { + r, s = EOFRune, 0 + } else { + r, s = utf8.DecodeRuneInString(str) + } + l.position += s + l.history.push(r) + + return r +} + // Ignore clears the history stack and then sets the current beginning position // to the current position in the source which effectively ignores the section // of the source being analyzed. func (l *Lexer) Ignore() { l.history.clear() + l.checkLines() l.start = l.position } @@ -129,23 +166,6 @@ func (l *Lexer) Backup() { } } -// Next pulls the next rune from the Lexer and returns it, moving the position -// forward in the source. -func (l *Lexer) Next() rune { - var r rune - var s int - str := l.source[l.position:] - if len(str) == 0 { - r, s = EOFRune, 0 - } else { - r, s = utf8.DecodeRuneInString(str) - } - l.position += s - l.history.push(r) - - return r -} - // Accept receives a string containing all acceptable strings and will contine // over each consecutive character in the source until a token not in the given // string is encountered. This should be used to quickly pull token parts. @@ -166,6 +186,7 @@ func (l *Lexer) AcceptRun(valid string) (n int) { return n } +// SkipWhitespace continues over all unicode whitespace. func (l *Lexer) SkipWhitespace() { for { r := l.Next() @@ -185,13 +206,12 @@ func (l *Lexer) SkipWhitespace() { // NextToken returns the next token from the lexer and done func (l *Lexer) NextToken() (*Token, bool) { if tok, ok := <-l.tokens; ok { - //fmt.Printf("next token: %v, ok: %t\n", tok, ok) return &tok, false } return nil, true } -func (l *Lexer) ErrorState(format string, args ...interface{}) StateFunc { +func (l *Lexer) Error(format string, args ...interface{}) StateFunc { l.tokens <- Token{ Type: ErrorToken, Value: fmt.Sprintf(format, args...), |
