| author | Felix Hanley <felix@userspace.com.au> | 2018-11-20 11:51:08 +0000 |
|---|---|---|
| committer | Felix Hanley <felix@userspace.com.au> | 2018-11-20 11:51:08 +0000 |
| commit | b36cae1e6b724ad75c98a6b69a6235686d910633 (patch) | |
| tree | a717a47e36a9cd8aec9fad7b9b04a205e7f9065b | |
Split lexer from parent project
| -rw-r--r-- | lexer.go | 201 |
| -rw-r--r-- | stack.go | 38 |
2 files changed, 239 insertions, 0 deletions
```diff
diff --git a/lexer.go b/lexer.go
new file mode 100644
index 0000000..71ca1f4
--- /dev/null
+++ b/lexer.go
@@ -0,0 +1,201 @@
+package lexer
+
+import (
+	"fmt"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+
+// StateFunc is a single lexing step which returns the next step, or nil
+// when lexing is complete.
+type StateFunc func(*Lexer) StateFunc
+
+// TokenType identifies the kind of token emitted.
+type TokenType int
+
+const (
+	EOFRune    rune      = -1
+	ErrorToken TokenType = -1
+	EOFToken   TokenType = 0
+)
+
+// Token is a lexed value along with its type and position in the source.
+type Token struct {
+	Type     TokenType
+	Value    string
+	Position int
+	Line     int
+}
+
+func (t Token) String() string {
+	return fmt.Sprintf("[%d] %s", t.Type, t.Value)
+}
+
+type Lexer struct {
+	source     string
+	start      int
+	position   int
+	lastWidth  int
+	startState StateFunc
+	tokens     chan Token
+	history    stack
+}
+
+// New creates and returns a lexer ready to parse the given source code.
+func New(src string, start StateFunc) *Lexer {
+	return &Lexer{
+		source:     src,
+		startState: start,
+		start:      0,
+		position:   0,
+		history:    newStack(),
+	}
+}
+
+// Start begins executing the Lexer in an asynchronous manner (using a goroutine).
+func (l *Lexer) Start() {
+	// Take half the string length as a buffer size.
+	buffSize := len(l.source) / 2
+	if buffSize <= 0 {
+		buffSize = 1
+	}
+	l.tokens = make(chan Token, buffSize)
+	go l.run()
+}
+
+// StartSync executes the Lexer on the current goroutine; the token buffer
+// must be able to hold every emitted token before the caller starts reading.
+func (l *Lexer) StartSync() {
+	// Take half the string length as a buffer size.
+	buffSize := len(l.source) / 2
+	if buffSize <= 0 {
+		buffSize = 1
+	}
+	l.tokens = make(chan Token, buffSize)
+	l.run()
+}
+
+func (l *Lexer) run() {
+	state := l.startState
+	for state != nil {
+		state = state(l)
+	}
+	close(l.tokens)
+}
+
+// Current returns the value being analyzed at this moment.
+func (l *Lexer) Current() string {
+	return l.source[l.start:l.position]
+}
+
+// Emit will receive a token type and push a new token with the currently
+// analyzed value into the tokens channel.
+func (l *Lexer) Emit(t TokenType) {
+	tok := Token{
+		Type:     t,
+		Value:    l.Current(),
+		Position: l.position,
+	}
+	l.tokens <- tok
+	l.start = l.position
+	l.history.clear()
+}
+
+// Ignore clears the history stack and then sets the current beginning position
+// to the current position in the source, which effectively ignores the section
+// of the source being analyzed.
+func (l *Lexer) Ignore() {
+	l.history.clear()
+	l.start = l.position
+}
+
+// Peek performs a Next operation immediately followed by a Backup, returning
+// the peeked rune.
+func (l *Lexer) Peek() rune {
+	r := l.Next()
+	l.Backup()
+
+	return r
+}
+
+// Backup will take the last rune read (if any) and rewind the position.
+// Backups can occur more than once per call to Next, but you can never back
+// up past the last point a token was emitted.
+func (l *Lexer) Backup() {
+	r := l.history.pop()
+	if r > EOFRune {
+		size := utf8.RuneLen(r)
+		l.position -= size
+		if l.position < l.start {
+			l.position = l.start
+		}
+	}
+}
+
+// Next pulls the next rune from the Lexer and returns it, moving the position
+// forward in the source.
+func (l *Lexer) Next() rune {
+	var r rune
+	var s int
+	str := l.source[l.position:]
+	if len(str) == 0 {
+		r, s = EOFRune, 0
+	} else {
+		r, s = utf8.DecodeRuneInString(str)
+	}
+	l.position += s
+	l.history.push(r)
+
+	return r
+}
+
+// Accept consumes the next rune if it is in the valid set and reports whether
+// a rune was consumed.
+func (l *Lexer) Accept(valid string) bool {
+	if strings.IndexRune(valid, l.Next()) >= 0 {
+		return true
+	}
+	l.Backup() // last next wasn't a match
+	return false
+}
+
+// AcceptRun consumes a run of runes from the valid set, returning the number
+// of runes consumed. This should be used to quickly pull token parts.
+func (l *Lexer) AcceptRun(valid string) (n int) {
+	for strings.IndexRune(valid, l.Next()) >= 0 {
+		n++
+	}
+	l.Backup() // last next wasn't a match
+	return n
+}
+
+// SkipWhitespace consumes runes until a non-whitespace rune is reached,
+// emitting an EOF token if the end of the source is hit first.
+func (l *Lexer) SkipWhitespace() {
+	for {
+		r := l.Next()
+
+		// Check for EOF before the whitespace test, otherwise the EOF
+		// emit is unreachable (EOFRune is never a space).
+		if r == EOFRune {
+			l.Emit(EOFToken)
+			break
+		}
+
+		if !unicode.IsSpace(r) {
+			l.Backup()
+			break
+		}
+	}
+}
+
+// NextToken returns the next token from the lexer and a boolean which is true
+// once the token channel has been exhausted.
+func (l *Lexer) NextToken() (*Token, bool) {
+	if tok, ok := <-l.tokens; ok {
+		return &tok, false
+	}
+	return nil, true
+}
+
+// ErrorState emits an ErrorToken carrying the formatted message and stops the
+// state machine by returning a nil StateFunc.
+func (l *Lexer) ErrorState(format string, args ...interface{}) StateFunc {
+	l.tokens <- Token{
+		Type:     ErrorToken,
+		Value:    fmt.Sprintf(format, args...),
+		Position: l.position,
+	}
+	return nil
+}
diff --git a/stack.go b/stack.go
new file mode 100644
index 0000000..2ecc44d
--- /dev/null
+++ b/stack.go
@@ -0,0 +1,38 @@
+package lexer
+
+// stackNode is a single entry in the rune history stack.
+type stackNode struct {
+	r    rune
+	next *stackNode
+}
+
+// stack is a simple linked-list stack of runes used to record read history.
+type stack struct {
+	start *stackNode
+}
+
+func newStack() stack {
+	return stack{}
+}
+
+// push places r on top of the stack.
+func (s *stack) push(r rune) {
+	node := &stackNode{r: r}
+	if s.start == nil {
+		s.start = node
+	} else {
+		node.next = s.start
+		s.start = node
+	}
+}
+
+// pop removes and returns the top rune, or EOFRune if the stack is empty.
+func (s *stack) pop() rune {
+	if s.start == nil {
+		return EOFRune
+	}
+
+	n := s.start
+	s.start = n.next
+	return n.r
+}
+
+// clear empties the stack.
+func (s *stack) clear() {
+	s.start = nil
+}
```
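This is the state-function lexer design in the style of Rob Pike's "Lexical Scanning in Go": each `StateFunc` consumes input, emits tokens, and returns the next state. A minimal sketch of how a caller might drive this package follows; the import path, the token constants, and `lexAll` are illustrative assumptions, not part of this commit.

```go
package main

import (
	"fmt"
	"unicode"

	"userspace.com.au/lexer" // hypothetical import path; adjust to wherever this package lives
)

// Token types for this sketch; user-defined types should sit above EOFToken (0).
const (
	NumberToken lexer.TokenType = iota + 1
	WordToken
)

// lexAll is a sample StateFunc. It skips whitespace, emits runs of digits as
// NumberToken, emits any other run of non-space runes as WordToken, and
// returns itself until the source is exhausted.
func lexAll(l *lexer.Lexer) lexer.StateFunc {
	l.SkipWhitespace() // emits EOFToken if it reaches the end of the source
	l.Ignore()         // drop the skipped whitespace from the pending token
	if l.Peek() == lexer.EOFRune {
		return nil // stop the state machine; run() then closes the channel
	}
	if l.AcceptRun("0123456789") > 0 {
		l.Emit(NumberToken)
		return lexAll
	}
	for {
		r := l.Next()
		if r == lexer.EOFRune || unicode.IsSpace(r) {
			l.Backup()
			break
		}
	}
	l.Emit(WordToken)
	return lexAll
}

func main() {
	l := lexer.New("eat 3 shrubberies", lexAll)
	l.Start() // lex in a goroutine; StartSync lexes on this goroutine instead
	for {
		tok, done := l.NextToken()
		if done {
			break
		}
		fmt.Println(tok)
	}
}
```

Note the explicit `l.Ignore()` after `l.SkipWhitespace()`: skipping consumes whitespace but does not on its own discard it from the pending token value.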
