Add extra tests and line counting

author: Felix Hanley <felix@userspace.com.au> 2018-11-20 13:24:55 +0000
committer: Felix Hanley <felix@userspace.com.au> 2018-11-20 13:24:55 +0000
commit: 49860f0960cb4f61567b308501c355c6e1cbd2a2 (patch)
tree: a17bebe05ff6488c1b3994274b3ea34cce5e7c17 /lexer.go
parent: b36cae1e6b724ad75c98a6b69a6235686d910633 (diff)
download: lexer-49860f0960cb4f61567b308501c355c6e1cbd2a2.tar.gz
lexer-49860f0960cb4f61567b308501c355c6e1cbd2a2.tar.bz2
1 files changed, 43 insertions, 23 deletions
diff --git a/lexer.go b/lexer.go
index 71ca1f4..1cba323 100644
--- a/lexer.go
+++ b/lexer.go
@@ -1,22 +1,31 @@
 package lexer
 
 import (
+	"bytes"
 	"fmt"
 	"strings"
 	"unicode"
 	"unicode/utf8"
 )
 
+// StateFunc captures the movement from one state to the next.
 type StateFunc func(*Lexer) StateFunc
 
+// TokenType identifies the tokens emitted.
 type TokenType int
 
 const (
-	EOFRune    rune      = -1
+	// EOFRune is a convenience for EOF
+	EOFRune rune = -1
+	// ErrorToken is returned on error
 	ErrorToken TokenType = -1
-	EOFToken   TokenType = 0
+	// EOFToken is returned on EOF
+	EOFToken TokenType = 0
 )
 
+var lineSep = []byte{'\n'}
+
+// Token is returned by the lexer.
 type Token struct {
 	Type     TokenType
 	Value    string
@@ -24,13 +33,16 @@ type Token struct {
 	Line     int
 }
 
+// String implements Stringer
 func (t Token) String() string {
 	return fmt.Sprintf("[%d] %s", t.Type, t.Value)
 }
 
+// Lexer represents the lexer machine.
 type Lexer struct {
 	source     string
 	start      int
+	line       int
 	position   int
 	lastWidth  int
 	startState StateFunc
@@ -44,6 +56,7 @@ func New(src string, start StateFunc) *Lexer {
 		source:     src,
 		startState: start,
 		start:      0,
+		line:       1,
 		position:   0,
 		history:    newStack(),
 	}
@@ -60,6 +73,7 @@ func (l *Lexer) Start() {
 	go l.run()
 }
 
+// StartSync starts the lexer synchronously.
 func (l *Lexer) StartSync() {
 	// Take half the string length as a buffer size.
 	buffSize := len(l.source) / 2
@@ -75,7 +89,6 @@ func (l *Lexer) run() {
 	for state != nil {
 		state = state(l)
 	}
-	//fmt.Println("nil state")
 	close(l.tokens)
 }
 
@@ -91,18 +104,42 @@ func (l *Lexer) Emit(t TokenType) {
 		Type:     t,
 		Value:    l.Current(),
 		Position: l.position,
+		Line:     l.line,
 	}
-	//fmt.Printf("emitting: %v\n", tok)
 	l.tokens <- tok
+	l.checkLines()
 	l.start = l.position
 	l.history.clear()
 }
 
+func (l *Lexer) checkLines() {
+	val := l.Current()
+	l.line += bytes.Count([]byte(val), lineSep)
+}
+
+// Next pulls the next rune from the Lexer and returns it, moving the position
+// forward in the source.
+func (l *Lexer) Next() rune {
+	var r rune
+	var s int
+	str := l.source[l.position:]
+	if len(str) == 0 {
+		r, s = EOFRune, 0
+	} else {
+		r, s = utf8.DecodeRuneInString(str)
+	}
+	l.position += s
+	l.history.push(r)
+
+	return r
+}
+
 // Ignore clears the history stack and then sets the current beginning position
 // to the current position in the source which effectively ignores the section
 // of the source being analyzed.
 func (l *Lexer) Ignore() {
 	l.history.clear()
+	l.checkLines()
 	l.start = l.position
 }
 
@@ -129,23 +166,6 @@ func (l *Lexer) Backup() {
 	}
 }
 
-// Next pulls the next rune from the Lexer and returns it, moving the position
-// forward in the source.
-func (l *Lexer) Next() rune {
-	var r rune
-	var s int
-	str := l.source[l.position:]
-	if len(str) == 0 {
-		r, s = EOFRune, 0
-	} else {
-		r, s = utf8.DecodeRuneInString(str)
-	}
-	l.position += s
-	l.history.push(r)
-
-	return r
-}
-
 // Accept receives a string containing all acceptable strings and will contine
 // over each consecutive character in the source until a token not in the given
 // string is encountered. This should be used to quickly pull token parts.
@@ -166,6 +186,7 @@ func (l *Lexer) AcceptRun(valid string) (n int) {
 	return n
 }
 
+// SkipWhitespace continues over all unicode whitespace.
 func (l *Lexer) SkipWhitespace() {
 	for {
 		r := l.Next()
@@ -185,13 +206,12 @@ func (l *Lexer) SkipWhitespace() {
 // NextToken returns the next token from the lexer and done
 func (l *Lexer) NextToken() (*Token, bool) {
 	if tok, ok := <-l.tokens; ok {
-		//fmt.Printf("next token: %v, ok: %t\n", tok, ok)
 		return &tok, false
 	}
 	return nil, true
 }
 
-func (l *Lexer) ErrorState(format string, args ...interface{}) StateFunc {
+func (l *Lexer) Error(format string, args ...interface{}) StateFunc {
 	l.tokens <- Token{
 		Type:     ErrorToken,
 		Value:    fmt.Sprintf(format, args...),
author	Felix Hanley <felix@userspace.com.au>	2018-11-20 13:24:55 +0000
committer	Felix Hanley <felix@userspace.com.au>	2018-11-20 13:24:55 +0000
commit	49860f0960cb4f61567b308501c355c6e1cbd2a2 (patch)
tree	a17bebe05ff6488c1b3994274b3ea34cce5e7c17 /lexer.go
parent	b36cae1e6b724ad75c98a6b69a6235686d910633 (diff)
download	lexer-49860f0960cb4f61567b308501c355c6e1cbd2a2.tar.gz lexer-49860f0960cb4f61567b308501c355c6e1cbd2a2.tar.bz2