aboutsummaryrefslogtreecommitdiff
path: root/lexer.go
diff options
context:
space:
mode:
author Felix Hanley <felix@userspace.com.au> 2019-08-30 14:30:28 +0000
committer Felix Hanley <felix@userspace.com.au> 2019-08-30 14:30:28 +0000
commit 0032516676b486b395086b40641a34f2cafeca99 (patch)
tree f2d6cabe8248a12a0cad1fda61d36af93b32c359 /lexer.go
download bechars-0032516676b486b395086b40641a34f2cafeca99.tar.gz
bechars-0032516676b486b395086b40641a34f2cafeca99.tar.bz2
Generation of all but unicode tested
Diffstat (limited to 'lexer.go')
-rw-r--r-- lexer.go 106
1 files changed, 106 insertions, 0 deletions
diff --git a/lexer.go b/lexer.go
new file mode 100644
index 0000000..a8383a9
--- /dev/null
+++ b/lexer.go
@@ -0,0 +1,106 @@
+package brechars
+
+import (
+ "src.userspace.com.au/felix/lexer"
+)
+
+// Token types emitted while lexing a bracket expression (BRE). Numbering
+// starts at 1, so the zero value of lexer.TokenType is never one of these.
+const (
+	// tBREStart is emitted for the opening '['.
+	tBREStart lexer.TokenType = iota + 1
+	// tBREEnd is emitted for the closing ']'.
+	tBREEnd
+	// tRangeStart is emitted for the character directly before a range dash.
+	tRangeStart
+	// tRangeDash is emitted for the '-' inside a range.
+	tRangeDash
+	// tRangeEnd is emitted for the character directly after a range dash.
+	tRangeEnd
+	// tCharacter is emitted for a single literal character or escape.
+	tCharacter
+	// tClass is emitted for a ':name:' character class.
+	tClass
+	// tNot is emitted for a '^' directly after the opening '['.
+	tNot
+)
+
+// startState lexes the opening of a bracket expression. Leading whitespace
+// is skipped, then the next rune must be '[': if so tBREStart is emitted and
+// lexing continues in breFirstState, otherwise an "expecting [" error is
+// reported.
+func startState(l *lexer.Lexer) lexer.StateFunc {
+	l.SkipWhitespace()
+	if opening := l.Next(); opening == '[' {
+		l.Emit(tBREStart)
+		return breFirstState
+	}
+	return l.Error("expecting [")
+}
+
+// breFirstState handles the position directly after the opening '[', where
+// a few runes carry a different meaning than they do later on:
+//
+//   - '^' negates the expression (tNot); a '-' or ']' straight after it is
+//     taken as a literal character.
+//   - ']' closes an empty expression when it is the last rune of the input,
+//     otherwise it is a literal character.
+//   - '-' is a literal character.
+//
+// Any other rune is pushed back so that breState lexes it.
+func breFirstState(l *lexer.Lexer) lexer.StateFunc {
+	switch first := l.Next(); first {
+	case '^':
+		l.Emit(tNot)
+		// A '-' or ']' following the negation is literal.
+		if l.Accept("-]") {
+			l.Emit(tCharacter)
+		}
+	case ']':
+		// "[]" with nothing after it is an empty expression.
+		if l.Peek() == lexer.EOFRune {
+			l.Emit(tBREEnd)
+			return nil
+		}
+		l.Emit(tCharacter)
+	case '-':
+		l.Emit(tCharacter)
+	default:
+		// Nothing special here; let breState see this rune again.
+		l.Backup()
+	}
+	return breState
+}
+
+// breState lexes one item of the bracket expression body per call and
+// returns the state that should run next:
+//
+//   - ']' emits tBREEnd and stops the lexer (nil state).
+//   - ':' hands over to classState.
+//   - '-' is an error here: a leading dash is consumed by breFirstState and
+//     a range dash is consumed together with its start character below.
+//   - '\' followed by 'u' or 'x' hands over to unicodeState; any other
+//     backslash is emitted on its own as tCharacter.
+//   - end of input is an error because the closing ']' is missing.
+//   - any other rune is either a single tCharacter or, when the next rune is
+//     '-', the start of a range emitted as tRangeStart, tRangeDash,
+//     tRangeEnd.
+func breState(l *lexer.Lexer) lexer.StateFunc {
+	switch r := l.Next(); {
+	case r == ']':
+		l.Emit(tBREEnd)
+		return nil
+	case r == ':':
+		return classState
+	case r == '-':
+		return l.Error("parse error, unexpected '-'")
+	case r == '\\':
+		if l.Accept("ux") {
+			return unicodeState
+		}
+		l.Emit(tCharacter)
+		return breState
+	case r == lexer.EOFRune:
+		return l.Error("parse error, unexpected EOF")
+	default:
+		if l.Peek() != '-' {
+			l.Emit(tCharacter)
+			return breState
+		}
+		// r begins a range: emit it, then the dash, then the end point.
+		l.Emit(tRangeStart)
+		l.Accept("-")
+		l.Emit(tRangeDash)
+		if l.Accept("-][^") {
+			return l.Error("parse error, invalid range end")
+		}
+		// The input may stop right after the dash (e.g. "[a-"). Report that
+		// here instead of emitting a tRangeEnd token for a missing end point.
+		if l.Peek() == lexer.EOFRune {
+			return l.Error("parse error, unexpected EOF")
+		}
+		l.Next()
+		l.Emit(tRangeEnd)
+		return breState
+	}
+}
+
+// classState lexes the rest of a character class once breState has consumed
+// the opening ':'. It accepts a run of lowercase ASCII letters (the length of
+// the run is not checked) followed by a closing ':', emits the whole thing as
+// tClass and returns to breState. A missing closing ':' is a parse error.
+func classState(l *lexer.Lexer) lexer.StateFunc {
+	// TODO
+	const lowercase = "abcdefghijklmnopqrstuvwxyz"
+	l.AcceptRun(lowercase)
+	if l.Accept(":") {
+		l.Emit(tClass)
+		return breState
+	}
+	return l.Error("parse error, expecting ':'")
+}
+
+// unicodeState lexes the digits of a `\u` or `\x` escape whose prefix has
+// already been consumed by breState. A run of lowercase hexadecimal digits is
+// accepted and the whole escape is emitted as tCharacter before returning to
+// breState.
+//
+// An escape with no digits is reported as a parse error. Otherwise the
+// consumed prefix would stay pending and be merged into whichever token is
+// emitted next.
+func unicodeState(l *lexer.Lexer) lexer.StateFunc {
+	// TODO valid code point
+	if n := l.AcceptRun("0123456789abcdef"); n == 0 {
+		return l.Error("parse error, expecting hex digits")
+	}
+	l.Emit(tCharacter)
+	return breState
+}