aboutsummaryrefslogtreecommitdiff
path: root/lexer.go
blob: db828bbe324aec8d6d471f77fac2e2a593167fbc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
package bechars

import (
	"src.userspace.com.au/felix/lexer"
)

// Token types emitted by the state functions in this file.
const (
	// The blank identifier consumes iota's zero value, so the named token
	// types below start at 1.
	_ lexer.TokenType = iota
	// tBREStart is emitted by startState for the opening '['.
	tBREStart
	// tBREEnd is emitted for the ']' that closes the expression.
	tBREEnd
	// tRangeStart is emitted by beState for a character that is directly
	// followed by '-'.
	tRangeStart
	// tRangeDash is emitted by beState for the '-' inside a range.
	tRangeDash
	// tRangeEnd is emitted by beState for the character after the range dash.
	tRangeEnd
	// tCharacter is emitted for a single literal member of the expression.
	tCharacter
	// tClass is emitted by classState once a ':' ... ':' run has been read.
	tClass
	// tNot is emitted by beFirstState for a leading '^'.
	tNot
)

// startState is the entry state. It skips any leading whitespace and then
// requires the next rune to be '[', which it emits as tBREStart before
// handing over to beFirstState. Any other rune yields an error state.
func startState(l *lexer.Lexer) lexer.StateFunc {
	l.SkipWhitespace()
	if opener := l.Next(); opener == '[' {
		l.Emit(tBREStart)
		return beFirstState
	}
	return l.Error("expecting [")
}

// beFirstState lexes the position immediately after the opening '[', where
// '^', ']' and '-' are treated differently from the rest of the expression:
//
//   - '^' is emitted as tNot; a '-' or ']' directly after it is emitted as a
//     literal tCharacter.
//   - ']' followed by end of input closes an empty expression (tBREEnd) and
//     stops the lexer by returning nil; otherwise it is a literal tCharacter.
//   - '-' is a literal tCharacter.
//   - anything else is pushed back for beState to handle.
func beFirstState(l *lexer.Lexer) lexer.StateFunc {
	first := l.Next()

	if first == '^' {
		l.Emit(tNot)
		// - or ] right after ^ is literal
		if l.Accept("-]") {
			l.Emit(tCharacter)
		}
		return beState
	}

	// Empty expression: the ']' is the last thing in the input.
	if first == ']' && l.Peek() == lexer.EOFRune {
		l.Emit(tBREEnd)
		return nil
	}

	if first == ']' || first == '-' {
		// Literal in first position.
		l.Emit(tCharacter)
	} else {
		// Not special here; let beState read it again.
		l.Backup()
	}
	return beState
}

// beState lexes the body of the bracket expression, one item per call.
//
// It consumes one rune and dispatches on it:
//
//   - ']' closes the expression: tBREEnd is emitted and nil is returned to
//     stop the lexer.
//   - ':' hands over to classState.
//   - '-' in this position is reported as a parse error.
//   - '\\' followed by 'u' or 'x' hands over to unicodeState; otherwise the
//     text consumed so far is emitted as tCharacter.
//   - end of input is reported as a parse error.
//   - any other rune is either a lone tCharacter or, when the next rune is
//     '-', the start of a range emitted as tRangeStart, tRangeDash, tRangeEnd.
//
// A range whose end is one of '-', ']', '[' or '^' is rejected, and so is a
// range that is cut off by the end of the input.
func beState(l *lexer.Lexer) lexer.StateFunc {
	switch r := l.Next(); {
	case r == ']':
		l.Emit(tBREEnd)
		return nil
	case r == ':':
		return classState
	case r == '-':
		return l.Error("parse error, unexpected '-'")
	case r == '\\':
		if l.Accept("ux") {
			return unicodeState
		}
		l.Emit(tCharacter)
		return beState
	case r == lexer.EOFRune:
		return l.Error("parse error, unexpected EOF")
	default:
		if l.Peek() != '-' {
			// Plain character, not the start of a range.
			l.Emit(tCharacter)
			return beState
		}
		l.Emit(tRangeStart)
		l.Accept("-")
		l.Emit(tRangeDash)
		if l.Accept("-][^") {
			return l.Error("parse error, invalid range end")
		}
		// The range needs a real end character; do not emit tRangeEnd for
		// the end-of-input marker when the input stops right after the dash.
		if l.Next() == lexer.EOFRune {
			return l.Error("parse error, unexpected EOF")
		}
		l.Emit(tRangeEnd)
		return beState
	}
}

// classState is entered from beState after a ':' has been consumed. It reads
// a (possibly empty) run of lowercase ASCII letters and then requires a
// closing ':'; on success the consumed text is emitted as tClass and lexing
// continues in beState, otherwise an error state is returned.
func classState(l *lexer.Lexer) lexer.StateFunc {
	// TODO
	const lowerLetters = "abcdefghijklmnopqrstuvwxyz"

	l.AcceptRun(lowerLetters)
	if l.Accept(":") {
		l.Emit(tClass)
		return beState
	}
	return l.Error("parse error, expecting ':'")
}

// unicodeState is entered from beState after a backslash followed by 'u' or
// 'x'. It reads a run of lowercase hexadecimal digits and, if the run is
// non-empty, emits the consumed text as tCharacter. When no digit follows,
// nothing is emitted here. Either way lexing continues in beState.
func unicodeState(l *lexer.Lexer) lexer.StateFunc {
	// TODO valid code point
	const hexDigits = "0123456789abcdef"

	digits := l.AcceptRun(hexDigits)
	if digits > 0 {
		l.Emit(tCharacter)
	}
	return beState
}