1 files changed, 265 insertions, 0 deletions
diff --git a/vendor/github.com/zenazn/goji/web/bytecode_compiler.go b/vendor/github.com/zenazn/goji/web/bytecode_compiler.go
new file mode 100644
index 0000000..b6f52b1
--- /dev/null
+++ b/vendor/github.com/zenazn/goji/web/bytecode_compiler.go
@@ -0,0 +1,265 @@
+package web
+
+/*
+This file implements a fast router by encoding a list of routes first into a
+pseudo-trie, then encoding that pseudo-trie into a state machine realized as
+a routing bytecode.
+
+The most interesting part of this router is not its speed (it is quite fast),
+but the guarantees it provides. In a naive router, routes are examined one after
+another until a match is found, and this is the programming model we want to
+support. For any given request ("GET /hello/carl"), there is a list of
+"plausible" routes: routes which match the method ("GET"), and which have a
+prefix that is a prefix of the requested path ("/" and "/hello/", for instance,
+but not "/foobar"). Patterns also have some amount of arbitrary code associated
+with them, which tells us whether or not the route matched. Just like the naive
+router, our goal is to call each plausible pattern, in the order they were
+added, until we find one that matches. The "fast" part here is being smart about
+which non-plausible routes we can skip.
+
+First, we sort routes using a pairwise comparison function: sorting occurs as
+normal on the prefixes, with the caveat that a route may not be moved past a
+route that might also match the same string. Among other things, this means
+we're forced to use particularly dumb sorting algorithms, but it only has to
+happen once, and there probably aren't even that many routes to begin with. This
+logic appears inline in the router's handle() function.
+
+We then build a pseudo-trie from the sorted list of routes. It's not quite a
+normal trie because there are certain routes we cannot reorder around other
+routes (since we're providing identical semantics to the naive router), but it's
+close enough and the basic idea is the same.
+
+Finally, we lower this psuedo-trie from its tree representation to a state
+machine bytecode. The bytecode is pretty simple: it contains up to three bytes,
+a choice of a bunch of flags, and an index. The state machine is pretty simple:
+if the bytes match the next few bytes after the cursor, the instruction matches,
+and the state machine advances to the next instruction. If it does not match, it
+jumps to the instruction at the index. Various flags modify this basic behavior,
+the documentation for which can be found below.
+
+The thing we're optimizing for here over pretty much everything else is memory
+locality. We make an effort to lay out both the trie child selection logic and
+the matching of long strings consecutively in memory, making both operations
+very cheap. In fact, our matching logic isn't particularly asymptotically good,
+but in practice the benefits of memory locality outweigh just about everything
+else.
+
+Unfortunately, the code implementing all of this is pretty bad (both inefficient
+and hard to read). Maybe someday I'll come and take a second pass at it.
+*/
+type state struct {
+	mode smMode
+	bs   [3]byte
+	i    int32
+}
+type stateMachine []state
+
+type smMode uint8
+
+// Many combinations of smModes don't make sense, but since this is interal to
+// the library I don't feel like documenting them.
+const (
+	// The two low bits of the mode are used as a length of how many bytes
+	// of bs are used. If the length is 0, the node is treated as a
+	// wildcard.
+	smLengthMask smMode = 3
+)
+
+const (
+	// Jump to the given index on a match. Ordinarily, the state machine
+	// will jump to the state given by the index if the characters do not
+	// match.
+	smJumpOnMatch smMode = 4 << iota
+	// The index is the index of a route to try. If running the route fails,
+	// the state machine advances by one.
+	smRoute
+	// Reset the state machine's cursor into the input string to the state's
+	// index value.
+	smSetCursor
+	// If this bit is set, the machine transitions into a non-accepting
+	// state if it matches.
+	smFail
+)
+
+type trie struct {
+	prefix   string
+	children []trieSegment
+}
+
+// A trie segment is a route matching this point (or -1), combined with a list
+// of trie children that follow that route.
+type trieSegment struct {
+	route    int
+	children []trie
+}
+
+func buildTrie(routes []route, dp, dr int) trie {
+	var t trie
+	ts := trieSegment{-1, nil}
+	for i, r := range routes {
+		if len(r.prefix) != dp {
+			continue
+		}
+
+		if i == 0 {
+			ts.route = 0
+		} else {
+			subroutes := routes[ts.route+1 : i]
+			ts.children = buildTrieSegment(subroutes, dp, dr+ts.route+1)
+			t.children = append(t.children, ts)
+			ts = trieSegment{i, nil}
+		}
+	}
+
+	// This could be a little DRYer...
+	subroutes := routes[ts.route+1:]
+	ts.children = buildTrieSegment(subroutes, dp, dr+ts.route+1)
+	t.children = append(t.children, ts)
+
+	for i := range t.children {
+		if t.children[i].route != -1 {
+			t.children[i].route += dr
+		}
+	}
+
+	return t
+}
+
+func commonPrefix(s1, s2 string) string {
+	if len(s1) > len(s2) {
+		return commonPrefix(s2, s1)
+	}
+	for i := 0; i < len(s1); i++ {
+		if s1[i] != s2[i] {
+			return s1[:i]
+		}
+	}
+	return s1
+}
+
+func buildTrieSegment(routes []route, dp, dr int) []trie {
+	if len(routes) == 0 {
+		return nil
+	}
+	var tries []trie
+
+	start := 0
+	p := routes[0].prefix[dp:]
+	for i := 1; i < len(routes); i++ {
+		ip := routes[i].prefix[dp:]
+		cp := commonPrefix(p, ip)
+		if len(cp) == 0 {
+			t := buildTrie(routes[start:i], dp+len(p), dr+start)
+			t.prefix = p
+			tries = append(tries, t)
+			start = i
+			p = ip
+		} else {
+			p = cp
+		}
+	}
+
+	t := buildTrie(routes[start:], dp+len(p), dr+start)
+	t.prefix = p
+	return append(tries, t)
+}
+
+// This is a bit confusing, since the encode method on a trie deals exclusively
+// with trieSegments (i.e., its children), and vice versa.
+//
+// These methods are also hideously inefficient, both in terms of memory usage
+// and algorithmic complexity. If it ever becomes a problem, maybe we can do
+// something smarter than stupid O(N^2) appends, but to be honest, I bet N is
+// small (it almost always is :P) and we only do it once at boot anyways.
+
+func (t trie) encode(dp, off int) stateMachine {
+	ms := make([]stateMachine, len(t.children))
+	subs := make([]stateMachine, len(t.children))
+	var l, msl, subl int
+
+	for i, ts := range t.children {
+		ms[i], subs[i] = ts.encode(dp, 0)
+		msl += len(ms[i])
+		l += len(ms[i]) + len(subs[i])
+	}
+
+	l++
+
+	m := make(stateMachine, 0, l)
+	for i, mm := range ms {
+		for j := range mm {
+			if mm[j].mode&(smRoute|smSetCursor) != 0 {
+				continue
+			}
+
+			mm[j].i += int32(off + msl + subl + 1)
+		}
+		m = append(m, mm...)
+		subl += len(subs[i])
+	}
+
+	m = append(m, state{mode: smJumpOnMatch, i: -1})
+
+	msl = 0
+	for i, sub := range subs {
+		msl += len(ms[i])
+		for j := range sub {
+			if sub[j].mode&(smRoute|smSetCursor) != 0 {
+				continue
+			}
+			if sub[j].i == -1 {
+				sub[j].i = int32(off + msl)
+			} else {
+				sub[j].i += int32(off + len(m))
+			}
+		}
+		m = append(m, sub...)
+	}
+
+	return m
+}
+
+func (ts trieSegment) encode(dp, off int) (me stateMachine, sub stateMachine) {
+	o := 1
+	if ts.route != -1 {
+		o++
+	}
+	me = make(stateMachine, len(ts.children)+o)
+
+	me[0] = state{mode: smSetCursor, i: int32(dp)}
+	if ts.route != -1 {
+		me[1] = state{mode: smRoute, i: int32(ts.route)}
+	}
+
+	for i, t := range ts.children {
+		p := t.prefix
+
+		bc := copy(me[i+o].bs[:], p)
+		me[i+o].mode = smMode(bc) | smJumpOnMatch
+		me[i+o].i = int32(off + len(sub))
+
+		for len(p) > bc {
+			var bs [3]byte
+			p = p[bc:]
+			bc = copy(bs[:], p)
+			sub = append(sub, state{bs: bs, mode: smMode(bc), i: -1})
+		}
+
+		sub = append(sub, t.encode(dp+len(t.prefix), off+len(sub))...)
+	}
+	return
+}
+
+func compile(routes []route) stateMachine {
+	if len(routes) == 0 {
+		return nil
+	}
+	t := buildTrie(routes, 0, 0)
+	m := t.encode(0, 0)
+	for i := range m {
+		if m[i].i == -1 {
+			m[i].mode = m[i].mode | smFail
+		}
+	}
+	return m
+}