summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFelix Hanley <felix@userspace.com.au>2018-11-16 06:35:28 +0000
committerFelix Hanley <felix@userspace.com.au>2018-11-16 06:35:28 +0000
commit44da796e192961b614e3540c4c1ec52f4bc0a290 (patch)
treec0dcc69363c401d123bfdba0662d990d6804df77
parentd6882bd9403c588415c1906a7015d16e92aa1ad3 (diff)
downloadquery-44da796e192961b614e3540c4c1ec52f4bc0a290.tar.gz
query-44da796e192961b614e3540c4c1ec52f4bc0a290.tar.bz2
Add start of jsonpath parser
-rw-r--r--Makefile21
-rw-r--r--jsonpath/lexer.go236
-rw-r--r--jsonpath/lexer_test.go172
-rw-r--r--jsonpath/nodes.go68
-rw-r--r--jsonpath/parse_test.go39
-rw-r--r--jsonpath/parser.go170
-rw-r--r--jsonpath/selector.go55
7 files changed, 761 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..77601b5
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,21 @@

pkgs := $(shell go list ./...)

.PHONY: help lint test clean

ifdef GOPATH
GO111MODULE=on
endif

test: ## Run tests with coverage
	go test -short -cover -coverprofile coverage.out $(pkgs)
	go tool cover -html=coverage.out -o coverage.html

lint: ## Run golint on all packages
	golint $(pkgs)

clean: ## Clean all test files
	rm -rf coverage*

help: ## This help
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) |sort |awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
diff --git a/jsonpath/lexer.go b/jsonpath/lexer.go
new file mode 100644
index 0000000..04fc87a
--- /dev/null
+++ b/jsonpath/lexer.go
@@ -0,0 +1,236 @@
+package jsonpath
+
+import (
+ //"fmt"
+ "strings"
+
+ "src.userspace.com.au/query/lexer"
+)
+
// Token types produced by the JSONPath lexer. The blank first value
// reserves zero so every real token type is non-zero (the lexer
// package supplies ErrorToken and EOFToken separately).
const (
	_ lexer.TokenType = iota

	TAbsolute       // $
	TCurrent        // @
	TChildDot       // .
	TRecursive      // ..
	TChildStart     // [
	TChildEnd       // ]
	TQuotedName     // ' or "
	TName           // A word-like
	TWildcard       // *
	TFilterStart    // ?(
	TFilterEnd      // )
	TPredicateStart // [
	TPredicateEnd   // ]
	TNumber         // [-]0-9 and hex also
	TRange          // :
	TIndex          // [234]
	TUnion          // [,]
	TSlice          // [start,end,step]
	TScriptStart    // (
	TScriptEnd      // )
)
+
+// helpful for debugging
+var tokenNames = map[lexer.TokenType]string{
+ lexer.ErrorToken: "TError",
+ lexer.EOFToken: "EOF",
+ TAbsolute: "TAbsolute",
+ TCurrent: "TCurrent",
+ TChildDot: "TChildDot",
+ TRecursive: "TRecursive",
+ TChildStart: "TChildStart",
+ TChildEnd: "TChildEnd",
+ TQuotedName: "TQuoteName",
+ TName: "TName",
+ TWildcard: "TWildcard",
+ TFilterStart: "TFilterStart",
+ TFilterEnd: "TFilterEnd",
+ TPredicateStart: "TPredicateStart",
+ TPredicateEnd: "TPredicateEnd",
+ TNumber: "TNumber",
+ TRange: "TRange",
+ TIndex: "TIndex",
+ TUnion: "TUnion",
+ TSlice: "TSlice",
+ TScriptStart: "TScriptStart",
+ TScriptEnd: "TScriptEnd",
+}
+
// Character classes used with the lexer's Accept/AcceptRun helpers.
const (
	alpha         = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
	numeric       = "0123456789"
	alphanumeric  = alpha + numeric
	signednumeric = "-" + numeric
)
+
+func pathState(l *lexer.Lexer) lexer.StateFunc {
+ l.SkipWhitespace()
+ if l.Accept("$") {
+ l.Emit(TAbsolute)
+ return stepState
+ }
+ return nil
+}
+
+// stepState starts with a . or bracket
+func stepState(l *lexer.Lexer) lexer.StateFunc {
+ for {
+ switch t := l.Next(); {
+ case t == '.':
+ if l.Peek() == '.' {
+ l.Emit(TRecursive)
+ return stepState
+ }
+ l.Emit(TChildDot)
+ return childState
+ case t == '[':
+ l.Emit(TChildStart)
+ return childState
+
+ case t == lexer.EOFRune:
+ l.Emit(lexer.EOFToken)
+ return nil
+
+ default:
+ return l.ErrorState("abrupt end to stepState")
+ }
+ }
+}
+
+// childState has started with . or [
+func childState(l *lexer.Lexer) lexer.StateFunc {
+ for {
+ switch t := l.Next(); {
+
+ case t == '*':
+ l.Emit(TWildcard)
+ return childState
+
+ case t == '\'' || t == '"':
+ // TODO what other characters?
+ l.AcceptRun(alphanumeric + "-_")
+ l.Accept(string(t))
+ l.Emit(TQuotedName)
+ return childState
+
+ case strings.IndexRune(alpha, t) != -1:
+ // Key lookup
+ l.AcceptRun(alphanumeric)
+ l.Emit(TName)
+ return childState
+
+ case t == '[':
+ l.Emit(TPredicateStart)
+ return predicateState
+
+ case t == ']':
+ l.Emit(TChildEnd)
+ return stepState
+
+ case t == '.':
+ l.Backup()
+ l.Emit(TChildEnd)
+ return stepState
+
+ case t == lexer.EOFRune:
+ l.Emit(lexer.EOFToken)
+ return nil
+
+ default:
+ return l.ErrorState("abrupt end to childState")
+ }
+ }
+}
+
+func predicateState(l *lexer.Lexer) lexer.StateFunc {
+ for {
+ switch t := l.Next(); {
+ case t == '?':
+ if l.Accept("(") {
+ l.Emit(TFilterStart)
+ return filterState
+ }
+ return l.ErrorState("expecting (")
+
+ case t == '(':
+ l.Emit(TScriptStart)
+ return scriptState
+
+ case t == ']':
+ l.Emit(TPredicateEnd)
+ return stepState
+
+ default:
+ l.Backup()
+ return predicateExprState
+ }
+ }
+}
+
// predicateExprState lexes an index/range/union expression inside a
// predicate, e.g. "1", "1:3", ":3", "3:", or "1,2,3".
//
// open tracks whether the current range endpoint is still empty, so an
// omitted endpoint can be emitted as an empty TNumber; inRange records
// that a ':' has been seen, making a second ':' invalid.
func predicateExprState(l *lexer.Lexer) lexer.StateFunc {
	open := true
	inRange := false
	for {
		switch t := l.Next(); {

		case t == '-' || '0' <= t && t <= '9':
			l.AcceptRun(numeric)
			l.Emit(TNumber)
			open = false

		case t == ':':
			if inRange {
				return l.ErrorState("invalid range")
			}
			if open {
				// Start endpoint omitted (e.g. "[:3]"): emit an empty
				// TNumber before the range separator.
				l.Backup()
				l.Emit(TNumber)
				l.Accept(":")
			}
			l.Emit(TRange)
			inRange = true
			open = true

		case t == ',':
			l.Emit(TUnion)

		case t == ']':
			l.Backup()
			if open && inRange {
				// End endpoint omitted (e.g. "[3:]"): emit an empty
				// TNumber before handing back to predicateState.
				l.Emit(TNumber)
			}
			return predicateState

		default:
			return l.ErrorState("invalid predicate expression")
		}
	}
}
+
+// filterState TODO
+func filterState(l *lexer.Lexer) lexer.StateFunc {
+ for {
+ switch t := l.Next(); {
+ case t == ')':
+ l.Emit(TFilterEnd)
+ return predicateState
+ default:
+ return l.ErrorState("invalid filter expression")
+ }
+ }
+}
+
+// scriptState TODO
+func scriptState(l *lexer.Lexer) lexer.StateFunc {
+ for {
+ switch t := l.Next(); {
+ case t == ')':
+ l.Emit(TScriptEnd)
+ return predicateState
+ default:
+ return l.ErrorState("invalid script expression")
+ }
+ }
+}
diff --git a/jsonpath/lexer_test.go b/jsonpath/lexer_test.go
new file mode 100644
index 0000000..504d413
--- /dev/null
+++ b/jsonpath/lexer_test.go
@@ -0,0 +1,172 @@
+package jsonpath
+
+import (
+ "testing"
+
+ "src.userspace.com.au/query/lexer"
+)
+
+func TestValidStates(t *testing.T) {
+ tests := []struct {
+ path string
+ tokens []lexer.Token
+ }{
+ {
+ path: "$.test",
+ tokens: []lexer.Token{
+ lexer.Token{Type: TAbsolute, Value: "$"},
+ lexer.Token{Type: TChildDot, Value: "."},
+ lexer.Token{Type: TName, Value: "test"},
+ },
+ },
+ {
+ path: "$[test]",
+ tokens: []lexer.Token{
+ lexer.Token{Type: TAbsolute, Value: "$"},
+ lexer.Token{Type: TChildStart, Value: "["},
+ lexer.Token{Type: TName, Value: "test"},
+ lexer.Token{Type: TChildEnd, Value: "]"},
+ },
+ },
+ {
+ path: "$[one][two]",
+ tokens: []lexer.Token{
+ lexer.Token{Type: TAbsolute, Value: "$"},
+ lexer.Token{Type: TChildStart, Value: "["},
+ lexer.Token{Type: TName, Value: "one"},
+ lexer.Token{Type: TChildEnd, Value: "]"},
+ lexer.Token{Type: TChildStart, Value: "["},
+ lexer.Token{Type: TName, Value: "two"},
+ lexer.Token{Type: TChildEnd, Value: "]"},
+ },
+ },
+ {
+ path: "$.one.two",
+ tokens: []lexer.Token{
+ lexer.Token{Type: TAbsolute, Value: "$"},
+ lexer.Token{Type: TChildDot, Value: "."},
+ lexer.Token{Type: TName, Value: "one"},
+ lexer.Token{Type: TChildEnd, Value: ""},
+ lexer.Token{Type: TChildDot, Value: "."},
+ lexer.Token{Type: TName, Value: "two"},
+ },
+ },
+ {
+ path: "$[*]",
+ tokens: []lexer.Token{
+ lexer.Token{Type: TAbsolute, Value: "$"},
+ lexer.Token{Type: TChildStart, Value: "["},
+ lexer.Token{Type: TWildcard, Value: "*"},
+ lexer.Token{Type: TChildEnd, Value: "]"},
+ },
+ },
+ {
+ path: "$[one][*]",
+ tokens: []lexer.Token{
+ lexer.Token{Type: TAbsolute, Value: "$"},
+ lexer.Token{Type: TChildStart, Value: "["},
+ lexer.Token{Type: TName, Value: "one"},
+ lexer.Token{Type: TChildEnd, Value: "]"},
+ lexer.Token{Type: TChildStart, Value: "["},
+ lexer.Token{Type: TWildcard, Value: "*"},
+ lexer.Token{Type: TChildEnd, Value: "]"},
+ },
+ },
+ {
+ path: "$.one[1,2,3]",
+ tokens: []lexer.Token{
+ lexer.Token{Type: TAbsolute, Value: "$"},
+ lexer.Token{Type: TChildDot, Value: "."},
+ lexer.Token{Type: TName, Value: "one"},
+ lexer.Token{Type: TPredicateStart, Value: "["},
+ lexer.Token{Type: TNumber, Value: "1"},
+ lexer.Token{Type: TUnion, Value: ","},
+ lexer.Token{Type: TNumber, Value: "2"},
+ lexer.Token{Type: TUnion, Value: ","},
+ lexer.Token{Type: TNumber, Value: "3"},
+ lexer.Token{Type: TPredicateEnd, Value: "]"},
+ },
+ },
+ {
+ path: "$.one[1:3]",
+ tokens: []lexer.Token{
+ lexer.Token{Type: TAbsolute, Value: "$"},
+ lexer.Token{Type: TChildDot, Value: "."},
+ lexer.Token{Type: TName, Value: "one"},
+ lexer.Token{Type: TPredicateStart, Value: "["},
+ lexer.Token{Type: TNumber, Value: "1"},
+ lexer.Token{Type: TRange, Value: ":"},
+ lexer.Token{Type: TNumber, Value: "3"},
+ lexer.Token{Type: TPredicateEnd, Value: "]"},
+ },
+ },
+ {
+ path: "$.one[:3]",
+ tokens: []lexer.Token{
+ lexer.Token{Type: TAbsolute, Value: "$"},
+ lexer.Token{Type: TChildDot, Value: "."},
+ lexer.Token{Type: TName, Value: "one"},
+ lexer.Token{Type: TPredicateStart, Value: "["},
+ lexer.Token{Type: TNumber, Value: ""},
+ lexer.Token{Type: TRange, Value: ":"},
+ lexer.Token{Type: TNumber, Value: "3"},
+ lexer.Token{Type: TPredicateEnd, Value: "]"},
+ },
+ },
+ {
+ path: "$.one[3:]",
+ tokens: []lexer.Token{
+ lexer.Token{Type: TAbsolute, Value: "$"},
+ lexer.Token{Type: TChildDot, Value: "."},
+ lexer.Token{Type: TName, Value: "one"},
+ lexer.Token{Type: TPredicateStart, Value: "["},
+ lexer.Token{Type: TNumber, Value: "3"},
+ lexer.Token{Type: TRange, Value: ":"},
+ lexer.Token{Type: TNumber, Value: ""},
+ lexer.Token{Type: TPredicateEnd, Value: "]"},
+ },
+ },
+ {
+ path: "$..one",
+ tokens: []lexer.Token{
+ lexer.Token{Type: TAbsolute, Value: "$"},
+ lexer.Token{Type: TRecursive, Value: "."},
+ lexer.Token{Type: TChildDot, Value: "."},
+ lexer.Token{Type: TName, Value: "one"},
+ },
+ },
+ {
+ path: "$['one']",
+ tokens: []lexer.Token{
+ lexer.Token{Type: TAbsolute, Value: "$"},
+ lexer.Token{Type: TChildStart, Value: "["},
+ lexer.Token{Type: TQuotedName, Value: "'one'"},
+ lexer.Token{Type: TChildEnd, Value: "]"},
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Log("testing path: ", tt.path)
+ l := lexer.New(tt.path, pathState)
+ l.Start()
+
+ func() {
+ for i, expected := range tt.tokens {
+ actual, done := l.NextToken()
+ if done || actual == nil {
+ t.Errorf("Lexer(%q) finished early, expecting %v", tt.path, expected)
+ return
+ }
+ if actual.Type != expected.Type {
+ t.Errorf("Lexer(%q) token %d => %s, expected %s", tt.path, i, tokenNames[actual.Type], tokenNames[expected.Type])
+ return
+ }
+ if actual.Value != expected.Value {
+ t.Errorf("Lexer(%q) token %d => %v, expected %v", tt.path, i, actual, expected)
+ return
+ }
+ }
+ }()
+ }
+}
diff --git a/jsonpath/nodes.go b/jsonpath/nodes.go
new file mode 100644
index 0000000..816002c
--- /dev/null
+++ b/jsonpath/nodes.go
@@ -0,0 +1,68 @@
+package jsonpath
+
+import (
+ "fmt"
+)
+
// jsonpath is the parsed representation of a JSONPath expression: an
// optional absolute (root) marker followed by a sequence of steps
// applied left to right.
type jsonpath struct {
	absolute bool
	steps    []step
}

// String reassembles the path into its textual form.
func (jp jsonpath) String() string {
	out := ""
	if jp.absolute {
		out = "$"
	}
	for _, s := range jp.steps {
		out += s.String()
	}
	return out
}
+
// step is a single path step: either a recursive descent marker or a
// selector with an optional predicate.
type step struct {
	recursive bool
	selector  selector
	predicate *predicate
}

// String renders the step. NOTE(review): a recursive step renders as
// ".." only — its selector and predicate, if set, are ignored here.
func (s step) String() string {
	if s.recursive {
		return ".."
	}
	if s.predicate != nil {
		return fmt.Sprintf("%s%s", s.selector.String(), s.predicate.String())
	}
	return s.selector.String()
}
+
// selector selects children of a node: either every child (wildcard)
// or the child with a specific name.
type selector struct {
	wildcard bool
	value    string
}

// String renders the selector in bracket notation, with "*" standing
// in for the wildcard.
func (s selector) String() string {
	name := s.value
	if s.wildcard {
		name = "*"
	}
	return fmt.Sprintf("[%s]", name)
}
+
// predicate narrows a selection within a step: by index, by range, or
// (eventually) by union or filter.
type predicate struct {
	pType  string
	start  int
	end    int
	filter string
}

// String renders the predicate in bracket notation. Union predicates
// are not yet rendered and produce an empty string.
func (p predicate) String() string {
	out := ""
	switch p.pType {
	case "index":
		out = fmt.Sprintf("[%d]", p.start)
	case "range":
		out = fmt.Sprintf("[%d:%d]", p.start, p.end)
	case "union":
		// TODO
	}
	return out
}
diff --git a/jsonpath/parse_test.go b/jsonpath/parse_test.go
new file mode 100644
index 0000000..db41efe
--- /dev/null
+++ b/jsonpath/parse_test.go
@@ -0,0 +1,39 @@
+package jsonpath
+
+import (
+ "strings"
+ "testing"
+
+ "src.userspace.com.au/query/json"
+)
+
+func TestParse(t *testing.T) {
+ tests := []struct {
+ path, src, expect string
+ }{
+ {
+ path: "$.test",
+ src: `{"test":"one"}`,
+ expect: "one",
+ },
+ }
+
+ p := Parser{}
+
+ for _, tt := range tests {
+ doc, err := json.Parse(strings.NewReader(tt.src))
+ if err != nil {
+ t.Errorf("json.Parse(%q) failed: %s", tt.src, err)
+ }
+
+ sel, err := p.Parse(tt.path)
+ if err != nil {
+ t.Errorf("Parse(%q) failed: %s", tt.path, err)
+ }
+ actual := sel.MatchFirst(doc)
+ actualText := actual.InnerText()
+ if actualText != tt.expect {
+ t.Errorf("MatchFirst(%s) => %s, expected %s", tt.src, actualText, tt.expect)
+ }
+ }
+}
diff --git a/jsonpath/parser.go b/jsonpath/parser.go
new file mode 100644
index 0000000..748479f
--- /dev/null
+++ b/jsonpath/parser.go
@@ -0,0 +1,170 @@
+package jsonpath
+
+import (
+ "fmt"
+ "strings"
+
+ //base "src.userspace.com.au/query"
+ "src.userspace.com.au/query/json"
+ "src.userspace.com.au/query/lexer"
+)
+
// Parser parses JSONPath expressions into Selectors. It is stateful
// per call to Parse and is not safe for concurrent use.
type Parser struct {
	l   *lexer.Lexer // token source for the current Parse call
	pos int          // position of the current token in the input
	tok *lexer.Token // most recently read token; may be nil when exhausted
}
+
+func NewParser() (*Parser, error) {
+ return &Parser{}, nil
+}
+
+func (p *Parser) next() (done bool) {
+ p.tok, done = p.l.NextToken()
+ if p.tok != nil {
+ p.pos = p.tok.Position
+ }
+ fmt.Printf("%s(%d): '%s'\n", tokenNames[p.tok.Type], p.tok.Type, p.tok.Value)
+ return p.tok != nil && !done
+}
+
+func (p *Parser) Parse(input string) (Selector, error) {
+ p.l = lexer.New(input, pathState)
+ p.l.Start()
+
+ // First token
+ p.next()
+ if p.tok.Type != TAbsolute {
+ return nil, fmt.Errorf("expected root, got %s", p.tok.Value)
+ }
+
+ result, err := p.parseQualifiedSelector()
+ if err != nil {
+ return nil, err
+ }
+ return childSelector(rootSelector, result), nil
+}
+
+// parseQualifiedSelector
+func (p *Parser) parseQualifiedSelector() (result Selector, err error) {
+ p.next()
+
+ switch p.tok.Type {
+ case TRecursive:
+ nr, _ := p.parseStepSelector()
+ result = recursiveSelector(nr)
+
+ case TChildDot, TChildStart:
+ result, err = p.parseStepSelector()
+
+ default:
+ return nil, fmt.Errorf("expected . or .. or something, got %s", p.tok.Value)
+ }
+ return result, nil
+}
+
+func (p *Parser) parseStepSelector() (result Selector, err error) {
+ p.next()
+ result, err = p.parseNodeTestSelector()
+ if err != nil {
+ return nil, err
+ }
+ p.next()
+ if p.tok.Type == TPredicateStart {
+ // TODO
+ }
+ return result, nil
+}
+
+func (p *Parser) parseNodeTestSelector() (result Selector, err error) {
+ switch p.tok.Type {
+ case TName:
+ /*
+ switch p.tok.Value {
+ case "object", "array", "string", "number", "boolean", "null":
+ // TODO
+ //result = typeSelector(p.tok.Value)
+ default:
+ }
+ */
+ result = nameSelector(p.tok.Value)
+ case TWildcard:
+ result = wildcardSelector
+ default:
+ fmt.Println("here: ", tokenNames[p.tok.Type])
+ }
+ return result, err
+}
+
// parseChildSelector parses a child name, quoted or bare, and consumes
// the token that follows it. Any other token type yields a nil
// Selector. NOTE(review): p.tok is dereferenced without a nil check —
// callers must ensure a token is available.
func (p *Parser) parseChildSelector() Selector {
	var result Selector
	p.next()
	switch p.tok.Type {
	case TQuotedName:
		// Strip the surrounding quotes kept by the lexer.
		result = nameSelector(strings.Trim(p.tok.Value, `"'`))
	case TName:
		result = nameSelector(p.tok.Value)
	}
	p.next()
	return result
}
+
+// rootSelector checks node is root
+func rootSelector(n *json.Node) bool {
+ result := (n.Type == json.DocumentNode)
+ fmt.Printf("rootSelector => type: %s, val: %s, result: %t\n", json.NodeNames[n.Type], n.Data, result)
+ return result
+}
+
+// wildcardSelector returns true
+func wildcardSelector(n *json.Node) bool {
+ return true
+}
+
+// childSelector creates a selector for c being a child of p
+func childSelector(p, c Selector) Selector {
+ return func(n *json.Node) bool {
+ fmt.Printf("childSelector => type: %s, val: %s\n", json.NodeNames[n.Type], n.Data)
+ result := (c(n) && n.Parent != nil && p(n.Parent))
+ fmt.Printf("childSelector => type: %s, val: %s, result: %t\n", json.NodeNames[n.Type], n.Data, result)
+ return result
+ }
+}
+
+// nameSelector generates selector for object key == k
+func nameSelector(k string) Selector {
+ return func(n *json.Node) bool {
+ result := (n.Type == json.ElementNode && n.Data == k)
+ fmt.Printf("nameSelector => type: %s, val: %s, result: %t\n", json.NodeNames[n.Type], n.Data, result)
+ return result
+ }
+}
+
+// recursiveSelector matches any node below which matches a
+func recursiveSelector(a Selector) Selector {
+ return func(n *json.Node) bool {
+ if n.Type != json.ElementNode {
+ return false
+ }
+ return hasRecursiveMatch(n, a)
+ }
+}
+
+func hasRecursiveMatch(n *json.Node, a Selector) bool {
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ if a(c) || (c.Type == json.ElementNode && hasRecursiveMatch(c, a)) {
+ return true
+ }
+ }
+ return false
+}
+
+// typeSelector matches a node with type t
+func typeSelector(t string) Selector {
+ return func(n *json.Node) bool {
+ if n.DataType == t {
+ return true
+ }
+ return false
+ }
+}
diff --git a/jsonpath/selector.go b/jsonpath/selector.go
new file mode 100644
index 0000000..71c3ed2
--- /dev/null
+++ b/jsonpath/selector.go
@@ -0,0 +1,55 @@
+package jsonpath
+
+import (
+ "src.userspace.com.au/query/json"
+)
+
// Selector is a predicate over JSON nodes: a node matches when the
// function returns true.
type Selector func(*json.Node) bool

// MatchAll returns a slice of the nodes that match the selector,
// from n and its children.
func (s Selector) MatchAll(n *json.Node) []*json.Node {
	return s.matchAllInto(n, nil)
}
+
+func (s Selector) matchAllInto(n *json.Node, storage []*json.Node) []*json.Node {
+ if s(n) {
+ storage = append(storage, n)
+ }
+
+ for child := n.FirstChild; child != nil; child = child.NextSibling {
+ storage = s.matchAllInto(child, storage)
+ }
+
+ return storage
+}
+
+// Match returns true if the node matches the selector.
+func (s Selector) Match(n *json.Node) bool {
+ return s(n)
+}
+
+// MatchFirst returns the first node that matches s, from n and its children.
+func (s Selector) MatchFirst(n *json.Node) *json.Node {
+ if s.Match(n) {
+ return n
+ }
+
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ m := s.MatchFirst(c)
+ if m != nil {
+ return m
+ }
+ }
+ return nil
+}
+
+// Filter returns the nodes in nodes that match the selector.
+func (s Selector) Filter(nodes []*json.Node) (result []*json.Node) {
+ for _, n := range nodes {
+ if s(n) {
+ result = append(result, n)
+ }
+ }
+ return result
+}