|           |                                          |                           |
|-----------|------------------------------------------|---------------------------|
| author    | Felix Hanley <felix@userspace.com.au>    | 2018-11-16 06:35:28 +0000 |
| committer | Felix Hanley <felix@userspace.com.au>    | 2018-11-16 06:35:28 +0000 |
| commit    | 44da796e192961b614e3540c4c1ec52f4bc0a290 |                           |
| tree      | c0dcc69363c401d123bfdba0662d990d6804df77 |                           |
| parent    | d6882bd9403c588415c1906a7015d16e92aa1ad3 |                           |
Add start of jsonpath parser
|            |                        |     |
|------------|------------------------|-----|
| -rw-r--r-- | Makefile               |  21 |
| -rw-r--r-- | jsonpath/lexer.go      | 236 |
| -rw-r--r-- | jsonpath/lexer_test.go | 172 |
| -rw-r--r-- | jsonpath/nodes.go      |  68 |
| -rw-r--r-- | jsonpath/parse_test.go |  39 |
| -rw-r--r-- | jsonpath/parser.go     | 170 |
| -rw-r--r-- | jsonpath/selector.go   |  55 |

7 files changed, 761 insertions(+), 0 deletions(-)
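Before the file-by-file diff: the heart of the change is the state-machine lexer in `jsonpath/lexer.go`, built on the shared `src.userspace.com.au/query/lexer` package. For orientation, the sketch below shows how the token stream is consumed from inside the package, following the same `New`/`Start`/`NextToken` pattern the tests use; `DumpTokens` is an illustrative helper, not part of this commit.

```go
package jsonpath

import (
	"fmt"

	"src.userspace.com.au/query/lexer"
)

// DumpTokens prints the token stream for a path. Illustrative only;
// it reuses pathState and tokenNames from lexer.go in this commit.
func DumpTokens(path string) {
	l := lexer.New(path, pathState) // begin in pathState, which expects a leading $
	l.Start()
	for {
		tok, done := l.NextToken()
		if done || tok == nil {
			return
		}
		fmt.Printf("%-16s %q\n", tokenNames[tok.Type], tok.Value)
	}
}
```

For `$.one[1:3]` this walks TAbsolute, TChildDot, TName, TPredicateStart, TNumber, TRange, TNumber, TPredicateEnd — the same expectations asserted in `lexer_test.go` below.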
```diff
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..77601b5
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,21 @@
+
+pkgs := $(shell go list ./...)
+
+.PHONY: help lint test clean
+
+ifdef GOPATH
+GO111MODULE=on
+endif
+
+test: ## Run tests with coverage
+	go test -short -cover -coverprofile coverage.out $(pkgs)
+	go tool cover -html=coverage.out -o coverage.html
+
+lint:
+	golint $(pkgs)
+
+clean: ## Clean all test files
+	rm -rf coverage*
+
+help: ## This help
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) |sort |awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
```

```diff
diff --git a/jsonpath/lexer.go b/jsonpath/lexer.go
new file mode 100644
index 0000000..04fc87a
--- /dev/null
+++ b/jsonpath/lexer.go
@@ -0,0 +1,236 @@
+package jsonpath
+
+import (
+	//"fmt"
+	"strings"
+
+	"src.userspace.com.au/query/lexer"
+)
+
+const (
+	_ lexer.TokenType = iota
+
+	TAbsolute       // $
+	TCurrent        // @
+	TChildDot       // .
+	TRecursive      // ..
+	TChildStart     // [
+	TChildEnd       // ]
+	TQuotedName     // ' or "
+	TName           // A word-like
+	TWildcard       // *
+	TFilterStart    // ?(
+	TFilterEnd      // )
+	TPredicateStart // [
+	TPredicateEnd   // ]
+	TNumber         // [-]0-9 and hex also
+	TRange          // :
+	TIndex          // [234]
+	TUnion          // [,]
+	TSlice          // [start,end,step]
+	TScriptStart    // (
+	TScriptEnd      // )
+)
+
+// helpful for debugging
+var tokenNames = map[lexer.TokenType]string{
+	lexer.ErrorToken: "TError",
+	lexer.EOFToken:   "EOF",
+	TAbsolute:        "TAbsolute",
+	TCurrent:         "TCurrent",
+	TChildDot:        "TChildDot",
+	TRecursive:       "TRecursive",
+	TChildStart:      "TChildStart",
+	TChildEnd:        "TChildEnd",
+	TQuotedName:      "TQuoteName",
+	TName:            "TName",
+	TWildcard:        "TWildcard",
+	TFilterStart:     "TFilterStart",
+	TFilterEnd:       "TFilterEnd",
+	TPredicateStart:  "TPredicateStart",
+	TPredicateEnd:    "TPredicateEnd",
+	TNumber:          "TNumber",
+	TRange:           "TRange",
+	TIndex:           "TIndex",
+	TUnion:           "TUnion",
+	TSlice:           "TSlice",
+	TScriptStart:     "TScriptStart",
+	TScriptEnd:       "TScriptEnd",
+}
+
+const (
+	alpha         = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+	numeric       = "0123456789"
+	alphanumeric  = alpha + numeric
+	signednumeric = "-" + numeric
+)
+
+func pathState(l *lexer.Lexer) lexer.StateFunc {
+	l.SkipWhitespace()
+	if l.Accept("$") {
+		l.Emit(TAbsolute)
+		return stepState
+	}
+	return nil
+}
+
+// stepState starts with a . or bracket
+func stepState(l *lexer.Lexer) lexer.StateFunc {
+	for {
+		switch t := l.Next(); {
+		case t == '.':
+			if l.Peek() == '.' {
+				l.Emit(TRecursive)
+				return stepState
+			}
+			l.Emit(TChildDot)
+			return childState
+
+		case t == '[':
+			l.Emit(TChildStart)
+			return childState
+
+		case t == lexer.EOFRune:
+			l.Emit(lexer.EOFToken)
+			return nil
+
+		default:
+			return l.ErrorState("abrupt end to stepState")
+		}
+	}
+}
+
+// childState has started with . or [
+func childState(l *lexer.Lexer) lexer.StateFunc {
+	for {
+		switch t := l.Next(); {
+
+		case t == '*':
+			l.Emit(TWildcard)
+			return childState
+
+		case t == '\'' || t == '"':
+			// TODO what other characters?
+			l.AcceptRun(alphanumeric + "-_")
+			l.Accept(string(t))
+			l.Emit(TQuotedName)
+			return childState
+
+		case strings.IndexRune(alpha, t) != -1:
+			// Key lookup
+			l.AcceptRun(alphanumeric)
+			l.Emit(TName)
+			return childState
+
+		case t == '[':
+			l.Emit(TPredicateStart)
+			return predicateState
+
+		case t == ']':
+			l.Emit(TChildEnd)
+			return stepState
+
+		case t == '.':
+			l.Backup()
+			l.Emit(TChildEnd)
+			return stepState
+
+		case t == lexer.EOFRune:
+			l.Emit(lexer.EOFToken)
+			return nil
+
+		default:
+			return l.ErrorState("abrupt end to childState")
+		}
+	}
+}
+
+func predicateState(l *lexer.Lexer) lexer.StateFunc {
+	for {
+		switch t := l.Next(); {
+		case t == '?':
+			if l.Accept("(") {
+				l.Emit(TFilterStart)
+				return filterState
+			}
+			return l.ErrorState("expecting (")
+
+		case t == '(':
+			l.Emit(TScriptStart)
+			return scriptState
+
+		case t == ']':
+			l.Emit(TPredicateEnd)
+			return stepState
+
+		default:
+			l.Backup()
+			return predicateExprState
+		}
+	}
+}
+
+func predicateExprState(l *lexer.Lexer) lexer.StateFunc {
+	open := true
+	inRange := false
+	for {
+		switch t := l.Next(); {
+
+		case t == '-' || '0' <= t && t <= '9':
+			l.AcceptRun(numeric)
+			l.Emit(TNumber)
+			open = false
+
+		case t == ':':
+			if inRange {
+				return l.ErrorState("invalid range")
+			}
+			if open {
+				l.Backup()
+				l.Emit(TNumber)
+				l.Accept(":")
+			}
+			l.Emit(TRange)
+			inRange = true
+			open = true
+
+		case t == ',':
+			l.Emit(TUnion)
+
+		case t == ']':
+			l.Backup()
+			if open && inRange {
+				l.Emit(TNumber)
+			}
+			return predicateState
+
+		default:
+			return l.ErrorState("invalid predicate expression")
+		}
+	}
+}
+
+// filterState TODO
+func filterState(l *lexer.Lexer) lexer.StateFunc {
+	for {
+		switch t := l.Next(); {
+		case t == ')':
+			l.Emit(TFilterEnd)
+			return predicateState
+		default:
+			return l.ErrorState("invalid filter expression")
+		}
+	}
+}
+
+// scriptState TODO
+func scriptState(l *lexer.Lexer) lexer.StateFunc {
+	for {
+		switch t := l.Next(); {
+		case t == ')':
+			l.Emit(TScriptEnd)
+			return predicateState
+		default:
+			return l.ErrorState("invalid script expression")
+		}
+	}
+}
```

```diff
diff --git a/jsonpath/lexer_test.go b/jsonpath/lexer_test.go
new file mode 100644
index 0000000..504d413
--- /dev/null
+++ b/jsonpath/lexer_test.go
@@ -0,0 +1,172 @@
+package jsonpath
+
+import (
+	"testing"
+
+	"src.userspace.com.au/query/lexer"
+)
+
+func TestValidStates(t *testing.T) {
+	tests := []struct {
+		path   string
+		tokens []lexer.Token
+	}{
+		{
+			path: "$.test",
+			tokens: []lexer.Token{
+				lexer.Token{Type: TAbsolute, Value: "$"},
+				lexer.Token{Type: TChildDot, Value: "."},
+				lexer.Token{Type: TName, Value: "test"},
+			},
+		},
+		{
+			path: "$[test]",
+			tokens: []lexer.Token{
+				lexer.Token{Type: TAbsolute, Value: "$"},
+				lexer.Token{Type: TChildStart, Value: "["},
+				lexer.Token{Type: TName, Value: "test"},
+				lexer.Token{Type: TChildEnd, Value: "]"},
+			},
+		},
+		{
+			path: "$[one][two]",
+			tokens: []lexer.Token{
+				lexer.Token{Type: TAbsolute, Value: "$"},
+				lexer.Token{Type: TChildStart, Value: "["},
+				lexer.Token{Type: TName, Value: "one"},
+				lexer.Token{Type: TChildEnd, Value: "]"},
+				lexer.Token{Type: TChildStart, Value: "["},
+				lexer.Token{Type: TName, Value: "two"},
+				lexer.Token{Type: TChildEnd, Value: "]"},
+			},
+		},
+		{
+			path: "$.one.two",
+			tokens: []lexer.Token{
+				lexer.Token{Type: TAbsolute, Value: "$"},
+				lexer.Token{Type: TChildDot, Value: "."},
+				lexer.Token{Type: TName, Value: "one"},
+				lexer.Token{Type: TChildEnd, Value: ""},
+				lexer.Token{Type: TChildDot, Value: "."},
+				lexer.Token{Type: TName, Value: "two"},
+			},
+		},
+		{
+			path: "$[*]",
+			tokens: []lexer.Token{
+				lexer.Token{Type: TAbsolute, Value: "$"},
+				lexer.Token{Type: TChildStart, Value: "["},
+				lexer.Token{Type: TWildcard, Value: "*"},
+				lexer.Token{Type: TChildEnd, Value: "]"},
+			},
+		},
+		{
+			path: "$[one][*]",
+			tokens: []lexer.Token{
+				lexer.Token{Type: TAbsolute, Value: "$"},
+				lexer.Token{Type: TChildStart, Value: "["},
+				lexer.Token{Type: TName, Value: "one"},
+				lexer.Token{Type: TChildEnd, Value: "]"},
+				lexer.Token{Type: TChildStart, Value: "["},
+				lexer.Token{Type: TWildcard, Value: "*"},
+				lexer.Token{Type: TChildEnd, Value: "]"},
+			},
+		},
+		{
+			path: "$.one[1,2,3]",
+			tokens: []lexer.Token{
+				lexer.Token{Type: TAbsolute, Value: "$"},
+				lexer.Token{Type: TChildDot, Value: "."},
+				lexer.Token{Type: TName, Value: "one"},
+				lexer.Token{Type: TPredicateStart, Value: "["},
+				lexer.Token{Type: TNumber, Value: "1"},
+				lexer.Token{Type: TUnion, Value: ","},
+				lexer.Token{Type: TNumber, Value: "2"},
+				lexer.Token{Type: TUnion, Value: ","},
+				lexer.Token{Type: TNumber, Value: "3"},
+				lexer.Token{Type: TPredicateEnd, Value: "]"},
+			},
+		},
+		{
+			path: "$.one[1:3]",
+			tokens: []lexer.Token{
+				lexer.Token{Type: TAbsolute, Value: "$"},
+				lexer.Token{Type: TChildDot, Value: "."},
+				lexer.Token{Type: TName, Value: "one"},
+				lexer.Token{Type: TPredicateStart, Value: "["},
+				lexer.Token{Type: TNumber, Value: "1"},
+				lexer.Token{Type: TRange, Value: ":"},
+				lexer.Token{Type: TNumber, Value: "3"},
+				lexer.Token{Type: TPredicateEnd, Value: "]"},
+			},
+		},
+		{
+			path: "$.one[:3]",
+			tokens: []lexer.Token{
+				lexer.Token{Type: TAbsolute, Value: "$"},
+				lexer.Token{Type: TChildDot, Value: "."},
+				lexer.Token{Type: TName, Value: "one"},
+				lexer.Token{Type: TPredicateStart, Value: "["},
+				lexer.Token{Type: TNumber, Value: ""},
+				lexer.Token{Type: TRange, Value: ":"},
+				lexer.Token{Type: TNumber, Value: "3"},
+				lexer.Token{Type: TPredicateEnd, Value: "]"},
+			},
+		},
+		{
+			path: "$.one[3:]",
+			tokens: []lexer.Token{
+				lexer.Token{Type: TAbsolute, Value: "$"},
+				lexer.Token{Type: TChildDot, Value: "."},
+				lexer.Token{Type: TName, Value: "one"},
+				lexer.Token{Type: TPredicateStart, Value: "["},
+				lexer.Token{Type: TNumber, Value: "3"},
+				lexer.Token{Type: TRange, Value: ":"},
+				lexer.Token{Type: TNumber, Value: ""},
+				lexer.Token{Type: TPredicateEnd, Value: "]"},
+			},
+		},
+		{
+			path: "$..one",
+			tokens: []lexer.Token{
+				lexer.Token{Type: TAbsolute, Value: "$"},
+				lexer.Token{Type: TRecursive, Value: "."},
+				lexer.Token{Type: TChildDot, Value: "."},
+				lexer.Token{Type: TName, Value: "one"},
+			},
+		},
+		{
+			path: "$['one']",
+			tokens: []lexer.Token{
+				lexer.Token{Type: TAbsolute, Value: "$"},
+				lexer.Token{Type: TChildStart, Value: "["},
+				lexer.Token{Type: TQuotedName, Value: "'one'"},
+				lexer.Token{Type: TChildEnd, Value: "]"},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Log("testing path: ", tt.path)
+		l := lexer.New(tt.path, pathState)
+		l.Start()
+
+		func() {
+			for i, expected := range tt.tokens {
+				actual, done := l.NextToken()
+				if done || actual == nil {
+					t.Errorf("Lexer(%q) finished early, expecting %v", tt.path, expected)
+					return
+				}
+				if actual.Type != expected.Type {
+					t.Errorf("Lexer(%q) token %d => %s, expected %s", tt.path, i, tokenNames[actual.Type], tokenNames[expected.Type])
+					return
+				}
+				if actual.Value != expected.Value {
+					t.Errorf("Lexer(%q) token %d => %v, expected %v", tt.path, i, actual, expected)
+					return
+				}
+			}
+		}()
+	}
+}
```

```diff
diff --git a/jsonpath/nodes.go b/jsonpath/nodes.go
new file mode 100644
index 0000000..816002c
--- /dev/null
+++ b/jsonpath/nodes.go
@@ -0,0 +1,68 @@
+package jsonpath
+
+import (
+	"fmt"
+)
+
+type jsonpath struct {
+	absolute bool
+	steps    []step
+}
+
+func (jp jsonpath) String() string {
+	out := ""
+	if jp.absolute {
+		out = "$"
+	}
+	for _, s := range jp.steps {
+		out += s.String()
+	}
+	return out
+}
+
+type step struct {
+	recursive bool
+	selector  selector
+	predicate *predicate
+}
+
+func (s step) String() string {
+	if s.recursive {
+		return ".."
+	}
+	if s.predicate != nil {
+		return fmt.Sprintf("%s%s", s.selector.String(), s.predicate.String())
+	}
+	return s.selector.String()
+}
+
+type selector struct {
+	wildcard bool
+	value    string
+}
+
+func (s selector) String() string {
+	if s.wildcard {
+		return "[*]"
+	}
+	return fmt.Sprintf("[%s]", s.value)
+}
+
+type predicate struct {
+	pType  string
+	start  int
+	end    int
+	filter string
+}
+
+func (p predicate) String() string {
+	switch p.pType {
+	case "index":
+		return fmt.Sprintf("[%d]", p.start)
+	case "range":
+		return fmt.Sprintf("[%d:%d]", p.start, p.end)
+	case "union":
+		// TODO
+	}
+	return ""
+}
```

```diff
diff --git a/jsonpath/parse_test.go b/jsonpath/parse_test.go
new file mode 100644
index 0000000..db41efe
--- /dev/null
+++ b/jsonpath/parse_test.go
@@ -0,0 +1,39 @@
+package jsonpath
+
+import (
+	"strings"
+	"testing"
+
+	"src.userspace.com.au/query/json"
+)
+
+func TestParse(t *testing.T) {
+	tests := []struct {
+		path, src, expect string
+	}{
+		{
+			path:   "$.test",
+			src:    `{"test":"one"}`,
+			expect: "one",
+		},
+	}
+
+	p := Parser{}
+
+	for _, tt := range tests {
+		doc, err := json.Parse(strings.NewReader(tt.src))
+		if err != nil {
+			t.Errorf("json.Parse(%q) failed: %s", tt.src, err)
+		}
+
+		sel, err := p.Parse(tt.path)
+		if err != nil {
+			t.Errorf("Parse(%q) failed: %s", tt.path, err)
+		}
+		actual := sel.MatchFirst(doc)
+		actualText := actual.InnerText()
+		if actualText != tt.expect {
+			t.Errorf("MatchFirst(%s) => %s, expected %s", tt.src, actualText, tt.expect)
+		}
+	}
+}
```

```diff
diff --git a/jsonpath/parser.go b/jsonpath/parser.go
new file mode 100644
index 0000000..748479f
--- /dev/null
+++ b/jsonpath/parser.go
@@ -0,0 +1,170 @@
+package jsonpath
+
+import (
+	"fmt"
+	"strings"
+
+	//base "src.userspace.com.au/query"
+	"src.userspace.com.au/query/json"
+	"src.userspace.com.au/query/lexer"
+)
+
+type Parser struct {
+	l   *lexer.Lexer
+	pos int
+	tok *lexer.Token
+}
+
+func NewParser() (*Parser, error) {
+	return &Parser{}, nil
+}
+
+func (p *Parser) next() (done bool) {
+	p.tok, done = p.l.NextToken()
+	if p.tok != nil {
+		p.pos = p.tok.Position
+	}
+	fmt.Printf("%s(%d): '%s'\n", tokenNames[p.tok.Type], p.tok.Type, p.tok.Value)
+	return p.tok != nil && !done
+}
+
+func (p *Parser) Parse(input string) (Selector, error) {
+	p.l = lexer.New(input, pathState)
+	p.l.Start()
+
+	// First token
+	p.next()
+	if p.tok.Type != TAbsolute {
+		return nil, fmt.Errorf("expected root, got %s", p.tok.Value)
+	}
+
+	result, err := p.parseQualifiedSelector()
+	if err != nil {
+		return nil, err
+	}
+	return childSelector(rootSelector, result), nil
+}
+
+// parseQualifiedSelector
+func (p *Parser) parseQualifiedSelector() (result Selector, err error) {
+	p.next()
+
+	switch p.tok.Type {
+	case TRecursive:
+		nr, _ := p.parseStepSelector()
+		result = recursiveSelector(nr)
+
+	case TChildDot, TChildStart:
+		result, err = p.parseStepSelector()
+
+	default:
+		return nil, fmt.Errorf("expected . or .. or something, got %s", p.tok.Value)
+	}
+	return result, nil
+}
+
+func (p *Parser) parseStepSelector() (result Selector, err error) {
+	p.next()
+	result, err = p.parseNodeTestSelector()
+	if err != nil {
+		return nil, err
+	}
+	p.next()
+	if p.tok.Type == TPredicateStart {
+		// TODO
+	}
+	return result, nil
+}
+
+func (p *Parser) parseNodeTestSelector() (result Selector, err error) {
+	switch p.tok.Type {
+	case TName:
+		/*
+			switch p.tok.Value {
+			case "object", "array", "string", "number", "boolean", "null":
+				// TODO
+				//result = typeSelector(p.tok.Value)
+			default:
+			}
+		*/
+		result = nameSelector(p.tok.Value)
+	case TWildcard:
+		result = wildcardSelector
+	default:
+		fmt.Println("here: ", tokenNames[p.tok.Type])
+	}
+	return result, err
+}
+
+func (p *Parser) parseChildSelector() Selector {
+	var result Selector
+	p.next()
+	switch p.tok.Type {
+	case TQuotedName:
+		result = nameSelector(strings.Trim(p.tok.Value, `"'`))
+	case TName:
+		result = nameSelector(p.tok.Value)
+	}
+	p.next()
+	return result
+}
+
+// rootSelector checks node is root
+func rootSelector(n *json.Node) bool {
+	result := (n.Type == json.DocumentNode)
+	fmt.Printf("rootSelector => type: %s, val: %s, result: %t\n", json.NodeNames[n.Type], n.Data, result)
+	return result
+}
+
+// wildcardSelector returns true
+func wildcardSelector(n *json.Node) bool {
+	return true
+}
+
+// childSelector creates a selector for c being a child of p
+func childSelector(p, c Selector) Selector {
+	return func(n *json.Node) bool {
+		fmt.Printf("childSelector => type: %s, val: %s\n", json.NodeNames[n.Type], n.Data)
+		result := (c(n) && n.Parent != nil && p(n.Parent))
+		fmt.Printf("childSelector => type: %s, val: %s, result: %t\n", json.NodeNames[n.Type], n.Data, result)
+		return result
+	}
+}
+
+// nameSelector generates selector for object key == k
+func nameSelector(k string) Selector {
+	return func(n *json.Node) bool {
+		result := (n.Type == json.ElementNode && n.Data == k)
+		fmt.Printf("nameSelector => type: %s, val: %s, result: %t\n", json.NodeNames[n.Type], n.Data, result)
+		return result
+	}
+}
+
+// recursiveSelector matches any node below which matches a
+func recursiveSelector(a Selector) Selector {
+	return func(n *json.Node) bool {
+		if n.Type != json.ElementNode {
+			return false
+		}
+		return hasRecursiveMatch(n, a)
+	}
+}
+
+func hasRecursiveMatch(n *json.Node, a Selector) bool {
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		if a(c) || (c.Type == json.ElementNode && hasRecursiveMatch(c, a)) {
+			return true
+		}
+	}
+	return false
+}
+
+// typeSelector matches a node with type t
+func typeSelector(t string) Selector {
+	return func(n *json.Node) bool {
+		if n.DataType == t {
+			return true
+		}
+		return false
+	}
+}
```

```diff
diff --git a/jsonpath/selector.go b/jsonpath/selector.go
new file mode 100644
index 0000000..71c3ed2
--- /dev/null
+++ b/jsonpath/selector.go
@@ -0,0 +1,55 @@
+package jsonpath
+
+import (
+	"src.userspace.com.au/query/json"
+)
+
+type Selector func(*json.Node) bool
+
+// MatchAll returns a slice of the nodes that match the selector,
+// from n and its children.
+func (s Selector) MatchAll(n *json.Node) []*json.Node {
+	return s.matchAllInto(n, nil)
+}
+
+func (s Selector) matchAllInto(n *json.Node, storage []*json.Node) []*json.Node {
+	if s(n) {
+		storage = append(storage, n)
+	}
+
+	for child := n.FirstChild; child != nil; child = child.NextSibling {
+		storage = s.matchAllInto(child, storage)
+	}
+
+	return storage
+}
+
+// Match returns true if the node matches the selector.
+func (s Selector) Match(n *json.Node) bool {
+	return s(n)
+}
+
+// MatchFirst returns the first node that matches s, from n and its children.
+func (s Selector) MatchFirst(n *json.Node) *json.Node {
+	if s.Match(n) {
+		return n
+	}
+
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		m := s.MatchFirst(c)
+		if m != nil {
+			return m
+		}
+	}
+	return nil
+}
+
+// Filter returns the nodes in nodes that match the selector.
+func (s Selector) Filter(nodes []*json.Node) (result []*json.Node) {
+	for _, n := range nodes {
+		if s(n) {
+			result = append(result, n)
+		}
+	}
+	return result
+}
```
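A note on the design: selectors compose as closures and match bottom-up. `childSelector(p, c)` accepts a node when the node itself satisfies `c` and its parent satisfies `p`. For a path like `$.test`, `Parse` assembles, in effect, the hand-expanded sketch below (illustrative only, not part of this commit):

```go
package jsonpath

// handBuiltTestSelector mirrors what p.Parse("$.test") assembles:
// childSelector wraps nameSelector("test") under rootSelector, so the
// compiled closure returns true for an ElementNode with key "test"
// whose parent is the DocumentNode.
func handBuiltTestSelector() Selector {
	return childSelector(rootSelector, nameSelector("test"))
}
```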

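Finally, end-to-end usage mirrors `parse_test.go`: parse a document with the sibling `json` package, compile the path into a `Selector`, and match against the node tree. A minimal sketch, assuming the package is imported as `src.userspace.com.au/query/jsonpath` (error handling abbreviated):

```go
package main

import (
	"fmt"
	"strings"

	"src.userspace.com.au/query/json"
	"src.userspace.com.au/query/jsonpath" // assumed import path for this package
)

func main() {
	// Build a node tree from raw JSON, as the parse test does.
	doc, err := json.Parse(strings.NewReader(`{"test":"one"}`))
	if err != nil {
		panic(err)
	}

	// Compile the path and find the first matching node.
	var p jsonpath.Parser
	sel, err := p.Parse("$.test")
	if err != nil {
		panic(err)
	}
	if n := sel.MatchFirst(doc); n != nil {
		fmt.Println(n.InnerText()) // prints "one"
	}
}
```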