diff options
Diffstat (limited to 'jsonpath')
| -rw-r--r-- | jsonpath/jsonpath.ebnf | 233 | ||||
| -rw-r--r-- | jsonpath/lexer.go | 14 | ||||
| -rw-r--r-- | jsonpath/lexer_test.go | 15 | ||||
| -rw-r--r-- | jsonpath/nodes.go | 68 | ||||
| -rw-r--r-- | jsonpath/parse_test.go | 39 | ||||
| -rw-r--r-- | jsonpath/parser.go | 180 | ||||
| -rw-r--r-- | jsonpath/parser_test.go | 92 |
7 files changed, 479 insertions, 162 deletions
diff --git a/jsonpath/jsonpath.ebnf b/jsonpath/jsonpath.ebnf new file mode 100644 index 0000000..f84df94 --- /dev/null +++ b/jsonpath/jsonpath.ebnf @@ -0,0 +1,233 @@ +path + = absolute path + | relative path + ; + +absolute path + = "$", [ qualified path ] + ; + +qualified path + = recursive location + | relative location + ; + +recursive location + = "..", relative path + ; + +relative location + = ".", relative path + ; + +relative path + = step, [ qualified path ] + ; + +step + = node test, [ predicate ] + ; + +node test + = node type, "(", ")" + | name test + ; + +name test + = "*" + | name + ; + +node type + = "object" + | "array" + | "string" + | "number" + | "boolean" + | "null" + ; + +name + = "'", quoted name character, { quoted name character }, "'" + | name character, { name character } + ; + +quoted name character + = ? any unicode character except ''' (single quote), unless it is quoted with a '\' (backslash) ? + ; + +name character + = ? any unicode character except '.' and '[' ? 
+ ; + +predicate + = "[", predicate expression , "]" + ; + +predicate expression + = wildcard + | subscript + | slice + | union + | filter + ; + +wildcard + = "*" + ; + +subscript + = signed integer + ; + +slice + = [ signed integer ], ":", [ signed integer ], [ ":", [ non-zero signed integer ] ] + ; + +union + = integer, ",", integer + | union expression, ",", union expression + ; + +union expression + = relative path + | filter expression + ; + +filter + = "?(", filter expression, ")" + ; + +filter expression + = or expr + ; + +or expr + = and expr, [ "or", or expr ] + ; + +and expr + = equality expr, [ "and", and expr ] + ; + +equality expr + = relational expr, [ equality op, equality expr ] + ; + +equality op + = "=" + | "!=" + ; + +relational expr + = additive expr, [ relational op, relational expr ] + ; + +relational op + = ">" + | "<" + | ">=" + | "<=" + ; + +additive expr + = multiplicative expr, [ additive op, additive expr ] + ; + +additive op + = "+" + | "-" + ; + +multiplicative expr + = unary expr, [ multiplicative op, multiplicative expr ] + ; + +multiplicative op + = "*" + | "/" + | "%" + ; + +unary expr + = "@", qualified path + | number + | string + | boolean + | "null" + ; + +number + = signed integer, [ fractional part ], [ exponent ] + ; + +fractional part + = ".", digit, { digit } + ; + +exponent + = e, digit, { digit } + ; + +e + = "e" | "E", [ "+" | "-" ] + ; + +string + = """, { character | escape }, """ + ; + +character + = ? any unicode character except " or \ or control characters ? 
+ ; + +escape + = "\"" + | "\\" + | "\/" + | "\b" + | "\f" + | "\n" + | "\r" + | "\t" + | "\u", 4 * hex digit + ; + +hex digit + = 2 * ( digit | hex lower | hex upper ) + ; + +boolean + = "true" + | "false" + ; + +integer + = [ "+" ], ( "0" | digit one to nine, { digit } ) + ; + +signed integer + = [ "-" ], integer + ; + +non-zero signed integer + = [ "-" ], digit one to nine, { digit } + ; + +digit one to nine + = "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" + ; + +digit + = "0" | digit one to nine + ; + +hex lower + = "a" | "b" | "c" | "d" | "e" | "f" + ; + +hex upper + = "A" | "B" | "C" | "D" | "E" | "F" + ; + +(* vim: set ft=ebnf : *) diff --git a/jsonpath/lexer.go b/jsonpath/lexer.go index 04fc87a..902b5bf 100644 --- a/jsonpath/lexer.go +++ b/jsonpath/lexer.go @@ -79,12 +79,14 @@ func stepState(l *lexer.Lexer) lexer.StateFunc { for { switch t := l.Next(); { case t == '.': + // Don't emit dot as it is used for subsequent child if l.Peek() == '.' { l.Emit(TRecursive) return stepState } l.Emit(TChildDot) return childState + case t == '[': l.Emit(TChildStart) return childState @@ -109,7 +111,7 @@ func childState(l *lexer.Lexer) lexer.StateFunc { return childState case t == '\'' || t == '"': - // TODO what other characters? + // FIXME what other characters? 
l.AcceptRun(alphanumeric + "-_") l.Accept(string(t)) l.Emit(TQuotedName) @@ -121,7 +123,12 @@ func childState(l *lexer.Lexer) lexer.StateFunc { l.Emit(TName) return childState + case t == '.': + l.Backup() + return stepState + case t == '[': + // FIXME predicate or another child l.Emit(TPredicateStart) return predicateState @@ -129,11 +136,6 @@ func childState(l *lexer.Lexer) lexer.StateFunc { l.Emit(TChildEnd) return stepState - case t == '.': - l.Backup() - l.Emit(TChildEnd) - return stepState - case t == lexer.EOFRune: l.Emit(lexer.EOFToken) return nil diff --git a/jsonpath/lexer_test.go b/jsonpath/lexer_test.go index 504d413..ba43a31 100644 --- a/jsonpath/lexer_test.go +++ b/jsonpath/lexer_test.go @@ -46,7 +46,6 @@ func TestValidStates(t *testing.T) { lexer.Token{Type: TAbsolute, Value: "$"}, lexer.Token{Type: TChildDot, Value: "."}, lexer.Token{Type: TName, Value: "one"}, - lexer.Token{Type: TChildEnd, Value: ""}, lexer.Token{Type: TChildDot, Value: "."}, lexer.Token{Type: TName, Value: "two"}, }, @@ -144,6 +143,20 @@ func TestValidStates(t *testing.T) { lexer.Token{Type: TChildEnd, Value: "]"}, }, }, + { + path: "$.a.b.c.d", + tokens: []lexer.Token{ + lexer.Token{Type: TAbsolute, Value: "$"}, + lexer.Token{Type: TChildDot, Value: "."}, + lexer.Token{Type: TName, Value: "a"}, + lexer.Token{Type: TChildDot, Value: "."}, + lexer.Token{Type: TName, Value: "b"}, + lexer.Token{Type: TChildDot, Value: "."}, + lexer.Token{Type: TName, Value: "c"}, + lexer.Token{Type: TChildDot, Value: "."}, + lexer.Token{Type: TName, Value: "d"}, + }, + }, } for _, tt := range tests { diff --git a/jsonpath/nodes.go b/jsonpath/nodes.go deleted file mode 100644 index 816002c..0000000 --- a/jsonpath/nodes.go +++ /dev/null @@ -1,68 +0,0 @@ -package jsonpath - -import ( - "fmt" -) - -type jsonpath struct { - absolute bool - steps []step -} - -func (jp jsonpath) String() string { - out := "" - if jp.absolute { - out = "$" - } - for _, s := range jp.steps { - out += s.String() - } - return 
out -} - -type step struct { - recursive bool - selector selector - predicate *predicate -} - -func (s step) String() string { - if s.recursive { - return ".." - } - if s.predicate != nil { - return fmt.Sprintf("%s%s", s.selector.String(), s.predicate.String()) - } - return s.selector.String() -} - -type selector struct { - wildcard bool - value string -} - -func (s selector) String() string { - if s.wildcard { - return "[*]" - } - return fmt.Sprintf("[%s]", s.value) -} - -type predicate struct { - pType string - start int - end int - filter string -} - -func (p predicate) String() string { - switch p.pType { - case "index": - return fmt.Sprintf("[%d]", p.start) - case "range": - return fmt.Sprintf("[%d:%d]", p.start, p.end) - case "union": - // TODO - } - return "" -} diff --git a/jsonpath/parse_test.go b/jsonpath/parse_test.go deleted file mode 100644 index db41efe..0000000 --- a/jsonpath/parse_test.go +++ /dev/null @@ -1,39 +0,0 @@ -package jsonpath - -import ( - "strings" - "testing" - - "src.userspace.com.au/query/json" -) - -func TestParse(t *testing.T) { - tests := []struct { - path, src, expect string - }{ - { - path: "$.test", - src: `{"test":"one"}`, - expect: "one", - }, - } - - p := Parser{} - - for _, tt := range tests { - doc, err := json.Parse(strings.NewReader(tt.src)) - if err != nil { - t.Errorf("json.Parse(%q) failed: %s", tt.src, err) - } - - sel, err := p.Parse(tt.path) - if err != nil { - t.Errorf("Parse(%q) failed: %s", tt.path, err) - } - actual := sel.MatchFirst(doc) - actualText := actual.InnerText() - if actualText != tt.expect { - t.Errorf("MatchFirst(%s) => %s, expected %s", tt.src, actualText, tt.expect) - } - } -} diff --git a/jsonpath/parser.go b/jsonpath/parser.go index 748479f..5f0cd3a 100644 --- a/jsonpath/parser.go +++ b/jsonpath/parser.go @@ -2,9 +2,9 @@ package jsonpath import ( "fmt" + "strconv" "strings" - //base "src.userspace.com.au/query" "src.userspace.com.au/query/json" "src.userspace.com.au/query/lexer" ) @@ -24,95 
+24,151 @@ func (p *Parser) next() (done bool) { if p.tok != nil { p.pos = p.tok.Position } - fmt.Printf("%s(%d): '%s'\n", tokenNames[p.tok.Type], p.tok.Type, p.tok.Value) return p.tok != nil && !done } func (p *Parser) Parse(input string) (Selector, error) { + var sel, nr Selector + var err error + p.l = lexer.New(input, pathState) p.l.Start() // First token p.next() + if p.tok.Type != TAbsolute { + // TODO does jsonpath have relative searches return nil, fmt.Errorf("expected root, got %s", p.tok.Value) } - result, err := p.parseQualifiedSelector() - if err != nil { - return nil, err + p.next() + + if p.tok.Type == lexer.EOFToken { + return rootSelector, nil + } + + sel = rootSelector + + for { + switch p.tok.Type { + case TRecursive: + p.next() + p.next() + if nr, err = p.parseStepSelector(); err != nil { + return nil, err + } + sel = recursiveSelector(nr) + + case TChildDot: + p.next() + if nr, err = p.parseStepSelector(); err != nil { + return nil, err + } + sel = childSelector(sel, nr) + default: + return sel, nil + } } - return childSelector(rootSelector, result), nil + panic("unreachable") } -// parseQualifiedSelector -func (p *Parser) parseQualifiedSelector() (result Selector, err error) { +func (p *Parser) parseStepSelector() (Selector, error) { + var sel, nr Selector + var err error + + sel = p.parseNodeTestSelector() p.next() switch p.tok.Type { - case TRecursive: - nr, _ := p.parseStepSelector() - result = recursiveSelector(nr) + case TPredicateStart: + p.next() + if nr, err = p.parsePredicateExprSelector(); err != nil { + return nil, err + } + sel = childSelector(sel, nr) - case TChildDot, TChildStart: - result, err = p.parseStepSelector() + case lexer.EOFToken: + return sel, nil default: - return nil, fmt.Errorf("expected . or .. 
or something, got %s", p.tok.Value) } - return result, nil + return sel, nil } -func (p *Parser) parseStepSelector() (result Selector, err error) { - p.next() - result, err = p.parseNodeTestSelector() - if err != nil { - return nil, err - } - p.next() - if p.tok.Type == TPredicateStart { - // TODO - } - return result, nil -} - -func (p *Parser) parseNodeTestSelector() (result Selector, err error) { +func (p *Parser) parseNodeTestSelector() (sel Selector) { switch p.tok.Type { case TName: /* switch p.tok.Value { case "object", "array", "string", "number", "boolean", "null": // TODO - //result = typeSelector(p.tok.Value) + //sel = typeSelector(p.tok.Value) default: } */ - result = nameSelector(p.tok.Value) + sel = nameSelector(p.tok.Value) + + case TQuotedName: + sel = nameSelector(strings.Trim(p.tok.Value, `"'`)) + case TWildcard: - result = wildcardSelector + sel = wildcardSelector + default: - fmt.Println("here: ", tokenNames[p.tok.Type]) } - return result, err + return sel } -func (p *Parser) parseChildSelector() Selector { - var result Selector - p.next() - switch p.tok.Type { - case TQuotedName: - result = nameSelector(strings.Trim(p.tok.Value, `"'`)) - case TName: - result = nameSelector(p.tok.Value) +func (p *Parser) parsePredicateExprSelector() (Selector, error) { + var err error + + if p.tok.Type != TNumber { + return nil, fmt.Errorf("expecting number") + } + + num, err := strconv.ParseInt(p.tok.Value, 10, 64) + if err != nil { + return nil, err + } + return arrayIndexSelector(num), nil + /* TODO + var els []int64 + if p.tok.Type == TPredicateEnd { + return arrayIndexSelector(num), nil } + + els = append(els, num) + p.next() - return result + + if p.tok.Type == TRange { + // FIXME + p.next() + num, err := strconv.ParseInt(p.tok.Value, 10, 64) + if err != nil { + return nil, err + } + els = append(els, num) + // We have start and finish range + } + if p.tok.Type == TUnion { + // FIXME + } + } + return childSelector(rootSelector, arrayIndexSelector(idx)), nil 
+ return sel, nil + */ } // rootSelector checks node is root func rootSelector(n *json.Node) bool { - result := (n.Type == json.DocumentNode) - fmt.Printf("rootSelector => type: %s, val: %s, result: %t\n", json.NodeNames[n.Type], n.Data, result) + result := false + if n.Parent != nil && n.Parent.Type == json.DocumentNode { + result = true + } else { + result = (n.Type == json.DocumentNode) + } return result } @@ -124,9 +180,15 @@ func wildcardSelector(n *json.Node) bool { // childSelector creates a selector for c being a child of p func childSelector(p, c Selector) Selector { return func(n *json.Node) bool { - fmt.Printf("childSelector => type: %s, val: %s\n", json.NodeNames[n.Type], n.Data) + /* + for child := n.FirstChild; child != nil; child = child.NextSibling { + if p(n) && c(child) { + return true + } + } + return false + */ result := (c(n) && n.Parent != nil && p(n.Parent)) - fmt.Printf("childSelector => type: %s, val: %s, result: %t\n", json.NodeNames[n.Type], n.Data, result) return result } } @@ -135,7 +197,6 @@ func childSelector(p, c Selector) Selector { func nameSelector(k string) Selector { return func(n *json.Node) bool { result := (n.Type == json.ElementNode && n.Data == k) - fmt.Printf("nameSelector => type: %s, val: %s, result: %t\n", json.NodeNames[n.Type], n.Data, result) return result } } @@ -159,9 +220,32 @@ func hasRecursiveMatch(n *json.Node, a Selector) bool { return false } +// arrayIndexSelector generates selector for node being idx index of parent +func arrayIndexSelector(idx int64) Selector { + return func(n *json.Node) bool { + if n.DataType != "arrayitem" { + return false + } + if n.Parent == nil { + return false + } + + parent := n.Parent + i := int64(0) + for c := parent.FirstChild; c != nil && i <= idx; c = c.NextSibling { + if i == idx && c == n { + return true + } + i++ + } + return false + } +} + // typeSelector matches a node with type t func typeSelector(t string) Selector { return func(n *json.Node) bool { + // FIXME if 
n.DataType == t { return true } diff --git a/jsonpath/parser_test.go b/jsonpath/parser_test.go new file mode 100644 index 0000000..2406651 --- /dev/null +++ b/jsonpath/parser_test.go @@ -0,0 +1,92 @@ +package jsonpath + +import ( + //"fmt" + "strings" + "testing" + + "src.userspace.com.au/query/json" +) + +func TestParse(t *testing.T) { + tests := []struct { + path, src, expect string + }{ + { + path: "$", + src: `{"test":"one"}`, + expect: "one", + }, + { + path: "$.test", + src: `{"test":"one"}`, + expect: "one", + }, + { + path: "$.a.b", + src: `{"a":{"b":"two"}}`, + expect: "two", + }, + { + path: "$.a.b.c", + src: `{"a":{"b":{"c":"two"}}}`, + expect: "two", + }, + { + path: "$.a.b", + src: `{"fail":{"a":"one"},"a":{"b":"three"}}`, + expect: "three", + }, + { + path: "$.test.test", + src: `{"fail":{"test1":"one"},"test1":{"test3":"two"}}`, + expect: "", + }, + { + path: "$..test", + src: `{"fail":{"test1":{"test":"two"}}}`, + expect: "two", + }, + { + path: "$.a.b.c.d", + src: `{"a":{"b":{"c":{"d":"blah"}}}}`, + expect: "blah", + }, + { + path: "$.test[2]", + src: `{"test":[1,"two","three"]}`, + expect: "three", + }, + { + path: "$.*", + src: `{"test":"one"}`, + expect: "one", + }, + } + + p := Parser{} + + for _, tt := range tests { + doc, err := json.Parse(strings.NewReader(tt.src)) + if err != nil { + t.Fatalf("json.Parse(%q) failed: %s", tt.src, err) + } + //doc.PrintTree(0) + + sel, err := p.Parse(tt.path) + if err != nil { + t.Fatalf("Parse(%q) failed: %s", tt.path, err) + } + actualText := "" + actual := sel.MatchFirst(doc) + if actual != nil { + actualText = actual.InnerText() + } + + if tt.expect == "" && actualText != "" { + t.Fatalf("MatchFirst(%s) => %s, expected nothing", tt.src, actualText) + } else if actualText != tt.expect { + t.Fatalf("MatchFirst(%s) => %s, expected %s", tt.src, actualText, tt.expect) + } + } +} |
