summaryrefslogtreecommitdiff
path: root/jsonpath
diff options
context:
space:
mode:
Diffstat (limited to 'jsonpath')
-rw-r--r--jsonpath/jsonpath.ebnf233
-rw-r--r--jsonpath/lexer.go14
-rw-r--r--jsonpath/lexer_test.go15
-rw-r--r--jsonpath/nodes.go68
-rw-r--r--jsonpath/parse_test.go39
-rw-r--r--jsonpath/parser.go180
-rw-r--r--jsonpath/parser_test.go92
7 files changed, 479 insertions, 162 deletions
diff --git a/jsonpath/jsonpath.ebnf b/jsonpath/jsonpath.ebnf
new file mode 100644
index 0000000..f84df94
--- /dev/null
+++ b/jsonpath/jsonpath.ebnf
@@ -0,0 +1,233 @@
+path
+ = absolute path
+ | relative path
+ ;
+
+absolute path
+ = "$", [ qualified path ]
+ ;
+
+qualified path
+ = recursive location
+ | relative location
+ ;
+
+recursive location
+ = "..", relative path
+ ;
+
+relative location
+ = ".", relative path
+ ;
+
+relative path
+ = step, [ qualified path ]
+ ;
+
+step
+ = node test, [ predicate ]
+ ;
+
+node test
+ = node type, "(", ")"
+ | name test
+ ;
+
+name test
+ = "*"
+ | name
+ ;
+
+node type
+ = "object"
+ | "array"
+ | "string"
+ | "number"
+ | "boolean"
+ | "null"
+ ;
+
+name
+ = "'", quoted name character, { quoted name character }, "'"
+ | name character, { name character }
+ ;
+
+quoted name character
+ = ? any unicode character except ''' (single quote), unless it is quoted with a '\' (backslash) ?
+ ;
+
+name character
+ = ? any unicode character except '.' and '[' ?
+ ;
+
+predicate
+ = "[", predicate expression , "]"
+ ;
+
+predicate expression
+ = wildcard
+ | subscript
+ | slice
+ | union
+ | filter
+ ;
+
+wildcard
+ = "*"
+ ;
+
+subscript
+ = signed integer
+ ;
+
+slice
+ = [ signed integer ], ":", [ signed integer ], [ ":", [ non-zero signed integer ] ]
+ ;
+
+union
+ = integer, ",", integer
+ | union expression, ",", union expression
+ ;
+
+union expression
+ = relative path
+ | filter expression
+ ;
+
+filter
+ = "?(", filter expression, ")"
+ ;
+
+filter expression
+ = or expr
+ ;
+
+or expr
+ = and expr, [ "or", or expr ]
+ ;
+
+and expr
+ = equality expr, [ "and", and expr ]
+ ;
+
+equality expr
+ = relational expr, [ equality op, equality expr ]
+ ;
+
+equality op
+ = "="
+ | "!="
+ ;
+
+relational expr
+ = additive expr, [ relational op, relational expr ]
+ ;
+
+relational op
+ = ">"
+ | "<"
+ | ">="
+ | "<="
+ ;
+
+additive expr
+ = multiplicative expr, [ additive op, additive expr ]
+ ;
+
+additive op
+ = "+"
+ | "-"
+ ;
+
+multiplicative expr
+ = unary expr, [ multiplicative op, multiplicative expr ]
+ ;
+
+multiplicative op
+ = "*"
+ | "/"
+ | "%"
+ ;
+
+unary expr
+ = "@", qualified path
+ | number
+ | string
+ | boolean
+ | "null"
+ ;
+
+number
+ = signed integer, [ fractional part ], [ exponent ]
+ ;
+
+fractional part
+ = ".", digit, { digit }
+ ;
+
+exponent
+ = e, digit, { digit }
+ ;
+
+e
+ = ( "e" | "E" ), [ "+" | "-" ]
+ ;
+
+string
+ = '"', { character | escape }, '"'
+ ;
+
+character
+ = ? any unicode character except " or \ or control characters ?
+ ;
+
+escape
+ = '\"'
+ | "\\"
+ | "\/"
+ | "\b"
+ | "\f"
+ | "\n"
+ | "\r"
+ | "\t"
+ | "\u", 4 * hex digit
+ ;
+
+hex digit
+ = digit | hex lower | hex upper
+ ;
+
+boolean
+ = "true"
+ | "false"
+ ;
+
+integer
+ = [ "+" ], ( "0" | digit one to nine, { digit } )
+ ;
+
+signed integer
+ = [ "-" ], integer
+ ;
+
+non-zero signed integer
+ = [ "-" ], digit one to nine, { digit }
+ ;
+
+digit one to nine
+ = "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
+ ;
+
+digit
+ = "0" | digit one to nine
+ ;
+
+hex lower
+ = "a" | "b" | "c" | "d" | "e" | "f"
+ ;
+
+hex upper
+ = "A" | "B" | "C" | "D" | "E" | "F"
+ ;
+
+(* vim: set ft=ebnf : *)
diff --git a/jsonpath/lexer.go b/jsonpath/lexer.go
index 04fc87a..902b5bf 100644
--- a/jsonpath/lexer.go
+++ b/jsonpath/lexer.go
@@ -79,12 +79,14 @@ func stepState(l *lexer.Lexer) lexer.StateFunc {
for {
switch t := l.Next(); {
case t == '.':
+ // Don't emit dot as it is used for subsequent child
if l.Peek() == '.' {
l.Emit(TRecursive)
return stepState
}
l.Emit(TChildDot)
return childState
+
case t == '[':
l.Emit(TChildStart)
return childState
@@ -109,7 +111,7 @@ func childState(l *lexer.Lexer) lexer.StateFunc {
return childState
case t == '\'' || t == '"':
- // TODO what other characters?
+ // FIXME what other characters?
l.AcceptRun(alphanumeric + "-_")
l.Accept(string(t))
l.Emit(TQuotedName)
@@ -121,7 +123,12 @@ func childState(l *lexer.Lexer) lexer.StateFunc {
l.Emit(TName)
return childState
+ case t == '.':
+ l.Backup()
+ return stepState
+
case t == '[':
+ // FIXME predicate or another child
l.Emit(TPredicateStart)
return predicateState
@@ -129,11 +136,6 @@ func childState(l *lexer.Lexer) lexer.StateFunc {
l.Emit(TChildEnd)
return stepState
- case t == '.':
- l.Backup()
- l.Emit(TChildEnd)
- return stepState
-
case t == lexer.EOFRune:
l.Emit(lexer.EOFToken)
return nil
diff --git a/jsonpath/lexer_test.go b/jsonpath/lexer_test.go
index 504d413..ba43a31 100644
--- a/jsonpath/lexer_test.go
+++ b/jsonpath/lexer_test.go
@@ -46,7 +46,6 @@ func TestValidStates(t *testing.T) {
lexer.Token{Type: TAbsolute, Value: "$"},
lexer.Token{Type: TChildDot, Value: "."},
lexer.Token{Type: TName, Value: "one"},
- lexer.Token{Type: TChildEnd, Value: ""},
lexer.Token{Type: TChildDot, Value: "."},
lexer.Token{Type: TName, Value: "two"},
},
@@ -144,6 +143,20 @@ func TestValidStates(t *testing.T) {
lexer.Token{Type: TChildEnd, Value: "]"},
},
},
+ {
+ path: "$.a.b.c.d",
+ tokens: []lexer.Token{
+ lexer.Token{Type: TAbsolute, Value: "$"},
+ lexer.Token{Type: TChildDot, Value: "."},
+ lexer.Token{Type: TName, Value: "a"},
+ lexer.Token{Type: TChildDot, Value: "."},
+ lexer.Token{Type: TName, Value: "b"},
+ lexer.Token{Type: TChildDot, Value: "."},
+ lexer.Token{Type: TName, Value: "c"},
+ lexer.Token{Type: TChildDot, Value: "."},
+ lexer.Token{Type: TName, Value: "d"},
+ },
+ },
}
for _, tt := range tests {
diff --git a/jsonpath/nodes.go b/jsonpath/nodes.go
deleted file mode 100644
index 816002c..0000000
--- a/jsonpath/nodes.go
+++ /dev/null
@@ -1,68 +0,0 @@
-package jsonpath
-
-import (
- "fmt"
-)
-
-type jsonpath struct {
- absolute bool
- steps []step
-}
-
-func (jp jsonpath) String() string {
- out := ""
- if jp.absolute {
- out = "$"
- }
- for _, s := range jp.steps {
- out += s.String()
- }
- return out
-}
-
-type step struct {
- recursive bool
- selector selector
- predicate *predicate
-}
-
-func (s step) String() string {
- if s.recursive {
- return ".."
- }
- if s.predicate != nil {
- return fmt.Sprintf("%s%s", s.selector.String(), s.predicate.String())
- }
- return s.selector.String()
-}
-
-type selector struct {
- wildcard bool
- value string
-}
-
-func (s selector) String() string {
- if s.wildcard {
- return "[*]"
- }
- return fmt.Sprintf("[%s]", s.value)
-}
-
-type predicate struct {
- pType string
- start int
- end int
- filter string
-}
-
-func (p predicate) String() string {
- switch p.pType {
- case "index":
- return fmt.Sprintf("[%d]", p.start)
- case "range":
- return fmt.Sprintf("[%d:%d]", p.start, p.end)
- case "union":
- // TODO
- }
- return ""
-}
diff --git a/jsonpath/parse_test.go b/jsonpath/parse_test.go
deleted file mode 100644
index db41efe..0000000
--- a/jsonpath/parse_test.go
+++ /dev/null
@@ -1,39 +0,0 @@
-package jsonpath
-
-import (
- "strings"
- "testing"
-
- "src.userspace.com.au/query/json"
-)
-
-func TestParse(t *testing.T) {
- tests := []struct {
- path, src, expect string
- }{
- {
- path: "$.test",
- src: `{"test":"one"}`,
- expect: "one",
- },
- }
-
- p := Parser{}
-
- for _, tt := range tests {
- doc, err := json.Parse(strings.NewReader(tt.src))
- if err != nil {
- t.Errorf("json.Parse(%q) failed: %s", tt.src, err)
- }
-
- sel, err := p.Parse(tt.path)
- if err != nil {
- t.Errorf("Parse(%q) failed: %s", tt.path, err)
- }
- actual := sel.MatchFirst(doc)
- actualText := actual.InnerText()
- if actualText != tt.expect {
- t.Errorf("MatchFirst(%s) => %s, expected %s", tt.src, actualText, tt.expect)
- }
- }
-}
diff --git a/jsonpath/parser.go b/jsonpath/parser.go
index 748479f..5f0cd3a 100644
--- a/jsonpath/parser.go
+++ b/jsonpath/parser.go
@@ -2,9 +2,9 @@ package jsonpath
import (
"fmt"
+ "strconv"
"strings"
- //base "src.userspace.com.au/query"
"src.userspace.com.au/query/json"
"src.userspace.com.au/query/lexer"
)
@@ -24,95 +24,151 @@ func (p *Parser) next() (done bool) {
if p.tok != nil {
p.pos = p.tok.Position
}
- fmt.Printf("%s(%d): '%s'\n", tokenNames[p.tok.Type], p.tok.Type, p.tok.Value)
return p.tok != nil && !done
}
func (p *Parser) Parse(input string) (Selector, error) {
+ var sel, nr Selector
+ var err error
+
p.l = lexer.New(input, pathState)
p.l.Start()
// First token
p.next()
+
if p.tok.Type != TAbsolute {
+ // TODO does jsonpath have relative searches
return nil, fmt.Errorf("expected root, got %s", p.tok.Value)
}
- result, err := p.parseQualifiedSelector()
- if err != nil {
- return nil, err
+ p.next()
+
+ if p.tok.Type == lexer.EOFToken {
+ return rootSelector, nil
+ }
+
+ sel = rootSelector
+
+ for {
+ switch p.tok.Type {
+ case TRecursive:
+ p.next()
+ p.next()
+ if nr, err = p.parseStepSelector(); err != nil {
+ return nil, err
+ }
+ sel = recursiveSelector(nr)
+
+ case TChildDot:
+ p.next()
+ if nr, err = p.parseStepSelector(); err != nil {
+ return nil, err
+ }
+ sel = childSelector(sel, nr)
+ default:
+ return sel, nil
+ }
}
- return childSelector(rootSelector, result), nil
+ panic("unreachable")
}
-// parseQualifiedSelector
-func (p *Parser) parseQualifiedSelector() (result Selector, err error) {
+func (p *Parser) parseStepSelector() (Selector, error) {
+ var sel, nr Selector
+ var err error
+
+ sel = p.parseNodeTestSelector()
p.next()
switch p.tok.Type {
- case TRecursive:
- nr, _ := p.parseStepSelector()
- result = recursiveSelector(nr)
+ case TPredicateStart:
+ p.next()
+ if nr, err = p.parsePredicateExprSelector(); err != nil {
+ return nil, err
+ }
+ sel = childSelector(sel, nr)
- case TChildDot, TChildStart:
- result, err = p.parseStepSelector()
+ case lexer.EOFToken:
+ return sel, nil
default:
- return nil, fmt.Errorf("expected . or .. or something, got %s", p.tok.Value)
}
- return result, nil
+ return sel, nil
}
-func (p *Parser) parseStepSelector() (result Selector, err error) {
- p.next()
- result, err = p.parseNodeTestSelector()
- if err != nil {
- return nil, err
- }
- p.next()
- if p.tok.Type == TPredicateStart {
- // TODO
- }
- return result, nil
-}
-
-func (p *Parser) parseNodeTestSelector() (result Selector, err error) {
+func (p *Parser) parseNodeTestSelector() (sel Selector) {
switch p.tok.Type {
case TName:
/*
switch p.tok.Value {
case "object", "array", "string", "number", "boolean", "null":
// TODO
- //result = typeSelector(p.tok.Value)
+ //sel = typeSelector(p.tok.Value)
default:
}
*/
- result = nameSelector(p.tok.Value)
+ sel = nameSelector(p.tok.Value)
+
+ case TQuotedName:
+ sel = nameSelector(strings.Trim(p.tok.Value, `"'`))
+
case TWildcard:
- result = wildcardSelector
+ sel = wildcardSelector
+
default:
- fmt.Println("here: ", tokenNames[p.tok.Type])
}
- return result, err
+ return sel
}
-func (p *Parser) parseChildSelector() Selector {
- var result Selector
- p.next()
- switch p.tok.Type {
- case TQuotedName:
- result = nameSelector(strings.Trim(p.tok.Value, `"'`))
- case TName:
- result = nameSelector(p.tok.Value)
+func (p *Parser) parsePredicateExprSelector() (Selector, error) {
+ var err error
+
+ if p.tok.Type != TNumber {
+ return nil, fmt.Errorf("expecting number")
+ }
+
+ num, err := strconv.ParseInt(p.tok.Value, 10, 64)
+ if err != nil {
+ return nil, err
+ }
+ return arrayIndexSelector(num), nil
+ /* TODO
+ var els []int64
+ if p.tok.Type == TPredicateEnd {
+ return arrayIndexSelector(num), nil
}
+
+ els = append(els, num)
+
p.next()
- return result
+
+ if p.tok.Type == TRange {
+ // FIXME
+ p.next()
+ num, err := strconv.ParseInt(p.tok.Value, 10, 64)
+ if err != nil {
+ return nil, err
+ }
+ els = append(els, num)
+ // We have start and finish range
+ }
+ if p.tok.Type == TUnion {
+ // FIXME
+ }
+ }
+ return childSelector(rootSelector, arrayIndexSelector(idx)), nil
+ return sel, nil
+ */
}
// rootSelector checks node is root
func rootSelector(n *json.Node) bool {
- result := (n.Type == json.DocumentNode)
- fmt.Printf("rootSelector => type: %s, val: %s, result: %t\n", json.NodeNames[n.Type], n.Data, result)
+ result := false
+ if n.Parent != nil && n.Parent.Type == json.DocumentNode {
+ result = true
+ } else {
+ result = (n.Type == json.DocumentNode)
+ }
return result
}
@@ -124,9 +180,15 @@ func wildcardSelector(n *json.Node) bool {
// childSelector creates a selector for c being a child of p
func childSelector(p, c Selector) Selector {
return func(n *json.Node) bool {
- fmt.Printf("childSelector => type: %s, val: %s\n", json.NodeNames[n.Type], n.Data)
+ /*
+ for child := n.FirstChild; child != nil; child = child.NextSibling {
+ if p(n) && c(child) {
+ return true
+ }
+ }
+ return false
+ */
result := (c(n) && n.Parent != nil && p(n.Parent))
- fmt.Printf("childSelector => type: %s, val: %s, result: %t\n", json.NodeNames[n.Type], n.Data, result)
return result
}
}
@@ -135,7 +197,6 @@ func childSelector(p, c Selector) Selector {
func nameSelector(k string) Selector {
return func(n *json.Node) bool {
result := (n.Type == json.ElementNode && n.Data == k)
- fmt.Printf("nameSelector => type: %s, val: %s, result: %t\n", json.NodeNames[n.Type], n.Data, result)
return result
}
}
@@ -159,9 +220,32 @@ func hasRecursiveMatch(n *json.Node, a Selector) bool {
return false
}
+// arrayIndexSelector returns a selector matching an "arrayitem" node at zero-based position idx among its parent's children
+func arrayIndexSelector(idx int64) Selector {
+ return func(n *json.Node) bool {
+ if n.DataType != "arrayitem" {
+ return false
+ }
+ if n.Parent == nil {
+ return false
+ }
+
+ parent := n.Parent
+ i := int64(0)
+ for c := parent.FirstChild; c != nil && i <= idx; c = c.NextSibling {
+ if i == idx && c == n {
+ return true
+ }
+ i++
+ }
+ return false
+ }
+}
+
// typeSelector matches a node with type t
func typeSelector(t string) Selector {
return func(n *json.Node) bool {
+ // FIXME
if n.DataType == t {
return true
}
diff --git a/jsonpath/parser_test.go b/jsonpath/parser_test.go
new file mode 100644
index 0000000..2406651
--- /dev/null
+++ b/jsonpath/parser_test.go
@@ -0,0 +1,92 @@
+package jsonpath
+
+import (
+ //"fmt"
+ "strings"
+ "testing"
+
+ "src.userspace.com.au/query/json"
+)
+
+func TestParse(t *testing.T) {
+ tests := []struct {
+ path, src, expect string
+ }{
+ {
+ path: "$",
+ src: `{"test":"one"}`,
+ expect: "one",
+ },
+ {
+ path: "$.test",
+ src: `{"test":"one"}`,
+ expect: "one",
+ },
+ {
+ path: "$.a.b",
+ src: `{"a":{"b":"two"}}`,
+ expect: "two",
+ },
+ {
+ path: "$.a.b.c",
+ src: `{"a":{"b":{"c":"two"}}}`,
+ expect: "two",
+ },
+ {
+ path: "$.a.b",
+ src: `{"fail":{"a":"one"},"a":{"b":"three"}}`,
+ expect: "three",
+ },
+ {
+ path: "$.test.test",
+ src: `{"fail":{"test1":"one"},"test1":{"test3":"two"}}`,
+ expect: "",
+ },
+ {
+ path: "$..test",
+ src: `{"fail":{"test1":{"test":"two"}}}`,
+ expect: "two",
+ },
+ {
+ path: "$.a.b.c.d",
+ src: `{"a":{"b":{"c":{"d":"blah"}}}}`,
+ expect: "blah",
+ },
+ {
+ path: "$.test[2]",
+ src: `{"test":[1,"two","three"]}`,
+ expect: "three",
+ },
+ {
+ path: "$.*",
+ src: `{"test":"one"}`,
+ expect: "one",
+ },
+ }
+
+ p := Parser{}
+
+ for _, tt := range tests {
+ doc, err := json.Parse(strings.NewReader(tt.src))
+ if err != nil {
+ t.Fatalf("json.Parse(%q) failed: %s", tt.src, err)
+ }
+ //doc.PrintTree(0)
+
+ sel, err := p.Parse(tt.path)
+ if err != nil {
+ t.Fatalf("Parse(%q) failed: %s", tt.path, err)
+ }
+ actualText := ""
+ actual := sel.MatchFirst(doc)
+ if actual != nil {
+ actualText = actual.InnerText()
+ }
+
+ if tt.expect == "" && actualText != "" {
+ t.Fatalf("MatchFirst(%s) => %s, expected nothing", tt.src, actualText)
+ } else if actualText != tt.expect {
+ t.Fatalf("MatchFirst(%s) => %s, expected %s", tt.src, actualText, tt.expect)
+ }
+ }
+}