R interface to jq, a JSON processor http://stedolan.github.io/jq/
jqr makes it easy to process large amounts of JSON without first converting it from JSON to R objects, and without resorting to regular expressions. This means that the eventual loading into R can be quicker.
The jq
command line examples from the jq tutorial work exactly the same in R!
library(curl)
library(jqr)
curl('https://api.github.com/repos/ropensci/jqr/commits?per_page=5') %>%
jq('.[] | {message: .commit.message, name: .commit.committer.name}')
#> [
#> {
#> "message": "Bump Travis",
#> "name": "Jeroen Ooms"
#> },
#> {
#> "message": "Fix for GCC-8 stringop-truncation warning",
#> "name": "Jeroen Ooms"
#> },
#> {
#> "message": "update cran comments",
#> "name": "Scott Chamberlain"
#> },
#> {
#> "message": "tweaks to man files",
#> "name": "Scott Chamberlain"
#> },
#> {
#> "message": "Fix travis file?",
#> "name": "Jeroen"
#> }
#> ]
Try running some of the other examples.
Binary packages for OS-X or Windows can be installed directly from CRAN:
install.packages("jqr")
Installation from source on Linux or OSX requires libjq. On Ubuntu 14.04 and 16.04, use libjq-dev from Launchpad:
sudo add-apt-repository -y ppa:opencpu/jq
sudo apt-get update -q
sudo apt-get install -y libjq-dev
On more recent Debian or Ubuntu releases, install libjq-dev directly from Universe:
sudo apt-get install -y libjq-dev
On Fedora we need jq-devel:
sudo yum install jq-devel
On CentOS / RHEL we install jq-devel via EPEL:
sudo yum install epel-release
sudo yum install jq-devel
On OS-X use jq from Homebrew:
brew install jq
On Solaris we can install libjq_dev from OpenCSW:
pkgadd -d http://get.opencsw.org/now
/opt/csw/bin/pkgutil -U
/opt/csw/bin/pkgutil -y -i libjq_dev
There’s a low level interface in which you can execute jq
code just as you would on the command line:
jq(str, ".[]")
#> [
#> {
#> "foo": 1,
#> "bar": 2
#> },
#> {
#> "foo": 3,
#> "bar": 4
#> },
#> {
#> "foo": 5,
#> "bar": 6
#> }
#> ]
jq(str, "[.[] | {name: .foo} | keys]")
#> [
#> [
#> "name"
#> ],
#> [
#> "name"
#> ],
#> [
#> "name"
#> ]
#> ]
Note that we print the output to look like a valid JSON object to make it easier to look at. However, it’s a simple character string or vector of strings. A trick you can do is to wrap your jq program in brackets, like [.[]] instead of .[], so that the result is a single string containing one valid JSON array rather than a vector of separate strings, e.g.,
jq(str, ".[]") %>% unclass
#> [1] "{\"foo\":1,\"bar\":2}" "{\"foo\":3,\"bar\":4}" "{\"foo\":5,\"bar\":6}"
# vs.
jq(str, "[.[]]") %>% unclass
#> [1] "[{\"foo\":1,\"bar\":2},{\"foo\":3,\"bar\":4},{\"foo\":5,\"bar\":6}]"
Combine many jq arguments - they are internally combined with a pipe | (note how the two calls below produce identical output)
jq(str, ".[] | {name: .foo} | keys")
#> [
#> [
#> "name"
#> ],
#> [
#> "name"
#> ],
#> [
#> "name"
#> ]
#> ]
jq(str, ".[]", "{name: .foo}", "keys")
#> [
#> [
#> "name"
#> ],
#> [
#> "name"
#> ],
#> [
#> "name"
#> ]
#> ]
Also accepts many JSON inputs now
jq("[123, 456] [77, 88, 99]", ".[]")
#> [
#> 123,
#> 456,
#> 77,
#> 88,
#> 99
#> ]
jq('{"foo": 77} {"bar": 45}', ".[]")
#> [
#> 77,
#> 45
#> ]
jq('[{"foo": 77, "stuff": "things"}] [{"bar": 45}] [{"n": 5}]', ".[] | keys")
#> [
#> [
#> "foo",
#> "stuff"
#> ],
#> [
#> "bar"
#> ],
#> [
#> "n"
#> ]
#> ]
# if you have jsons in a vector
jsons <- c('[{"foo": 77, "stuff": "things"}]', '[{"bar": 45}]', '[{"n": 5}]')
jq(paste0(jsons, collapse = " "), ".[]")
#> [
#> {
#> "foo": 77,
#> "stuff": "things"
#> },
#> {
#> "bar": 45
#> },
#> {
#> "n": 5
#> }
#> ]
The other interface is higher level, and uses a suite of functions to construct queries. Queries are constructed, then executed internally with jq() after the last piped command.
You don’t have to use pipes though. See examples below.
Examples:
Index
x <- '[{"message": "hello", "name": "jenn"}, {"message": "world", "name": "beth"}]'
x %>% index()
#> [
#> {
#> "message": "hello",
#> "name": "jenn"
#> },
#> {
#> "message": "world",
#> "name": "beth"
#> }
#> ]
Sort
reverse order
Show the query to be used using peek()
x <- '{"user":"stedolan","titles":["JQ Primer", "More JQ"]}'
jq(x, '{user, title: .titles[]}')
#> [
#> {
#> "user": "stedolan",
#> "title": "JQ Primer"
#> },
#> {
#> "user": "stedolan",
#> "title": "More JQ"
#> }
#> ]
x %>% index()
#> [
#> "stedolan",
#> [
#> "JQ Primer",
#> "More JQ"
#> ]
#> ]
x %>% build_object(user, title = `.titles[]`)
#> [
#> {
#> "user": "stedolan",
#> "title": "JQ Primer"
#> },
#> {
#> "user": "stedolan",
#> "title": "More JQ"
#> }
#> ]
jq(x, '{user, title: .titles[]}') %>% jsonlite::toJSON() %>% jsonlite::validate()
#> [1] TRUE
join
ltrimstr
'["fo", "foo", "barfoo", "foobar", "afoo"]' %>% index() %>% ltrimstr(foo)
#> [
#> "fo",
#> "",
#> "barfoo",
#> "bar",
#> "afoo"
#> ]
rtrimstr
'["fo", "foo", "barfoo", "foobar", "foob"]' %>% index() %>% rtrimstr(foo)
#> [
#> "fo",
#> "",
#> "bar",
#> "foobar",
#> "foob"
#> ]
startswith
'["fo", "foo", "barfoo", "foobar", "barfoob"]' %>% index %>% startswith(foo)
#> [
#> false,
#> true,
#> false,
#> true,
#> false
#> ]
'["fo", "foo"] ["barfoo", "foobar", "barfoob"]' %>% index %>% startswith(foo)
#> [
#> false,
#> true,
#> false,
#> true,
#> false
#> ]
endswith
'["fo", "foo", "barfoo", "foobar", "barfoob"]' %>% index %>% endswith(foo)
#> [
#> false,
#> true,
#> true,
#> false,
#> false
#> ]
tojson, fromjson, tostring
'[1, "foo", ["foo"]]' %>% index
#> [
#> 1,
#> "foo",
#> [
#> "foo"
#> ]
#> ]
'[1, "foo", ["foo"]]' %>% index %>% tostring
#> [
#> "1",
#> "foo",
#> "[\"foo\"]"
#> ]
'[1, "foo", ["foo"]]' %>% index %>% tojson
#> [
#> "1",
#> "\"foo\"",
#> "[\"foo\"]"
#> ]
'[1, "foo", ["foo"]]' %>% index %>% tojson %>% fromjson
#> [
#> 1,
#> "foo",
#> [
#> "foo"
#> ]
#> ]
contains
unique
With filtering via select() you can use various operators, like ==, &&, and ||. We translate these internally for you to what jq wants to see (==, and, and or, respectively).
Simple, one condition
More complicated. Combine more than one condition; wrap each individual condition in parentheses
x <- '{"foo": 4, "bar": 2} {"foo": 5, "bar": 4} {"foo": 8, "bar": 12}'
x %>% select((.foo < 6) && (.bar > 3))
#> {
#> "foo": 5,
#> "bar": 4
#> }
x %>% select((.foo < 6) || (.bar > 3))
#> [
#> {
#> "foo": 4,
#> "bar": 2
#> },
#> {
#> "foo": 5,
#> "bar": 4
#> },
#> {
#> "foo": 8,
#> "bar": 12
#> }
#> ]
get type information for each element
'[0, false, [], {}, null, "hello"]' %>% types
#> [
#> "number",
#> "boolean",
#> "array",
#> "object",
#> "null",
#> "string"
#> ]
'[0, false, [], {}, null, "hello", true, [1,2,3]]' %>% types
#> [
#> "number",
#> "boolean",
#> "array",
#> "object",
#> "null",
#> "string",
#> "boolean",
#> "array"
#> ]
select elements by type
get keys
delete by key name
check for key existence
str3 <- '[[0,1], ["a","b","c"]]'
str3 %>% haskey(2)
#> [
#> false,
#> true
#> ]
str3 %>% haskey(1,2)
#> [
#> true,
#> false,
#> true,
#> true
#> ]
Build an object, selecting variables by name, and rename
More complicated build_object()
, using the included dataset commits
commits %>%
index() %>%
build_object(sha = .sha, name = .commit.committer.name)
#> [
#> {
#> "sha": [
#> "110e009996e1359d25b8e99e71f83b96e5870790"
#> ],
#> "name": [
#> "Nicolas Williams"
#> ]
#> },
#> {
#> "sha": [
#> "7b6a018dff623a4f13f6bcd52c7c56d9b4a4165f"
#> ],
#> "name": [
#> "Nicolas Williams"
#> ]
#> },
#> {
#> "sha": [
#> "a50e548cc5313c187483bc8fb1b95e1798e8ef65"
#> ],
#> "name": [
#> "Nicolas Williams"
#> ]
#> },
#> {
#> "sha": [
#> "4b258f7d31b34ff5d45fba431169e7fd4c995283"
#> ],
#> "name": [
#> "Nicolas Williams"
#> ]
#> },
#> {
#> "sha": [
#> "d1cb8ee0ad3ddf03a37394bfa899cfd3ddd007c5"
#> ],
#> "name": [
#> "Nicolas Williams"
#> ]
#> }
#> ]
'{"a": 7}' %>% do(.a + 1)
#> 8
'{"a": [1,2], "b": [3,4]}' %>% do(.a + .b)
#> [
#> 1,
#> 2,
#> 3,
#> 4
#> ]
'{"a": [1,2], "b": [3,4]}' %>% do(.a - .b)
#> [
#> 1,
#> 2
#> ]
'{"a": 3}' %>% do(4 - .a)
#> 1
'["xml", "yaml", "json"]' %>% do('. - ["xml", "yaml"]')
#> ". - [\"xml\", \"yaml\"]"
'5' %>% do(10 / . * 3)
#> 6
comparisons
'[5,4,2,7]' %>% index() %>% do(. < 4)
#> [
#> false,
#> false,
#> true,
#> false
#> ]
'[5,4,2,7]' %>% index() %>% do(. > 4)
#> [
#> true,
#> false,
#> false,
#> true
#> ]
'[5,4,2,7]' %>% index() %>% do(. <= 4)
#> [
#> false,
#> true,
#> true,
#> false
#> ]
'[5,4,2,7]' %>% index() %>% do(. >= 4)
#> [
#> true,
#> true,
#> false,
#> true
#> ]
'[5,4,2,7]' %>% index() %>% do(. == 4)
#> [
#> false,
#> true,
#> false,
#> false
#> ]
'[5,4,2,7]' %>% index() %>% do(. != 4)
#> [
#> true,
#> false,
#> true,
#> true
#> ]
length
sqrt
floor
find minimum
'[5,4,2,7]' %>% minj
#> 2
'[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' %>% minj
#> {
#> "foo": 2,
#> "bar": 3
#> }
'[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' %>% minj(foo)
#> {
#> "foo": 1,
#> "bar": 14
#> }
'[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' %>% minj(bar)
#> {
#> "foo": 2,
#> "bar": 3
#> }
find maximum
'[5,4,2,7]' %>% maxj
#> 7
'[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' %>% maxj
#> {
#> "foo": 1,
#> "bar": 14
#> }
'[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' %>% maxj(foo)
#> {
#> "foo": 2,
#> "bar": 3
#> }
'[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' %>% maxj(bar)
#> {
#> "foo": 1,
#> "bar": 14
#> }
jq
sometimes creates pieces of JSON that are valid in themselves, but together are not. combine()
is a way to make valid JSON.
This outputs a few pieces of JSON
(x <- commits %>%
index() %>%
build_object(sha = .sha, name = .commit.committer.name))
#> [
#> {
#> "sha": [
#> "110e009996e1359d25b8e99e71f83b96e5870790"
#> ],
#> "name": [
#> "Nicolas Williams"
#> ]
#> },
#> {
#> "sha": [
#> "7b6a018dff623a4f13f6bcd52c7c56d9b4a4165f"
#> ],
#> "name": [
#> "Nicolas Williams"
#> ]
#> },
#> {
#> "sha": [
#> "a50e548cc5313c187483bc8fb1b95e1798e8ef65"
#> ],
#> "name": [
#> "Nicolas Williams"
#> ]
#> },
#> {
#> "sha": [
#> "4b258f7d31b34ff5d45fba431169e7fd4c995283"
#> ],
#> "name": [
#> "Nicolas Williams"
#> ]
#> },
#> {
#> "sha": [
#> "d1cb8ee0ad3ddf03a37394bfa899cfd3ddd007c5"
#> ],
#> "name": [
#> "Nicolas Williams"
#> ]
#> }
#> ]
Use combine()
to put them together.
combine(x)
#> [
#> {
#> "sha": [
#> "110e009996e1359d25b8e99e71f83b96e5870790"
#> ],
#> "name": [
#> "Nicolas Williams"
#> ]
#> },
#> {
#> "sha": [
#> "7b6a018dff623a4f13f6bcd52c7c56d9b4a4165f"
#> ],
#> "name": [
#> "Nicolas Williams"
#> ]
#> },
#> {
#> "sha": [
#> "a50e548cc5313c187483bc8fb1b95e1798e8ef65"
#> ],
#> "name": [
#> "Nicolas Williams"
#> ]
#> },
#> {
#> "sha": [
#> "4b258f7d31b34ff5d45fba431169e7fd4c995283"
#> ],
#> "name": [
#> "Nicolas Williams"
#> ]
#> },
#> {
#> "sha": [
#> "d1cb8ee0ad3ddf03a37394bfa899cfd3ddd007c5"
#> ],
#> "name": [
#> "Nicolas Williams"
#> ]
#> }
#> ]
Get citation information for jqr in R by running citation(package = 'jqr')