diff --git a/DESCRIPTION b/DESCRIPTION index 7c44b1a..cfbf20b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: jsonify Type: Package Title: Convert Between 'R' Objects and Javascript Object Notation (JSON) -Version: 1.0.0001 +Version: 1.0.0002 Date: 2019-11-03 Authors@R: c( person("David", "Cooley", ,"dcooley@symbolix.com.au", role = c("aut", "cre")), diff --git a/R/scratch.R b/R/scratch.R index dd410eb..c8a645c 100644 --- a/R/scratch.R +++ b/R/scratch.R @@ -37,6 +37,7 @@ # ) # # cat( jsonify:::rcpp_to_ndjson(lst, FALSE, -1L, TRUE, TRUE, "row") ) +# cat( jsonify:::rcpp_to_ndjson(lst, FALSE, -1L, TRUE, TRUE, "column") ) # # N <- 1e6 # data <- data.frame( diff --git a/R/to_json.R b/R/to_json.R index 004b467..5ab3f24 100644 --- a/R/to_json.R +++ b/R/to_json.R @@ -56,6 +56,47 @@ to_json <- function( x, unbox = FALSE, digits = NULL, numeric_dates = TRUE, #' #' Converts R objects to ndjson #' +#' @inheritParams to_json +#' +#' @details +#' +#' Lists are converted to ndjson non-recursively. That is, each of the objects +#' in the list at the top level is converted to a new-line JSON object. Any nested +#' sub-elements are then contained within that JSON object. 
See examples. +#' +#' @examples +#' +#' to_ndjson( 1:5 ) +#' to_ndjson( letters ) +#' +#' mat <- matrix(1:6, ncol = 2) +#' +#' to_ndjson( x = mat ) +#' to_ndjson( x = mat, by = "col" ) +#' +#' df <- data.frame( +#' x = 1:5 +#' , y = letters[1:5] +#' , z = as.Date(seq(18262, 18262 + 4, by = 1 ), origin = "1970-01-01" ) +#' ) +#' +#' to_ndjson( x = df ) +#' to_ndjson( x = df, numeric_dates = FALSE ) +#' to_ndjson( x = df, factors_as_string = FALSE ) +#' to_ndjson( x = df, by = "column" ) +#' +#' ## Lists are non-recursive; only elements `x` and `y` are converted to ndjson +#' lst <- list( +#' x = 1:5 +#' , y = list( +#' a = letters[1:5] +#' , b = data.frame(i = 10:15, j = 20:25) +#' ) +#' ) +#' +#' to_ndjson( x = lst ) +#' to_ndjson( x = lst, by = "column") +#' #' #' @export to_ndjson <- function( x, unbox = FALSE, digits = NULL, numeric_dates = TRUE, @@ -67,7 +108,7 @@ to_ndjson <- function( x, unbox = FALSE, digits = NULL, numeric_dates = TRUE, } handle_digits <- function( digits ) { - if( is.null( digits ) ) return(-1) + if( is.null( digits ) ) return(-1L) return( as.integer( digits ) ) } diff --git a/inst/include/jsonify/to_json/api.hpp b/inst/include/jsonify/to_json/api.hpp index 57d5012..f9db291 100644 --- a/inst/include/jsonify/to_json/api.hpp +++ b/inst/include/jsonify/to_json/api.hpp @@ -33,8 +33,9 @@ namespace api { // // loop over rows or columns, // } - inline Rcpp::StringVector to_ndjson( + inline void to_ndjson( Rcpp::DataFrame& df, + std::ostringstream& os, bool unbox = false, int digits = -1, bool numeric_dates = true, @@ -48,7 +49,7 @@ namespace api { Rcpp::StringVector column_names = df.names(); bool in_data_frame = true; - std::ostringstream os; // for storing the final string of ndjson + //std::ostringstream os; // for storing the final string of ndjson if( by == "row" ) { @@ -107,12 +108,13 @@ namespace api { } - Rcpp::StringVector sv = os.str(); - return sv; + // Rcpp::StringVector sv = os.str(); + // return sv; } - inline Rcpp::StringVector 
to_ndjson( + inline void to_ndjson( Rcpp::LogicalMatrix mat, + std::ostringstream& os, bool unbox = false, std::string by = "row" ) { @@ -121,7 +123,7 @@ namespace api { R_xlen_t n_col = mat.ncol(); R_xlen_t i; - std::ostringstream os; // for storing the final string of ndjson + //std::ostringstream os; // for storing the final string of ndjson if( by == "row" ) { @@ -154,12 +156,13 @@ namespace api { } else { Rcpp::stop("jsonify - expecting matrix operatinos by row or column"); } - Rcpp::StringVector sv = os.str(); - return sv; + // Rcpp::StringVector sv = os.str(); + // return sv; } - inline Rcpp::StringVector to_ndjson( + inline void to_ndjson( Rcpp::IntegerMatrix mat, + std::ostringstream& os, bool unbox = false, std::string by = "row" ) { @@ -168,7 +171,7 @@ namespace api { R_xlen_t n_col = mat.ncol(); R_xlen_t i; - std::ostringstream os; // for storing the final string of ndjson + //std::ostringstream os; // for storing the final string of ndjson if( by == "row" ) { @@ -201,13 +204,14 @@ namespace api { } else { Rcpp::stop("jsonify - expecting matrix operatinos by row or column"); } - Rcpp::StringVector sv = os.str(); - return sv; + // Rcpp::StringVector sv = os.str(); + // return sv; } - inline Rcpp::StringVector to_ndjson( + inline void to_ndjson( Rcpp::NumericMatrix mat, + std::ostringstream& os, bool unbox = false, int digits = -1, std::string by = "row" @@ -217,7 +221,7 @@ namespace api { R_xlen_t n_col = mat.ncol(); R_xlen_t i; - std::ostringstream os; // for storing the final string of ndjson + //std::ostringstream os; // for storing the final string of ndjson if( by == "row" ) { @@ -250,12 +254,13 @@ namespace api { } else { Rcpp::stop("jsonify - expecting matrix operatinos by row or column"); } - Rcpp::StringVector sv = os.str(); - return sv; + // Rcpp::StringVector sv = os.str(); + // return sv; } inline Rcpp::StringVector to_ndjson( Rcpp::StringMatrix mat, + std::ostringstream& os, bool unbox = false, std::string by = "row" ) { @@ -264,7 +269,7 @@ 
namespace api { R_xlen_t n_col = mat.ncol(); R_xlen_t i; - std::ostringstream os; // for storing the final string of ndjson + //std::ostringstream os; // for storing the final string of ndjson if( by == "row" ) { @@ -295,12 +300,54 @@ namespace api { } } else { - Rcpp::stop("jsonify - expecting matrix operatinos by row or column"); + Rcpp::stop("jsonify - expecting matrix operations by row or column"); + } + // Rcpp::StringVector sv = os.str(); + // return sv; + } + + inline void to_ndjson( + Rcpp::List& lst, + std::ostringstream& os, + bool unbox = false, + int digits = -1, + bool numeric_dates = true, + bool factors_as_string = true, + std::string by = "row" + ) { + R_xlen_t n = lst.size(); + R_xlen_t i; + + for( i = 0; i < n; ++i ) { + rapidjson::StringBuffer sb; + rapidjson::Writer < rapidjson::StringBuffer > writer( sb ); + SEXP s = lst[ i ]; + jsonify::writers::complex::write_value( writer, s, unbox, digits, numeric_dates, factors_as_string, by ); + os << sb.GetString(); + os << '\n'; } - Rcpp::StringVector sv = os.str(); - return sv; + + } + + template < int RTYPE > + inline void to_ndjson( + Rcpp::Vector< RTYPE > obj, + std::ostringstream& os, + bool unbox = false, + int digits = -1, + bool numeric_dates = true, + bool factors_as_string = true + ) { + + rapidjson::StringBuffer sb; + rapidjson::Writer < rapidjson::StringBuffer > writer( sb ); + jsonify::writers::simple::write_value( writer, obj, unbox, digits, numeric_dates, factors_as_string ); + os << sb.GetString(); + os << '\n'; + } + // lists are non-recursive; only the first element is ndjsonified... 
inline Rcpp::StringVector to_ndjson( SEXP obj, bool unbox = false, @@ -310,111 +357,69 @@ namespace api { std::string by = "row" ) { - // rapidjson::StringBuffer sb; - // rapidjson::Writer < rapidjson::StringBuffer > writer( sb ); - // - // std::ostringstream os; // for storing the final string of ndjson + std::ostringstream os; // for storing the final string of ndjson - // TODO - // the way ndjson is created depends on the type of input object - // a list will be element-wise - // data.frame / matrix will be whatever 'by' is set - // other cases not handled? - // - // given the rapidjson DOM doesn't accept '\n' characters, - // I may have to iterate over the object and create one json object at a time - // and output to an OSStream, append '\n', - // then at the end convert to StringVector. switch( TYPEOF( obj ) ) { case LGLSXP: { if( !Rf_isMatrix( obj ) ) { - Rcpp::LogicalVector lv = Rcpp::as< Rcpp::LogicalVector >( obj ); - std::ostringstream os; // for storing the final string of ndjson - - rapidjson::StringBuffer sb; - rapidjson::Writer < rapidjson::StringBuffer > writer( sb ); - jsonify::writers::simple::write_value( writer, lv, unbox ); - - os << sb.GetString(); - os << '\n'; - Rcpp::StringVector res = os.str(); - return res; + + to_ndjson< LGLSXP >( obj, os, unbox, digits, numeric_dates, factors_as_string ); + } else { Rcpp::LogicalMatrix lm = Rcpp::as< Rcpp::LogicalMatrix >( obj ); - return to_ndjson( lm, unbox, by ); + to_ndjson( lm, os, unbox, by ); } + break; } case INTSXP: { if( !Rf_isMatrix( obj ) ) { - Rcpp::IntegerVector iv = Rcpp::as< Rcpp::IntegerVector >( obj ); - std::ostringstream os; // for storing the final string of ndjson - - rapidjson::StringBuffer sb; - rapidjson::Writer < rapidjson::StringBuffer > writer( sb ); - jsonify::writers::simple::write_value( writer, iv, unbox ); + + to_ndjson< INTSXP >( obj, os, unbox, digits, numeric_dates, factors_as_string ); - os << sb.GetString(); - os << '\n'; - Rcpp::StringVector res = os.str(); - 
return res; } else { Rcpp::IntegerMatrix im = Rcpp::as< Rcpp::IntegerMatrix >( obj ); - return to_ndjson( im, unbox, by ); + to_ndjson( im, os, unbox, by ); + } + break; } case REALSXP: { if( !Rf_isMatrix( obj ) ) { - Rcpp::NumericVector nv = Rcpp::as< Rcpp::NumericVector >( obj ); - std::ostringstream os; // for storing the final string of ndjson - - rapidjson::StringBuffer sb; - rapidjson::Writer < rapidjson::StringBuffer > writer( sb ); - jsonify::writers::simple::write_value( writer, nv, unbox, digits ); + to_ndjson< REALSXP >( obj, os, unbox, digits, numeric_dates, factors_as_string ); - os << sb.GetString(); - os << '\n'; - Rcpp::StringVector res = os.str(); - return res; } else { Rcpp::NumericMatrix nm = Rcpp::as< Rcpp::NumericMatrix >( obj ); - return to_ndjson( nm, unbox, digits, by ); + to_ndjson( nm, os, unbox, digits, by ); + } + break; } case STRSXP: { if( !Rf_isMatrix( obj ) ) { - Rcpp::StringVector sv = Rcpp::as< Rcpp::StringVector >( obj ); - - std::ostringstream os; // for storing the final string of ndjson - - rapidjson::StringBuffer sb; - rapidjson::Writer < rapidjson::StringBuffer > writer( sb ); - jsonify::writers::simple::write_value( writer, sv, unbox ); - os << sb.GetString(); - os << '\n'; - Rcpp::StringVector res = os.str(); - return res; + to_ndjson< STRSXP >( obj, os, unbox, digits, numeric_dates, factors_as_string ); } else { Rcpp::StringMatrix sm = Rcpp::as< Rcpp::StringMatrix >( obj ); - return to_ndjson( sm, unbox, by ); + to_ndjson( sm, os, unbox, by ); + } + break; } case VECSXP: { if( Rf_inherits( obj, "data.frame") ) { + Rcpp::DataFrame df = Rcpp::as< Rcpp::DataFrame >( obj ); - return to_ndjson( df, unbox, digits, numeric_dates, factors_as_string, by ); + to_ndjson( df, os, unbox, digits, numeric_dates, factors_as_string, by ); + } else { // list Rcpp::List lst = Rcpp::as< Rcpp::List >( obj ); - R_xlen_t n = lst.size(); - R_xlen_t i; - for( i = 0; i < n; ++i ) { - SEXP s = lst[ i ]; - return to_ndjson( s, unbox, digits, 
numeric_dates, factors_as_string, by ); - } + to_ndjson( lst, os, unbox, digits, numeric_dates, factors_as_string, by ); + } + break; } default: { Rcpp::stop("jsonify - expecting a matrix, data.frame or list"); @@ -422,12 +427,9 @@ namespace api { } - return Rcpp::StringVector::create(); - - //rapidjson::StringBuffer sb; - //rapidjson::Writer < rapidjson::StringBuffer > writer( sb ); - //jsonify::writers::complex::write_value( writer, lst, unbox, digits, numeric_dates, factors_as_string, by ); - //return jsonify::utils::finalise_json( sb ); + Rcpp::StringVector js = os.str(); + js.attr("class") = "json"; + return js; } diff --git a/inst/include/jsonify/to_json/writers/ndjson.hpp b/inst/include/jsonify/to_json/writers/ndjson.hpp new file mode 100644 index 0000000..e69de29 diff --git a/inst/include/jsonify/to_json/writers/simple.hpp b/inst/include/jsonify/to_json/writers/simple.hpp index 7a924ef..120529f 100644 --- a/inst/include/jsonify/to_json/writers/simple.hpp +++ b/inst/include/jsonify/to_json/writers/simple.hpp @@ -342,14 +342,14 @@ namespace simple { write_value( writer, lv, unbox ); break; } - case STRSXP: { + default: { Rcpp::StringVector sv = Rcpp::as< Rcpp::StringVector >( sexp ); write_value( writer, sv, unbox ); break; } - default: { - Rcpp::stop("Unknown R object type"); - } + // default: { + // Rcpp::stop("Unknown R object type"); + // } } } @@ -491,7 +491,7 @@ namespace simple { bool unbox = false ) { - Rcpp::LogicalVector this_row = mat(row, Rcpp::_); + Rcpp::LogicalVector this_row = mat( row, Rcpp::_ ); write_value( writer, this_row, unbox ); } diff --git a/man/to_ndjson.Rd b/man/to_ndjson.Rd index ce238c4..f41e84b 100644 --- a/man/to_ndjson.Rd +++ b/man/to_ndjson.Rd @@ -7,6 +7,63 @@ to_ndjson(x, unbox = FALSE, digits = NULL, numeric_dates = TRUE, factors_as_string = TRUE, by = "row") } +\arguments{ +\item{x}{object to convert to JSON} + +\item{unbox}{logical indicating if single-value arrays should be 'unboxed', +that is, not contained inside an 
array.} + +\item{digits}{integer specifying the number of decimal places to round numerics. +Default is \code{NULL} - no rounding} + +\item{numeric_dates}{logical indicating if dates should be treated as numerics. +Defaults to TRUE for speed. If FALSE, the dates will be coerced to character in UTC time zone} + +\item{factors_as_string}{logical indicating if factors should be treated as strings. Defaults to TRUE.} + +\item{by}{either "row" or "column" indicating if data.frames and matrices should be processed +row-wise or column-wise. Defaults to "row"} +} \description{ Converts R objects to ndjson } +\details{ +Lists are converted to ndjson non-recursively. That is, each of the objects +in the list at the top level is converted to a new-line JSON object. Any nested +sub-elements are then contained within that JSON object. See examples. +} +\examples{ + +to_ndjson( 1:5 ) +to_ndjson( letters ) + +mat <- matrix(1:6, ncol = 2) + +to_ndjson( x = mat ) +to_ndjson( x = mat, by = "col" ) + +df <- data.frame( + x = 1:5 + , y = letters[1:5] + , z = as.Date(seq(18262, 18262 + 4, by = 1 ), origin = "1970-01-01" ) + ) + +to_ndjson( x = df ) +to_ndjson( x = df, numeric_dates = FALSE ) +to_ndjson( x = df, factors_as_string = FALSE ) +to_ndjson( x = df, by = "column" ) + +## Lists are non-recursive; only elements `x` and `y` are converted to ndjson +lst <- list( + x = 1:5 + , y = list( + a = letters[1:5] + , b = data.frame(i = 10:15, j = 20:25) + ) +) + +to_ndjson( x = lst ) +to_ndjson( x = lst, by = "column") + + +} diff --git a/tests/testthat/test-to_ndjson.R b/tests/testthat/test-to_ndjson.R new file mode 100644 index 0000000..b9e503f --- /dev/null +++ b/tests/testthat/test-to_ndjson.R @@ -0,0 +1,38 @@ +context("to_ndjson") + +test_that("objects converted to ndjson",{ + + ## vectors - as per json + expect_equal( unclass( to_ndjson( 1:5 ) ),"[1,2,3,4,5]\n") + expect_equal( unclass( to_ndjson( letters[1:5] ) ), '["a","b","c","d","e"]\n') + + mat <- matrix(1:6, ncol = 2) + + 
expect_equal( unclass( to_ndjson( x = mat ) ), '[1,4]\n[2,5]\n[3,6]\n') + expect_equal( unclass( to_ndjson( x = mat, by = "col" ) ), '[1,2,3]\n[4,5,6]\n' ) + + df <- data.frame( + x = 1:5 + , y = letters[1:5] + , z = as.Date(seq(18262, 18262 + 4, by = 1 ), origin = "1970-01-01" ) + ) + + expect_equal( unclass( to_ndjson( x = df ) ) , "{\"x\":1,\"y\":\"a\",\"z\":18262.0}\n{\"x\":2,\"y\":\"b\",\"z\":18263.0}\n{\"x\":3,\"y\":\"c\",\"z\":18264.0}\n{\"x\":4,\"y\":\"d\",\"z\":18265.0}\n{\"x\":5,\"y\":\"e\",\"z\":18266.0}\n" ) + expect_equal( unclass( to_ndjson( x = df, numeric_dates = FALSE ) ), "{\"x\":1,\"y\":\"a\",\"z\":\"2020-01-01\"}\n{\"x\":2,\"y\":\"b\",\"z\":\"2020-01-02\"}\n{\"x\":3,\"y\":\"c\",\"z\":\"2020-01-03\"}\n{\"x\":4,\"y\":\"d\",\"z\":\"2020-01-04\"}\n{\"x\":5,\"y\":\"e\",\"z\":\"2020-01-05\"}\n" ) + expect_equal( unclass( to_ndjson( x = df, factors_as_string = FALSE ) ), "{\"x\":1,\"y\":1,\"z\":18262.0}\n{\"x\":2,\"y\":2,\"z\":18263.0}\n{\"x\":3,\"y\":3,\"z\":18264.0}\n{\"x\":4,\"y\":4,\"z\":18265.0}\n{\"x\":5,\"y\":5,\"z\":18266.0}\n" ) + expect_equal( unclass( to_ndjson( x = df, by = "column" ) ), "{\"x\":[1,2,3,4,5]}\n{\"y\":[\"a\",\"b\",\"c\",\"d\",\"e\"]}\n{\"z\":[18262.0,18263.0,18264.0,18265.0,18266.0]}\n" ) + + ## Lists are non-recursive; only elements `x` and `y` are converted to ndjson + lst <- list( + x = 1:5 + , y = list( + a = letters[1:5] + , b = data.frame(i = 10:15, j = 20:25) + ) + ) + + expect_equal( unclass( to_ndjson( x = lst ) ), "[1,2,3,4,5]\n{\"a\":[\"a\",\"b\",\"c\",\"d\",\"e\"],\"b\":[{\"i\":10,\"j\":20},{\"i\":11,\"j\":21},{\"i\":12,\"j\":22},{\"i\":13,\"j\":23},{\"i\":14,\"j\":24},{\"i\":15,\"j\":25}]}\n" ) + expect_equal( unclass( to_ndjson( x = lst, by = "column") ), "[1,2,3,4,5]\n{\"a\":[\"a\",\"b\",\"c\",\"d\",\"e\"],\"b\":{\"i\":[10,11,12,13,14,15],\"j\":[20,21,22,23,24,25]}}\n" ) + + +}) \ No newline at end of file