kkondo1981 · kkondo1981 · Jun 6, 2021 · Jun 6, 2021 · Jun 9, 2021 · Jun 9, 2021
diff --git a/NAMESPACE b/NAMESPACE
@@ -7,6 +7,7 @@ S3method(predict,AccurateGLM)
 S3method(print,AccurateGLM)
 S3method(residuals,AccurateGLM)
 export(aglm)
+export(aglm.fit)
 export(createEqualFreqBins)
 export(createEqualWidthBins)
 export(cv.aglm)

diff --git a/R/accurate-glm.R b/R/accurate-glm.R
@@ -14,6 +14,7 @@
 #' @slot fit.preval Same as in the result of \link{cv.glmnet}.
 #' @slot foldid Same as in the result of \link{cv.glmnet}.
 #' @slot call An object of class `call`, corresponding to the function call when this `AccurateGLM` object is created.
+#' @slot formula_info A list to store information related to the formula object.
 #'
 #' @author Kenji Kondo
 #'
@@ -32,7 +33,8 @@ setClass("AccurateGLM",
                                        lambda.1se="numeric",
                                        fit.preval="matrix",
                                        foldid="integer",
-                                       call="ANY"))
+                                       call="ANY",
+                                       formula_info="ANY"))
 
 
 #' Class for results of `cva.aglm()`

diff --git a/R/aglm.R b/R/aglm.R
@@ -100,22 +100,27 @@
 #' @importFrom assertthat assert_that
 #' @importFrom glmnet glmnet
 #' @importFrom methods new
-aglm <- function(x, y,
-                 qualitative_vars_UD_only=NULL,
-                 qualitative_vars_both=NULL,
-                 qualitative_vars_OD_only=NULL,
-                 quantitative_vars=NULL,
-                 use_LVar=FALSE,
-                 extrapolation="default",
-                 add_linear_columns=TRUE,
-                 add_OD_columns_of_qualitatives=TRUE,
-                 add_interaction_columns=FALSE,
-                 OD_type_of_quantitatives="C",
-                 nbin.max=NULL,
-                 bins_list=NULL,
-                 bins_names=NULL,
-                 family=c("gaussian","binomial","poisson"),
-                 ...) {
+#'
+#' @name aglm
+NULL
+
+
+aglm.base <- function(x, y,
+                      qualitative_vars_UD_only=NULL,
+                      qualitative_vars_both=NULL,
+                      qualitative_vars_OD_only=NULL,
+                      quantitative_vars=NULL,
+                      use_LVar=FALSE,
+                      extrapolation="default",
+                      add_linear_columns=TRUE,
+                      add_OD_columns_of_qualitatives=TRUE,
+                      add_interaction_columns=FALSE,
+                      OD_type_of_quantitatives="C",
+                      nbin.max=NULL,
+                      bins_list=NULL,
+                      bins_names=NULL,
+                      family=c("gaussian","binomial","poisson"),
+                      ...) {
   # Create an input object
   x <- newInput(x,
                 qualitative_vars_UD_only=qualitative_vars_UD_only,
@@ -159,3 +164,72 @@ aglm <- function(x, y,
 
   return(new("AccurateGLM", backend_models=list(glmnet=glmnet_result), vars_info=x@vars_info, call=match.call()))
 }
+
+
+#' @rdname aglm
+aglm <- function(formula,
+                 data,
+                 family=c("gaussian","binomial","poisson"),
+                 weights=NULL,
+                 offset=NULL,
+                 subset=NULL,
+                 na.action=getOption("na.action"),
+                 drop.unused.levels=FALSE,
+                 xlev=NULL,
+                 ...) {
+  cl <- match.call(expand.dots=FALSE)
+  m <- match(c("formula", "data", "weights", "offset", "subset", "na.action", "drop.unused.levels", "xlev"),
+             names(cl), 0)
+  cl <- cl[c(1, m)]
+  cl[[1]] <- quote(stats::model.frame)
+  mf <- eval.parent(cl)
+
+  # The intercept term is not necessary in the original design matrix.
+  tf <- attr(mf, "terms")
+  attr(tf, "intercept") <- 0
+
+  x <- model.matrix(tf, mf)
+  y <- model.response(mf)
+  weights <- model.weights(mf)
+  offset <- model.offset(mf)
+
+  model <- aglm.base(x=x,
+                     y=y,
+                     family=family,
+                     weights=weights,
+                     offset=offset,
+                     ...)
+  model@call[[1]] <- match.call()
+  model@formula_info <- list(terms=attr(mf, "terms"),
+                             xlev=xlev)
+
+  return(model)
+}
+
+
+#' @rdname aglm
+#' @export
+aglm.fit <- function(x, y,
+                     qualitative_vars_UD_only=NULL,
+                     qualitative_vars_both=NULL,
+                     qualitative_vars_OD_only=NULL,
+                     quantitative_vars=NULL,
+                     use_LVar=FALSE,
+                     extrapolation="default",
+                     add_linear_columns=TRUE,
+                     add_OD_columns_of_qualitatives=TRUE,
+                     add_interaction_columns=FALSE,
+                     OD_type_of_quantitatives="C",
+                     nbin.max=NULL,
+                     bins_list=NULL,
+                     bins_names=NULL,
+                     family=c("gaussian","binomial","poisson"),
+                     ...) {
+  cl <- match.call()
+  cl[[1]] <- quote(aglm.base)
+  model <- eval(cl)
+  model@call[[1]] <- match.call()
+  model@formula_info <- NULL
+
+  return(model)
+}
diff --git a/R/predict-aglm.R b/R/predict-aglm.R
@@ -47,11 +47,11 @@
 #' @importFrom methods new
 #' @importFrom stats predict
 predict.AccurateGLM <- function(object,
-                                newx=NULL,
+                                newdata=NULL,
                                 s=NULL,
                                 type=c("link","response","coefficients","nonzero","class"),
                                 exact=FALSE,
-                                newoffset,
+                                newoffset=NULL,
                                 ...) {
   # It's necessary to use same names for some arguments as the original methods,
   # because devtools::check() issues warnings when using inconsistent names.
@@ -62,6 +62,30 @@ predict.AccurateGLM <- function(object,
   # Check and set `type`
   type <- match.arg(type)
 
+  if (is.null(object@formula_info)) {
+    newx <- newdata
+  } else {
+    # Create the original design matrix from data and formula
+    formula_info <- object@formula_info
+    cl <- match.call(expand.dots=FALSE)
+    m <- match(c("subset", "na.action", "drop.unused.levels", "xlev"),
+               names(cl), 0)
+    cl <- cl[c(1, m)]
+    cl$formula <- delete.response(formula_info$terms)
+    cl$data <- newdata
+    cl$offset <- newoffset
+    cl$xlev <- formula_info$xlev
+    cl[[1]] <- quote(stats::model.frame)
+    mf <- eval.parent(cl)
+
+    # The intercept term is not necessary in the original design matrix.
+    tf <- attr(mf, "terms")
+    attr(tf, "intercept") <- 0
+
+    newx <- model.matrix(tf, mf)
+    newoffset <- model.offset(mf)
+  }
+
   # Create an input object
   if (class(newx)[1] != "data.frame") newx <- data.frame(newx)
   for (i in seq(dim(newx)[2])) {

diff --git a/examples/aglm-1.R b/examples/aglm-1.R
@@ -11,32 +11,29 @@ colnames(xy)[ncol(xy)] <- "y" # Let medv be the objective variable, y.
 ## Split data into train and test
 n <- nrow(xy) # Sample size.
 set.seed(2018) # For reproducibility.
-test.id <- sample(n, round(n/4)) # ID numbders for test data.
+test.id <- sample(n, round(n/4)) # ID numbers for test data.
 test <- xy[test.id,] # test is the data.frame for testing.
 train <- xy[-test.id,] # train is the data.frame for training.
-x <- train[-ncol(xy)]
-y <- train$y
-newx <- test[-ncol(xy)]
 y_true <- test$y
 
 ## Fit the model
-model <- aglm(x, y)  # alpha=1 (the default value)
+model <- aglm(y ~ ., data=train)  # alpha=1 (the default value)
 
 ## Predict for various alpha and lambda
 lambda <- 0.1
-y_pred <- predict(model, newx=newx, s=lambda)
+y_pred <- predict(model, newdata=test, s=lambda)
 rmse <- sqrt(mean((y_true - y_pred)^2))
 cat(sprintf("RMSE for lambda=%.2f: %.5f \n\n", lambda, rmse))
 
 lambda <- 1.0
-y_pred <- predict(model, newx=newx, s=lambda)
+y_pred <- predict(model, newdata=test, s=lambda)
 rmse <- sqrt(mean((y_true - y_pred)^2))
 cat(sprintf("RMSE for lambda=%.2f: %.5f \n\n", lambda, rmse))
 
 alpha <- 0
-model <- aglm(x, y, alpha=alpha)
+model <- aglm(y ~ ., data=train, alpha=alpha)
 
 lambda <- 0.1
-y_pred <- predict(model, newx=newx, s=lambda)
+y_pred <- predict(model, newdata=test, s=lambda)
 rmse <- sqrt(mean((y_true - y_pred)^2))
 cat(sprintf("RMSE for alpha=%.2f and lambda=%.2f: %.5f \n\n", alpha, lambda, rmse))
diff --git a/examples/aglm-fit-1.R b/examples/aglm-fit-1.R
@@ -0,0 +1,42 @@
+
+#################### Gaussian case ####################
+
+library(MASS) # For Boston
+library(aglm)
+
+## Read data
+xy <- Boston # xy is a data.frame to be processed.
+colnames(xy)[ncol(xy)] <- "y" # Let medv be the objective variable, y.
+
+## Split data into train and test
+n <- nrow(xy) # Sample size.
+set.seed(2018) # For reproducibility.
+test.id <- sample(n, round(n/4)) # ID numbers for test data.
+test <- xy[test.id,] # test is the data.frame for testing.
+train <- xy[-test.id,] # train is the data.frame for training.
+x <- train[-ncol(xy)]
+y <- train$y
+newx <- test[-ncol(xy)]
+y_true <- test$y
+
+## Fit the model
+model <- aglm.fit(x, y)  # alpha=1 (the default value)
+
+## Predict for various alpha and lambda
+lambda <- 0.1
+y_pred <- predict(model, newdata=newx, s=lambda)
+rmse <- sqrt(mean((y_true - y_pred)^2))
+cat(sprintf("RMSE for lambda=%.2f: %.5f \n\n", lambda, rmse))
+
+lambda <- 1.0
+y_pred <- predict(model, newdata=newx, s=lambda)
+rmse <- sqrt(mean((y_true - y_pred)^2))
+cat(sprintf("RMSE for lambda=%.2f: %.5f \n\n", lambda, rmse))
+
+alpha <- 0
+model <- aglm.fit(x, y, alpha=alpha)
+
+lambda <- 0.1
+y_pred <- predict(model, newdata=newx, s=lambda)
+rmse <- sqrt(mean((y_true - y_pred)^2))
+cat(sprintf("RMSE for alpha=%.2f and lambda=%.2f: %.5f \n\n", alpha, lambda, rmse))
diff --git a/man/AccurateGLM-class.Rd b/man/AccurateGLM-class.Rd
diff --git a/man/aglm.Rd b/man/aglm.Rd
diff --git a/man/predict.AccurateGLM.Rd b/man/predict.AccurateGLM.Rd