Finish Release-1

matrix-profile-foundation · Aug 14, 2018 · 2348ad8 · 2348ad8
2 parents 5618beb + 079f9ba
commit 2348ad8
Show file tree

Hide file tree

Showing 36 changed files with 498 additions and 109 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -1,5 +1,7 @@
 ^packrat/
 ^\.Rprofile$
+^\.travis.yml$
 ^.*\.Rproj$
 ^\.Rproj\.user$
 ^LICENSE\.md$
+^README\.Rmd$
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,4 @@
-packrat/lib*/
 .Rproj.user
 .RData
 .Rhistory
+packrat/lib*/
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,17 @@
+language: r
+cache:
+  directories:
+    - $TRAVIS_BUILD_DIR/packrat/src
+    - $TRAVIS_BUILD_DIR/packrat/lib
+  packages: true
+sudo: false
+install:
+  - R -e "0" --args --bootstrap-packrat
+  - R -e "packrat::restore(restart = FALSE)"
+r_build_args: --no-multiarch --with-keep.source
+r_check_args: --as-cran
+r_packages:
+  - covr
+
+after_success:
+  - Rscript -e 'library(covr); codecov()'
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: tsmp
 Type: Package
 Title: Time Series with Matrix Profile
-Version: 0.2.5
+Version: 0.2.10
 Authors@R: c(
     person("Francisco", "Bischoff", email = "[email protected]", role = c("aut", "cre")),
     person("Michael", "Yeh", email = "[email protected]", role = c("res", "ccp", "ctb"))

diff --git a/R/m_guide_search.R b/R/m_guide_search.R
@@ -8,7 +8,7 @@
 #' @param window.size an `int` with the size of the sliding window.
 #' @param matrix.profile multidimensional matrix profile (matrix)
 #' @param profile.index multidimensional profile index (from [mstomp()] or [mstomp.par()]).
-#' @param n.dim an `int`. The dimensionality of the MOTIF to find
+#' @param n.dim an `int`. The dimensionality of the MOTIF to find.
 #'
 #' @return Returns the `motif.idx` with the index of MOTIFs found and `motif.dim`
 #' with the spanned dimensions of respective MOTIF.
@@ -23,9 +23,15 @@
 #' @references Website: <http://www.cs.ucr.edu/~eamonn/MatrixProfile.html>
 #'
 #' @examples
+#' # This is a fast toy example and results are useless. For a complete result, run the code inside
+#' # the 'Not run' section below.
+#' w <- toy_data$sub.len
+#' mp <- mstomp(toy_data$data[1:200,], w, verbose = 0)
+#' motifs <- guide.search(toy_data$data[1:200,], w, mp$mp, mp$pi, 2)
 #' \dontrun{
-#' mp <- mstomp.par(toy_data$data, 30)
-#' motifs <- guide.search(toy_data$data, 30, mp$mp, mp$pi, 2)
+#' w <- toy_data$sub.len
+#' mp <- mstomp.par(toy_data$data, w, verbose = 0)
+#' motifs <- guide.search(toy_data$data, w, mp$mp, mp$pi, 2)
 #' }
 
 guide.search <- function(data, window.size, matrix.profile, profile.index, n.dim) {

diff --git a/R/m_unconstrain_search.R b/R/m_unconstrain_search.R
@@ -23,9 +23,15 @@
 #' @references Website: <http://www.cs.ucr.edu/~eamonn/MatrixProfile.html>
 #'
 #' @examples
+#' # This is a fast toy example and results are useless. For a complete result, run the code inside
+#' # the 'Not run' section below.
+#' w <- toy_data$sub.len
+#' mp <- mstomp(toy_data$data[1:200,], w, verbose = 0)
+#' motifs <- unconstrain.search(toy_data$data[1:200,], w, mp$mp, mp$pi, 2)
 #' \dontrun{
-#' mp <- mstomp.par(toy_data$data, 30)
-#' motifs <- unconstrain.search(toy_data$data, 30, mp$mp, mp$pi, 4, 2)
+#' w <- toy_data$sub.len
+#' mp <- mstomp.par(toy_data$data, w)
+#' motifs <- unconstrain.search(toy_data$data, w, mp$mp, mp$pi, 4, 2)
 #' }
 #'
 

diff --git a/R/mass.R b/R/mass.R
@@ -20,7 +20,7 @@
 #' @references <https://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html>
 #'
 #' @examples
-#' w <- 30
+#' w <- toy_data$sub.len
 #' ref.data <- toy_data$data[,1]
 #' query.data <- toy_data$data[,1]
 #' d.size <- length(ref.data)

diff --git a/R/mass_pre.R b/R/mass_pre.R
@@ -15,7 +15,7 @@
 #' @references <https://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html>
 #'
 #' @examples
-#' w <- 30
+#' w <- toy_data$sub.len
 #' ref.data <- toy_data$data[,1]
 #' query.data <- toy_data$data[,1]
 #' d.size <- length(ref.data)

diff --git a/R/misc.R b/R/misc.R
@@ -7,7 +7,7 @@
 #' @export
 #'
 #' @examples
-#' data.sd <- fast.movsd(toy_data$data[,1], 30)
+#' data.sd <- fast.movsd(toy_data$data[,1], toy_data$sub.len)
 
 fast.movsd <- function(data, n) {
 
@@ -53,7 +53,7 @@ fast.movsd <- function(data, n) {
 #' @return Returns a vector with the moving average
 #' @export
 #' @examples
-#' data.avg <- fast.movavg(toy_data$data[,1], 30)
+#' data.avg <- fast.movavg(toy_data$data[,1], toy_data$sub.len)
 
 fast.movavg <- function(data, n) {
   data.mean <- stats::filter(data, rep(1 / n, n), sides = 2)

diff --git a/R/mstomp.R b/R/mstomp.R
@@ -6,12 +6,14 @@
 #' The MSTOMP computes the Matrix Profile and Profile Index for Multivariate Time Series that is meaningful for multidimensional MOTIF discovery. It uses the STOMP algorithm that is faster than STAMP but lacks its anytime property.
 #'
 #' Although this function handles Multivariate Time Series, it can also be used to handle Univariate Time Series.
+#' `verbose` changes how much information is printed by this function; `0` means nothing, `1` means text, `2` means text and sound.
 #'
 #' @param data a `matrix` of `numeric`, where each column is a time series. Accepts `vector` (see details), `list` and `data.frame` too.
 #' @param window.size an `int` with the size of the sliding window.
 #' @param must.dim an `int` or `vector` of which dimensions to forcibly include (default is `NULL`).
 #' @param exc.dim an `int` or `vector` of which dimensions to exclude (default is `NULL`).
 #' @param exclusion.zone a `numeric` with size of the exclusion zone, based on query size (default is `1/2`).
+#' @param verbose an `int`. See details. (Default is `2`).
 #'
 #' @return Returns the matrix profile `mp` and profile index `pi`.
 #' It also returns the left and right matrix profile `lmp`, `rmp` and profile index `lpi`, `rpi` that may be used to detect Time Series Chains (Yan Zhu 2018).
@@ -26,16 +28,15 @@
 #'
 #' @examples
 #' # using all dimensions
-#' mp <- mstomp(toy_data$data[1:200,], 30)
+#' mp <- mstomp(toy_data$data[1:200,], 30, verbose = 0)
 #' \dontrun{
 #' # force using dimensions 1 and 2
 #' mp <- mstomp(toy_data$data[1:200,], 30, must.dim = c(1, 2))
 #' # exclude dimensions 2 and 3
 #' mp <- mstomp(toy_data$data[1:200,], 30, exc.dim = c(2, 3))
 #' }
 
-mstomp <- function(data, window.size, must.dim = NULL, exc.dim = NULL, exclusion.zone = 1 / 2) {
-
+mstomp <- function(data, window.size, must.dim = NULL, exc.dim = NULL, exclusion.zone = 1 / 2, verbose = 2) {
   eps <- .Machine$double.eps^0.5
 
   ## get various length
@@ -108,9 +109,13 @@ mstomp <- function(data, window.size, must.dim = NULL, exc.dim = NULL, exclusion
   data[is.na(data)] <- 0
   data[is.infinite(data)] <- 0
 
-  pb <- utils::txtProgressBar(min = 0, max = matrix.profile.size, style = 3, width = 80)
-  on.exit(close(pb))
-  on.exit(beepr::beep(), TRUE)
+  if (verbose > 0) {
+    pb <- utils::txtProgressBar(min = 0, max = matrix.profile.size, style = 3, width = 80)
+    on.exit(close(pb))
+  }
+  if (verbose > 1) {
+    on.exit(beepr::beep(), TRUE)
+  }
 
   ## initialization
   data.fft <- matrix(0, (window.size + data.size), n.dim)
@@ -141,7 +146,9 @@ mstomp <- function(data, window.size, must.dim = NULL, exc.dim = NULL, exclusion
 
   for (i in 1:matrix.profile.size) {
     # compute the distance profile
-    utils::setTxtProgressBar(pb, i)
+    if (verbose > 0) {
+      utils::setTxtProgressBar(pb, i)
+    }
 
     query <- as.matrix(data[i:(i + window.size - 1), ])
 
@@ -252,7 +259,9 @@ mstomp <- function(data, window.size, must.dim = NULL, exc.dim = NULL, exclusion
 
   tictac <- Sys.time() - tictac
 
-  message(sprintf("\nFinished in %.2f %s", tictac, units(tictac)))
+  if (verbose > 0) {
+    message(sprintf("\nFinished in %.2f %s", tictac, units(tictac)))
+  }
 
   return(list(
     rmp = right.matrix.profile, rpi = right.profile.index,

diff --git a/R/mstomp_par.R b/R/mstomp_par.R
@@ -6,11 +6,13 @@
 #' The MSTOMP computes the Matrix Profile and Profile Index for Multivariate Time Series that is meaningful for multidimensional MOTIF discovery. It uses the STOMP algorithm that is faster than STAMP but lacks its anytime property.
 #'
 #' Although this function handles Multivariate Time Series, it can also be used to handle Univariate Time Series.
+#' `verbose` changes how much information is printed by this function; `0` means nothing, `1` means text, `2` means text and sound.
 #'
 #' @param data a `matrix` of `numeric`, where each column is a time series. Accepts `vector` (see details), `list` and `data.frame` too.
 #' @param window.size an `int`. Size of the sliding window.
 #' @param exclusion.zone a `numeric`. Size of the exclusion zone, based on query size (default is `1/2`).
 #' @param n.workers an `int`. Number of workers for parallel. (Default is `2`).
+#' @param verbose an `int`. See details. (Default is `2`).
 #'
 #' @return Returns the matrix profile `mp` and profile index `pi`.
 #' It also returns the left and right matrix profile `lmp`, `rmp` and profile index `lpi`, `rpi` that may be used to detect Time Series Chains (Yan Zhu 2018).
@@ -26,11 +28,10 @@
 #' @examples
 #' # using all dimensions
 #' Sys.sleep(1) # sometimes sleep is needed if you run parallel multiple times in a row
-#' mp <- mstomp.par(toy_data$data[1:100,], 30)
+#' mp <- mstomp.par(toy_data$data[1:100,], 30, verbose = 0)
 #' @import beepr doSNOW foreach parallel
 
-mstomp.par <- function(data, window.size, exclusion.zone = 1 / 2, n.workers = 2) {
-
+mstomp.par <- function(data, window.size, exclusion.zone = 1 / 2, n.workers = 2, verbose = 2) {
   eps <- .Machine$double.eps^0.5
 
   ## get various length
@@ -109,14 +110,23 @@ mstomp.par <- function(data, window.size, exclusion.zone = 1 / 2, n.workers = 2)
   cores <- min(max(2, n.workers), parallel::detectCores())
 
   # SNOW package
-  progress <- function(n) utils::setTxtProgressBar(pb, n)
+  if (verbose > 0) {
+    progress <- function(n) utils::setTxtProgressBar(pb, n)
+  }
+  else {
+    progress <- function(n) return(invisible(TRUE))
+  }
   opts <- list(progress = progress)
 
   cl <- parallel::makeCluster(cores)
   doSNOW::registerDoSNOW(cl)
   on.exit(parallel::stopCluster(cl))
-  on.exit(close(pb), TRUE)
-  on.exit(beepr::beep(), TRUE)
+  if (verbose > 0) {
+    on.exit(close(pb), TRUE)
+  }
+  if (verbose > 1) {
+    on.exit(beepr::beep(), TRUE)
+  }
 
   ## initialize variable
   per.work <- max(10, ceiling(matrix.profile.size / 100))
@@ -135,7 +145,9 @@ mstomp.par <- function(data, window.size, exclusion.zone = 1 / 2, n.workers = 2)
 
   tictac <- Sys.time()
 
-  pb <- utils::txtProgressBar(min = 0, max = n.work, style = 3, width = 80)
+  if (verbose > 0) {
+    pb <- utils::txtProgressBar(min = 0, max = n.work, style = 3, width = 80)
+  }
 
   i <- NULL # CRAN NOTE fix
   `%dopar%` <- foreach::`%dopar%` # CRAN NOTE fix
@@ -184,11 +196,11 @@ mstomp.par <- function(data, window.size, exclusion.zone = 1 / 2, n.workers = 2)
         last.product[1, ] <- first.product[idx, ]
 
         dist.pro <- 2 * (window.size - (last.product - window.size * data.mean * kronecker(matrix(1, matrix.profile.size, 1), t(data.mean[idx, ]))) /
-                           (data.sd * kronecker(matrix(1, matrix.profile.size, 1), t(data.sd[idx, ]))))
+          (data.sd * kronecker(matrix(1, matrix.profile.size, 1), t(data.sd[idx, ]))))
       }
 
       dist.pro <- Re(dist.pro)
-      #dist.pro <- max(dist.pro, 0)
+      # dist.pro <- max(dist.pro, 0)
       drop.value <- query[1, ]
 
       # apply exclusion zone
@@ -202,10 +214,12 @@ mstomp.par <- function(data, window.size, exclusion.zone = 1 / 2, n.workers = 2)
       dist.pro[skip.location, ] <- Inf
 
       # figure out and store the nearest neighbor
-      if (n.dim > 1)
-        dist.pro.sort <- t(apply(dist.pro, 1, sort)) # sort by row, left to right
-      else
+      if (n.dim > 1) {
+        dist.pro.sort <- t(apply(dist.pro, 1, sort))
+      } # sort by row, left to right
+      else {
         dist.pro.sort <- dist.pro
+      }
 
       dist.pro.cum <- rep(0, matrix.profile.size)
       dist.pro.merg <- rep(0, matrix.profile.size)
@@ -264,7 +278,9 @@ mstomp.par <- function(data, window.size, exclusion.zone = 1 / 2, n.workers = 2)
 
   tictac <- Sys.time() - tictac
 
-  message(sprintf("\nFinished in %.2f %s", tictac, units(tictac)))
+  if (verbose > 0) {
+    message(sprintf("\nFinished in %.2f %s", tictac, units(tictac)))
+  }
 
   return(list(
     rmp = right.matrix.profile, rpi = right.profile.index,

diff --git a/R/sdts_predict.R b/R/sdts_predict.R
@@ -10,9 +10,20 @@
 #' @family SDTS
 #'
 #' @examples
+#' # This is a fast toy example and results are useless. For a complete result, run the code inside
+#' # the 'Not run' section below.
+#' w <- c(110, 220)
+#' subs <- 11000:20000
+#' tr_data <- test_data$train$data[subs]
+#' tr_label <- test_data$train$label[subs]
+#' te_data <- test_data$test$data[subs]
+#' te_label <- test_data$test$label[subs]
+#' model <- sdts.train(tr_data, tr_label, w, verbose = 0)
+#' predict <- sdts.predict(model, te_data, round(mean(w)))
+#' sdts.f.score(te_label, predict, 1)
 #' \dontrun{
 #' windows <- c(110, 220, 330)
-#' model <- sdts.train(test_data$train$data, test_data$train$label, windows)
+#' model <- sdts.train(test_data$train$data, test_data$train$label, windows, verbose = 0)
 #' predict <- sdts.predict(model, test_data$test$data, round(mean(windows)))
 #' sdts.f.score(test_data$test$label, predict, 1)
 #' }
@@ -86,6 +97,17 @@ sdts.predict <- function(model, data, window.size) {
 #' @family SDTS
 #'
 #' @examples
+#' # This is a fast toy example and results are useless. For a complete result, run the code inside
+#' # the 'Not run' section below.
+#' w <- c(110, 220)
+#' subs <- 11000:20000
+#' tr_data <- test_data$train$data[subs]
+#' tr_label <- test_data$train$label[subs]
+#' te_data <- test_data$test$data[subs]
+#' te_label <- test_data$test$label[subs]
+#' model <- sdts.train(tr_data, tr_label, w, verbose = 0)
+#' predict <- sdts.predict(model, te_data, round(mean(w)))
+#' sdts.f.score(te_label, predict, 1)
 #' \dontrun{
 #' windows <- c(110, 220, 330)
 #' model <- sdts.train(test_data$train$data, test_data$train$label, windows)