From 6cde002f8cbd7f936467f6f46cd29f8a8ffbd227 Mon Sep 17 00:00:00 2001
From: ZorzArg <george.argyriou@odysseusinc.com>
Date: Fri, 8 Mar 2024 11:18:18 +0000
Subject: [PATCH] shiny app updates

---
 .gitignore                      |   2 +
 shiny/R/fn.R                    |   1 +
 shiny/R/loadData.R              | 104 +++++---
 shiny/app.R                     | 448 +++++++++++++++++++++++++++-----
 shiny/migration/dataMigration.R | 414 +++++++++++++++++++++++++----
 shiny/migration/helpers.R       |  71 ++---
 6 files changed, 854 insertions(+), 186 deletions(-)

diff --git a/.gitignore b/.gitignore
index 2ecf99c..d9444b2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 # Project specific files and folders
 results/
+results_0603/
 results_old/
 logs/
 config.yml
@@ -17,6 +18,7 @@ hidden/
 results.zip
 scratchDiagnostics
 shiny/data/
+shiny/data_0603/
 shiny/data_old/
 bindResults.R
 bindResults_Public.R
diff --git a/shiny/R/fn.R b/shiny/R/fn.R
index a19487c..8d5572b 100644
--- a/shiny/R/fn.R
+++ b/shiny/R/fn.R
@@ -223,6 +223,7 @@ plotKM2 <- function(dat) {
   plot_colors <- unname(grafify::graf_palettes$kelly)
 
   p <- ggplot(dat, aes(x = time, y = estimate, color = outcomeCohortId)) +
+    #ggsurvfit::add_risktable(risktable_stats = c("n.risk", "cum.censor", "cum.event")) +   ## ADDED
     geom_step(linewidth = 1.5) +
     scale_color_manual(values = plot_colors) + #scale colors to kelly
     scale_y_continuous(labels = scales::percent_format()) + # convert y axis to percent
diff --git a/shiny/R/loadData.R b/shiny/R/loadData.R
index a793708..8b195e7 100644
--- a/shiny/R/loadData.R
+++ b/shiny/R/loadData.R
@@ -8,7 +8,7 @@ library(readr)
 source(here::here("shiny", "R", "fn.R"))
 source(here::here("shiny", "migration", "helpers.R"))
 
-dataPath <- here::here("shiny", "data")
+dataPath <- here::here("shiny", "data_0603")
 
 
 # About -----------------
@@ -50,7 +50,7 @@ ctsChar <- readr::read_csv(fs::path(dataPath, "baselineContinuous.csv"),
 conceptChar <- readr::read_csv(fs::path(dataPath, "baselineConcepts.csv"),
                                show_col_types = FALSE) %>%
   dplyr::rename(count = n) %>%
-  dplyr::select(-cohortDefinitionId) %>%
+  dplyr::select(-cohortDefinitionId, -timeWindow) %>%
   maskLowCount()
 
 ## Cohort baseline
@@ -63,17 +63,13 @@ cohortChar <- readr::read_csv(fs::path(dataPath, "baselineCohorts.csv"),
       TRUE ~ "Conditions"
     )
   ) %>%
-  dplyr::select(databaseId, timeWindow, cohortName, domain, covariateName, count, pct) %>%
-  dplyr::mutate(timeWindow = dplyr::case_when(
-    timeWindow == "[-365 : 0]" ~ "T(-365d to 0d)",
-    TRUE ~ timeWindow
-  )) %>%
+  dplyr::select(databaseId, cohortName, domain, covariateName, count, pct) %>%
   maskLowCount()
 
 ## Chapters baseline
 icdChar <- readr::read_csv(fs::path(dataPath, "baselineChapters.csv"),
                            show_col_types = FALSE) %>%
-  dplyr::select(-COHORT_ID) %>%
+  dplyr::select(-COHORT_ID, -timeWindow) %>%
   dplyr::rename(count = COUNTVALUE) %>%
   maskLowCount()
 
@@ -88,6 +84,7 @@ domainConceptChar <- sort(unique(conceptChar$domain))
 incTab <- readr::read_csv(fs::path(dataPath, "incidence.csv"),
                           show_col_types = FALSE) %>%
   dplyr::select(-OUTCOME_COHORT_DEFINITION_ID)  %>%
+  dplyr::mutate(PERSON_YEARS = as.integer(PERSON_DAYS/365.25), .before =6) %>%
   maskLowCountInci()
 
 ### Incidence Pickers
@@ -99,15 +96,6 @@ yearInci <- c("All", as.character(2000:2022))
 postIndex <- readr::read_csv(fs::path(dataPath, "postIndexPrevalence.csv"),
                              show_col_types = FALSE) %>%
   dplyr::select(-cohortId, -covariateId) %>%
-  dplyr::mutate(timeWindow = dplyr::case_when(
-    timeWindow == "1d - 365d" ~ "T(1d to 365d)",
-    timeWindow == "1d - 183d" ~ "T(1d to 183d)",
-    timeWindow == "184d - 365d" ~ "T(184d to 365d)",
-    timeWindow == "366d - 730d" ~ "T(366d to 730d)",
-    timeWindow == "731d - 1825d" ~ "T(731d to 1825d)",
-    timeWindow == "1d - 9999d" ~ "T(1d to 9999d)",
-    TRUE ~ timeWindow
-  )) %>%
   maskLowCount()
 
 
@@ -130,14 +118,19 @@ drugPi <- postIndex %>%
   # dplyr::mutate(
   #   timeWindow = factor(timeWindow, levels = c("1d - 183d", "184d - 365d", "1d - 365d", "366d - 730d", "731d - 1825d"))
   # ) %>%
-  dplyr::select(databaseId, timeWindow, cohortName, covariateName, count, pct) %>%
+  dplyr::select(databaseId, timeWindow, cohortName, covariateName, count, pct, cat) %>%
   dplyr::arrange(databaseId, timeWindow) %>%
-  dplyr::mutate(timeWindow = as.character(timeWindow))
+  dplyr::mutate(timeWindow = as.character(timeWindow),
+                cat = dplyr::case_when(
+                  cat == "within" ~ "Within time window",
+                  cat == "followUp" ~ "Complete follow-up"
+                  )
+                )
 
 ### Pickers
 drugCohorts <- unique(drugPi$covariateName)
 drugTimeWindow <- unique(drugPi$timeWindow)
-
+drugCategory <- unique(drugPi$cat)
 
 ## Procedures -----------------
 
@@ -154,29 +147,52 @@ procTimeWindow <- unique(procPi$timeWindow)
 # 5. Treatment Patterns -----------------
 
 ## Load treatment patterns table
-txPatDat <- readr::read_csv(fs::path(dataPath, "treatmentPatterns.csv"),
+txPatDat <- readr::read_csv(fs::path(dataPath, "treatmentPatternsAll.csv"),
                             show_col_types = FALSE)
 
-
-txPatDat2 <- readr::read_csv(fs::path(dataPath, "treatmentPatterns2.csv"),
+txPatDat6m <- readr::read_csv(fs::path(dataPath, "treatmentPatterns6m.csv"),
                              show_col_types = FALSE)
 
+txPatDat1y <- readr::read_csv(fs::path(dataPath, "treatmentPatterns1y.csv"),
+                              show_col_types = FALSE)
+
+txPatDat2y <- readr::read_csv(fs::path(dataPath, "treatmentPatterns2y.csv"),
+                              show_col_types = FALSE)
+
+txPatDatNsaids <- readr::read_csv(fs::path(dataPath, "treatmentPatternsAllNsaids.csv"),
+                            show_col_types = FALSE)
+
+txPatDat6mNsaids <- readr::read_csv(fs::path(dataPath, "treatmentPatterns6mNsaids.csv"),
+                              show_col_types = FALSE)
+
+txPatDat1yNsaids <- readr::read_csv(fs::path(dataPath, "treatmentPatterns1yNsaids.csv"),
+                              show_col_types = FALSE)
+
+txPatDat2yNsaids <- readr::read_csv(fs::path(dataPath, "treatmentPatterns2yNsaids.csv"),
+                              show_col_types = FALSE)
+
 txPatDatAll <- dplyr::bind_rows(
   txPatDat,
-  txPatDat2
+  txPatDat6m,
+  txPatDat1y,
+  txPatDat2y,
+  txPatDatNsaids,
+  txPatDat6mNsaids,
+  txPatDat1yNsaids,
+  txPatDat2yNsaids
 )
 
 ### Sankey pickers
-cohortName2 <- c(
-  cohortName,
-  "hmb2", "hmb2_age_lt_30", "hmb2_age_30_45", "hmb2_age_45_55"
-)
+cohortName2 <- c(cohortName)
 
 sankeyCohorts <- tibble::tibble(
-  id = c(1L, 1001L, 1002L, 1003L, 44L, 44001L, 44002L, 44003L),
+  id = c(1L, 1001L, 1002L, 1003L),
   name = cohortName2
 )
 
+txTime <- unique(txPatDatAll$time)
+txType <- unique(txPatDatAll$type)
+
 
 # 6. Time to Event -----------------
 
@@ -192,13 +208,23 @@ ttd <- arrow::read_parquet(file = fs::path(here::here(dataPath ,"ttd.parquet")))
     TRUE ~ `Cohort Name`)
   )
 
+ttd2 <- arrow::read_parquet(file = fs::path(here::here(dataPath ,"ttd2.parquet"))) %>%
+  dplyr::mutate(targetId = as.double(targetId)) %>%
+  dplyr::left_join(strataCounts, by = c("targetId" = "Strata Cohort Id", "database" = "Database")) %>%
+  dplyr::rename(`Cohort Name` = `Strata Cohort Name`) %>%
+  dplyr::select(database:`Cohort Name`) %>%
+  dplyr::mutate(`Cohort Name` = dplyr::case_when(
+    targetId == 1 ~ "hmb",
+    TRUE ~ `Cohort Name`)
+  )
+
 ### Relabel strata
-ttd <- relabelStrata(
-  ttd,
-  oldLabels = as.character(c(27:29, 31:35)),
-  newLabels = c("oc", "danazol", "grha", "lglIUD",
-                "nsaids", "progestin", "tranexamicAcid", "ulipristalAcetate")
-)
+# ttd <- relabelStrata(
+#   ttd,
+#   oldLabels = as.character(c(27:29, 31:35)),
+#   newLabels = c("oc", "danazol", "grha", "lglIUD",
+#                 "nsaids", "progestin", "tranexamicAcid", "ulipristalAcetate")
+# )
 
 ### TTE cohort pickers
 ttdCohorts <- unique(ttd$`Cohort Name`)
@@ -209,6 +235,9 @@ tteCohorts <- tibble::tibble(
   name = cohortName
 )
 
+## TTD Line pickers
+ttdLine <- unique(ttd$line)
+
 
 ## Time to Intervention -----------------
 
@@ -222,15 +251,14 @@ tti <- arrow::read_parquet(file = fs::path(here::here(dataPath ,"tti.parquet")))
     TRUE ~ `Cohort Name`)
     )
 
-
-
 ### Relabel strata
 tti <- relabelOutcome(
   tti,
-  oldLabels = as.character(36:43),
+  oldLabels = as.character(38:45),
   newLabels = c("bloodTransfusion", "copperIUD", "endometrialAblation", "hormonalIUD",
                 "hysterectomy", "myomectomy", "uae", "undefinedIUD")
 )
 
 ### TTI cohort pickers
 ttiCohorts <- unique(tti$`Cohort Name`)
+
diff --git a/shiny/app.R b/shiny/app.R
index b272bbc..4795f75 100644
--- a/shiny/app.R
+++ b/shiny/app.R
@@ -11,6 +11,7 @@ library(reactable)
 library(ggplot2)
 library(grafify)
 library(markdown)
+library(ggsurvfit)
 options(shiny.fullstacktrace = FALSE)
 
 
@@ -19,15 +20,16 @@ title <- "EHDEN HMB"
 
 underlyingDescription <- "Counts equal to 5 and below have been masked and replaced with '<5'."
 drugUtilizationDescription <- "Counts equal to 5 and below have been masked and replaced with '<5."
-clinicalCharacteristicsDescription <- "Counts equal to 5 and below have been masked and replaced with '<5'."
+drugUtilizationDescription2 <-  "Category 'Within time window' means that a drug exposure started and ended within the specified time window."
+drugUtilizationDescription3 <-  "Category 'Complete follow-up' means that a drug exposure spanned throughout the specified time window."
+clinicalCharacteristicsDescription <- "Counts equal to 5 and below have been masked and replaced with '<5'. Characteristics are assessed within a time window of 365 to 0 days prior to index date"
 
-incidenceDescription <- "Incidence rate is calculated by 'Outcome Count'/'Person Days' * 100."
+incidenceDescription <- "Incidence rate is defined as Outcome Count/Person Years * 1000."
 
-treatmentPatternsDescription <- "Treatment Patterns counts (Sequences) are restricted to 30. HMB cohort is the population whose index event is HMB and are censored by hysterectomy. Thus we consider only drugs.
-HMB2 cohort are those whose index event is HMB where there is no censoring for hysterectomy. Therefore we consider both drugs and procedures."
+treatmentPatternsDescription <- "Treatment Patterns counts (Sequences) are restricted to 30. HMB cohort is the population whose index event is HMB and are censored by hysterectomy. Thus we consider only drugs."
 
 procedureAnalysisDescription <- "Counts equal to 5 and below have been masked and replaced with '<5'.
-In the Time to Intervention tab we consider the time for only those who experienced the event, not the entire cohort population."
+In the 'Time to Intervention' tab we consider the time for only those who experienced the event, not the entire cohort population."
 
 dashboardVersion <- "0.3.0"
 dashboardDate <- Sys.Date()
@@ -79,6 +81,7 @@ body <- dashboardBody(
     ### About Tab------
     tabItem(
       tabName = "about",
+
       #### Study description
       fluidRow(
         box(
@@ -88,6 +91,7 @@ body <- dashboardBody(
           includeMarkdown("StudyDescription.md")
         )
       ),
+
       #### Study Information
       fluidRow(
         box(
@@ -99,6 +103,7 @@ body <- dashboardBody(
           "SAP Link:",a(href= "https://odyosg.github.io/ehden_hmb/sap.html", "https://odyosg.github.io/ehden_hmb/sap.html")
         )
       ),
+
       #### Database Information
       fluidRow(
         box(
@@ -125,9 +130,10 @@ body <- dashboardBody(
 
       fluidRow(
         tabBox(
-          #title = "Cohort Counts",
           id = "cohortCounts",
           width = 12,
+
+          #### Cohort Counts
           tabPanel("Cohort Counts",
                    fluidRow(
                      box(
@@ -151,6 +157,8 @@ body <- dashboardBody(
                      )
                    )
           ),
+
+          #### Strata Counts
           tabPanel("Strata Counts",
                    fluidRow(
                      box(
@@ -199,6 +207,7 @@ body <- dashboardBody(
                      box(
                        status = "success",
                        column(width = 6,
+
                               pickerInput(
                                 inputId = "databaseNameDemo",
                                 label = "Database Name",
@@ -233,6 +242,7 @@ body <- dashboardBody(
                      box(
                        status = "success",
                        column(width = 6,
+
                               pickerInput(
                                 inputId = "databaseNameCts",
                                 label = "Database Name",
@@ -267,6 +277,7 @@ body <- dashboardBody(
                      box(
                        status = "success",
                        column(width = 6,
+
                               pickerInput(
                                 inputId = "databaseNameConcept",
                                 label = "Database Name",
@@ -286,6 +297,7 @@ body <- dashboardBody(
                               ),
                        ),
                        column(width = 6,
+
                               pickerInput(
                                 inputId = "domainConcept",
                                 label = "Domain",
@@ -310,6 +322,7 @@ body <- dashboardBody(
                      box(
                        status = "success",
                        column(width = 6,
+
                               pickerInput(
                                 inputId = "databaseNameCohortCov",
                                 label = "Database Name",
@@ -329,6 +342,7 @@ body <- dashboardBody(
                               )
                        ),
                        column(width = 6,
+
                               pickerInput(
                                 inputId = "domainCohortCov",
                                 label = "Domain",
@@ -354,6 +368,7 @@ body <- dashboardBody(
                      box(
                        status = "success",
                        column(width = 6,
+
                               pickerInput(
                                 inputId = "databaseNameIcd",
                                 label = "Database Name",
@@ -407,14 +422,12 @@ body <- dashboardBody(
           id = "baselineChar",
           width = 12,
 
-          tabPanel(
-            "Table",
+          tabPanel("Table",
             fluidRow(
               box(
                 status = "success",
                 column(width = 6,
 
-                       # pick database
                        pickerInput(
                          inputId = "databaseNameInci",
                          label = "Database Name",
@@ -424,7 +437,6 @@ body <- dashboardBody(
                          multiple = TRUE
                        ),
 
-                       # pick cohortName
                        pickerInput(
                          inputId = "cohortNameInci",
                          label = "Cohort Name",
@@ -438,7 +450,6 @@ body <- dashboardBody(
                 ),
                 column(width = 6,
 
-                       # pick year
                        pickerInput(
                          inputId = "yearInci",
                          label = "Year",
@@ -462,10 +473,7 @@ body <- dashboardBody(
 
                    fluidRow(
                      column(width = 6,
-
-                            box(
-
-                              status = "success",
+                            box(status = "success",
 
                               pickerInput(
                                 inputId = "databaseNameYrInci",
@@ -489,7 +497,6 @@ body <- dashboardBody(
                    ),
 
                    fluidRow(
-
                      box(
                        width = 12,
                        plotOutput("inciYearPlot")
@@ -518,7 +525,7 @@ body <- dashboardBody(
         box(
           status = "success",
           column(width = 6,
-                 # pick database
+
                  pickerInput(
                    inputId = "databaseNameCondPi",
                    label = "Database Name",
@@ -527,7 +534,7 @@ body <- dashboardBody(
                    options = shinyWidgets::pickerOptions(actionsBox = TRUE),
                    multiple = TRUE
                  ),
-                 # pick cohort
+
                  pickerInput(
                    inputId = "cohortNameCondPi",
                    label = "Cohort Name",
@@ -539,7 +546,7 @@ body <- dashboardBody(
 
           ),
           column(width = 6,
-                 # pick outcome
+
                  pickerInput(
                    inputId = "conditionNameCondPi",
                    label = "Condition Name",
@@ -549,7 +556,7 @@ body <- dashboardBody(
                    multiple = TRUE
                  ),
 
-                 # pick year
+
                  pickerInput(
                    inputId = "timeWindowCondPi",
                    label = "Time Window",
@@ -582,6 +589,8 @@ body <- dashboardBody(
           width = 12,
           background = "light-blue",
           textOutput("treatmentPatternsDescription"),
+          textOutput("drugUtilizationDescription2"),
+          textOutput("drugUtilizationDescription3"),
           textOutput("drugUtilizationDescription")
         )
       ),
@@ -595,7 +604,8 @@ body <- dashboardBody(
                    fluidRow(
                      box(
                        status = "success",
-                       column(width = 6,
+                       column(width = 4,
+
                               pickerInput(
                                 inputId = "databaseNameDuPi",
                                 label = "Database Name",
@@ -604,6 +614,7 @@ body <- dashboardBody(
                                 options = shinyWidgets::pickerOptions(actionsBox = TRUE),
                                 multiple = TRUE
                               ),
+
                               pickerInput(
                                 inputId = "cohortNameDuPi",
                                 label = "Cohort Name",
@@ -613,7 +624,8 @@ body <- dashboardBody(
                                 multiple = TRUE
                               )
                        ),
-                       column(width = 6,
+                       column(width = 4,
+
                               pickerInput(
                                 inputId = "drugNameDuPi",
                                 label = "Drug Name",
@@ -622,6 +634,7 @@ body <- dashboardBody(
                                 options = shinyWidgets::pickerOptions(actionsBox = TRUE),
                                 multiple = TRUE
                               ),
+
                               pickerInput(
                                 inputId = "timeWindowDuPi",
                                 label = "Time Window",
@@ -630,6 +643,17 @@ body <- dashboardBody(
                                 options = shinyWidgets::pickerOptions(actionsBox = TRUE),
                                 multiple = TRUE
                               )
+                       ),
+                       column(width = 4,
+
+                              pickerInput(
+                                inputId = "drugCat",
+                                label = "Category",
+                                choices = drugCategory,
+                                selected = drugCategory,
+                                options = shinyWidgets::pickerOptions(actionsBox = TRUE),
+                                multiple = TRUE
+                              )
                        )
                      ),
                      fluidRow(
@@ -646,18 +670,36 @@ body <- dashboardBody(
           tabPanel("Sequences",
                    fluidRow(
                      box(
-                       status = "success",
-                       width = 4,
+                      status = "success",
+                      width = 4,
+                      column(width = 6,
+
                        pickerInput(
                          inputId = "databaseNameSankey",
                          label = "Database Name",
                          choices = databaseName
                        ),
+
                        pickerInput(
                          inputId = "cohortNameSankey",
                          label = "Cohort Name",
                          choices = cohortName2
+                       ),
+                      ),
+                      column(width = 6,
+
+                       pickerInput(
+                         inputId = "txTime",
+                         label = "Time from index date",
+                         choices = txTime
+                       ),
+
+                       pickerInput(
+                         inputId = "txType",
+                         label = "Type",
+                         choices = txType
                        )
+                      )
                      )
                    ),
                    fluidRow(
@@ -673,40 +715,102 @@ body <- dashboardBody(
                        reactableOutput("txPatDat"))
                    )
           ),
-          ### TTD Panel
-          tabPanel("Time to Discontinuation",
+
+          ### TTD Panel (without)
+          tabPanel("Time to Discontinuation (without NSAIDs)",
+                   fluidRow(
+                     box(
+                      status = "success",
+                      column(width = 6,
+                       height = "230px",
+                       background = "light-blue",
+
+                       pickerInput(
+                         inputId = "databaseNameTtd",
+                         label = "Database Name",
+                         choices = databaseName
+                       ),
+
+                       pickerInput(
+                         inputId = "cohortNameTtd",
+                         label = "Cohort Name",
+                         choices = ttdCohorts
+                       ),
+                      ),
+                      column(width = 6,
+
+                       pickerInput(
+                         inputId = "strataTtd",
+                         label = "Drugs",
+                         choices = c("Single", "All")
+                       )
+                      )
+                     )
+                   ),
                    fluidRow(
-                     column(width = 9,
-                            status = "success",
-                            plotOutput("ttdKmPlot")
+                     column(width = 12,
+                       box(
+                        width = NULL,
+                        plotOutput(height = "1000px",
+                                   "ttdKmPlot")
+                      )
                      ),
-                     column(width = 3,
-                            box(
-                              width = 9,
+
+                   ),
+                   fluidRow(
+                     box(
+                       width = 12,
+                       reactableOutput("ttdSurvTab")
+                     )
+                   )
+          ),
+
+          ### TTD Panel (with)
+          tabPanel("Time to Discontinuation (with NSAIDs)",
+                   fluidRow(
+                     box(
+                       status = "success",
+                       column(width = 6,
                               height = "230px",
                               background = "light-blue",
+
                               pickerInput(
-                                inputId = "databaseNameTtd",
+                                inputId = "databaseNameTtd2",
                                 label = "Database Name",
                                 choices = databaseName
                               ),
+
                               pickerInput(
-                                inputId = "cohortNameTtd",
+                                inputId = "cohortNameTtd2",
                                 label = "Cohort Name",
                                 choices = ttdCohorts
                               ),
+                       ),
+                       column(width = 6,
+
                               pickerInput(
-                                inputId = "strataTtd",
+                                inputId = "strataTtd2",
                                 label = "Drugs",
                                 choices = c("Single", "All")
                               )
+                       )
+                     )
+                   ),
+                   fluidRow(
+                     column(width = 12,
+                            box(
+                              width = NULL,
+                              #height = "1000px",
+                              plotOutput(height = "1000px",
+                                         "ttdKmPlot2")
                             )
                      ),
+
                    ),
                    fluidRow(
                      box(
                        width = 12,
-                       reactableOutput("ttdSurvTab")
+                       reactableOutput("ttdSurvTab2")
                      )
                    )
           )
@@ -737,6 +841,7 @@ body <- dashboardBody(
                      box(
                        status = "success",
                        column(width = 6,
+
                               pickerInput(
                                 inputId = "databaseNameProcPi",
                                 label = "Database Name",
@@ -745,6 +850,7 @@ body <- dashboardBody(
                                 options = shinyWidgets::pickerOptions(actionsBox = TRUE),
                                 multiple = TRUE
                               ),
+
                               pickerInput(
                                 inputId = "cohortNameProcPi",
                                 label = "Cohort Name",
@@ -755,6 +861,7 @@ body <- dashboardBody(
                               )
                        ),
                        column(width = 6,
+
                               pickerInput(
                                 inputId = "procNameProcPi",
                                 label = "Procedure Name",
@@ -763,6 +870,7 @@ body <- dashboardBody(
                                 options = shinyWidgets::pickerOptions(actionsBox = TRUE),
                                 multiple = TRUE
                               ),
+
                               pickerInput(
                                 inputId = "timeWindowProcPi",
                                 label = "Time Window",
@@ -782,36 +890,46 @@ body <- dashboardBody(
                      )
                    )
           ),
+
+          ## Time to intervention
           tabPanel("Time to Intervention",
                    fluidRow(
-                     column(width = 9,
-                            status = "success",
-                            plotOutput("ttiKmPlot")
-                     ),
-                     column(width = 3,
-                            box(
-                              width = 9,
+                     box(
+                     status = "success",
+                     column(width = 6,
                               height = "180px",
                               background = "light-blue",
+
                               pickerInput(
                                 inputId = "databaseNameTti",
                                 label = "Database Name",
                                 choices = databaseName
                               ),
+
                               pickerInput(
                                 inputId = "cohortNameTti",
                                 label = "Cohort Name",
                                 choices = ttiCohorts
                               )
-                            )
-                     ),
+                      ),
+                    ),
                    ),
                    fluidRow(
-                     box(
-                       width = 12,
-                       reactableOutput("ttiSurvTab")
+                     column(width = 12,
+                            box(
+                             width = NULL,
+                             plotOutput(height = "1000px",
+                                          "ttiKmPlot")
+
                      )
                    )
+                  ),
+                  fluidRow(
+                    box(
+                       width = 12,
+                       reactableOutput("ttiSurvTab")
+                    )
+                )
           )
         )
       )
@@ -839,12 +957,6 @@ server <- function(input, output, session){
     description
   })
 
-  ## Database Information
-  # output$databaseInformation <- renderReactable(
-  #   databaseMeta %>% reactable()
-  # )
-
-
   # Cohorts ----------------
 
   ## Cohort Counts
@@ -949,6 +1061,7 @@ server <- function(input, output, session){
         columns = list(
           databaseId = colDef(name = "Database Name"),
           domain = colDef(name = "Domain"),
+          #timeWindow = colDef(name = "Time Window"),
           cohortName = colDef(name = "Cohort Name"),
           conceptId = colDef(name = "Concept Id"),
           name = colDef(name = "Concept Name"),
@@ -974,7 +1087,7 @@ server <- function(input, output, session){
       reactable(
         columns = list(
           databaseId = colDef(name = "Database Name"),
-          timeWindow = colDef(name = "Time Window"),
+          #timeWindow = colDef(name = "Time Window"),
           cohortName = colDef(name = "Cohort Name"),
           domain = colDef(name = "Domain"),
           covariateName = colDef(name = "Covariate Name"),
@@ -999,6 +1112,7 @@ server <- function(input, output, session){
       reactable(
         columns = list(
           databaseId = colDef(name = "Database Name"),
+          #timeWindow = colDef(name = "Time Window"),
           cohortName = colDef(name = "Cohort Name"),
           CATEGORY_CODE = colDef(name = "Concept Id"),
           categoryName = colDef(name = "Concept Name"),
@@ -1028,7 +1142,7 @@ server <- function(input, output, session){
     incTab %>%
       dplyr::filter(databaseId %in% input$databaseNameInci,
                     START_YEAR %in% input$yearInci,
-                    OUTCOME_NAME %in% snakecase::to_snake_case(input$cohortNameInci)) %>%
+                    OUTCOME_NAME %in% input$cohortNameInci) %>%
       reactable(
         columns = list(
           databaseId = colDef(name = "Database Name"),
@@ -1036,6 +1150,7 @@ server <- function(input, output, session){
           OUTCOME_NAME = colDef(name = "Cohort Name"),
           PERSONS_AT_RISK = colDef(name = "Persons at Risk", format = colFormat(separators = TRUE)),
           PERSON_DAYS = colDef(name = "Person Days", format = colFormat(separators = TRUE)),
+          PERSON_YEARS = colDef(name = "Person Years", format = colFormat(separators = TRUE)),
           OUTCOMES = colDef(name = "Outcome Count", format = colFormat(separators = TRUE)),
           INCIDENCE_PROPORTION_P100P = colDef(name = "Incidence Proportion (per 100p)", format = colFormat(digits = 2)),
           INCIDENCE_RATE_P1000PY = colDef(name = "Incidence Rate (per 1000yrs)", format = colFormat(digits = 2))
@@ -1055,7 +1170,8 @@ server <- function(input, output, session){
       dplyr::filter(
         START_YEAR != "All",
         databaseId %in% input$databaseNameYrInci,
-        OUTCOME_NAME %in% snakecase::to_snake_case(input$cohortNameYrInci)
+        OUTCOME_NAME %in% input$cohortNameYrInci
+        #OUTCOME_NAME %in% snakecase::to_snake_case(input$cohortNameYrInci)
       )
   })
 
@@ -1109,11 +1225,20 @@ server <- function(input, output, session){
     drugUtilizationDescription
   })
 
+  output$drugUtilizationDescription2 <- renderText({
+    drugUtilizationDescription2
+  })
+
+  output$drugUtilizationDescription3 <- renderText({
+    drugUtilizationDescription3
+  })
+
 
   ## Utilization
   duPiRe <- reactive({
     drugPi %>%
       dplyr::filter(
+        cat %in% input$drugCat,
         databaseId %in% input$databaseNameDuPi,
         cohortName %in% input$cohortNameDuPi,
         covariateName %in% input$drugNameDuPi,
@@ -1126,6 +1251,7 @@ server <- function(input, output, session){
       columns = list(
         databaseId = colDef(name = "Database Name"),
         timeWindow = colDef(name = "Time Window"),
+        cat = colDef(name = "Category"),
         cohortName = colDef(name = "Cohort Name"),
         covariateName = colDef(name = "Drug Cohort Name"),
         count = colDef(name = "Person Count", format = colFormat(separators = TRUE)),
@@ -1154,6 +1280,8 @@ server <- function(input, output, session){
     txPatDatAll %>%
       dplyr::filter(
         databaseId == input$databaseNameSankey,
+        time == input$txTime,
+        type == input$txType,
         cohortId == sankeyPick()) %>%
       dplyr::slice(1:20) %>%
       buildSankeyData() %>%
@@ -1165,6 +1293,8 @@ server <- function(input, output, session){
     txPatDatAll %>%
       dplyr::filter(
         databaseId == input$databaseNameSankey,
+        time == input$txTime,
+        type == input$txType,
         cohortId == sankeyPick()
       ) %>%
       dplyr::arrange(desc(n)) %>%
@@ -1214,7 +1344,7 @@ server <- function(input, output, session){
       dplyr::pull(name)
   })
 
-  # subset to those with strata
+  # subset to those with strata (without)
   ttdSubset <- reactive({
 
     dt1 <- ttd %>%
@@ -1222,6 +1352,7 @@ server <- function(input, output, session){
         database == input$databaseNameTtd,
         `Cohort Name` == input$cohortNameTtd
       )
+
     if (input$strataTtd == "Single") {
       dt1 <- dt1 %>%
         dplyr::filter(
@@ -1233,12 +1364,131 @@ server <- function(input, output, session){
 
   })
 
-  # make km plot
-  output$ttdKmPlot <- renderPlot({
-    plotKM(ttdSubset())
+  # subset to those with strata (with)
+  ttdSubset2 <- reactive({
+
+    dt1 <- ttd2 %>%
+      dplyr::filter(
+        database == input$databaseNameTtd2,
+        `Cohort Name` == input$cohortNameTtd2
+      )
+
+    if (input$strataTtd2 == "Single") {
+      dt1 <- dt1 %>%
+        dplyr::filter(
+          !grepl("\\+", strata)
+        )
+    }
+
+    dt1
+
   })
 
-  ## get survProb table
+
+  ## Find rds file (without)
+  ttdKMRds <- reactive ({
+
+    if (input$cohortNameTtd == "hmb") {
+      lineKM <- "1"
+    } else if (input$cohortNameTtd == "hmb age_lt_30"){
+      lineKM <- "1001"
+    } else if (input$cohortNameTtd == "hmb age_30_45") {
+      lineKM <- "1002"
+    } else if (input$cohortNameTtd == "hmb age_45_55") {
+      lineKM <- "1003"
+    }
+
+    ttd_km_name <- here::here(glue::glue('shiny/data_0603/ttd/wo/tte_{input$databaseNameTtd}_{lineKM}.rds'))
+
+    ttd_km <- readr::read_rds(ttd_km_name)
+
+    ttd_km_final <- ttd_km[[glue::glue("{input$strataTtd}")]]
+
+    return(ttd_km_final)
+  })
+
+
+  output$ttdKmPlot <- renderPlot(
+    res = 80,
+    {
+
+    tte <- ttdKMRds()
+
+    ## Number of colors for lines
+    colors <- colorspace::rainbow_hcl(unique(length(tte$strata)))
+
+    tte |>
+      ggsurvfit(size = 1) +
+      scale_ggsurvfit(x_scales=list(breaks=c(0.5, 0:3))) +  # Breaks at 6m and 1-3 years
+      scale_color_manual(values = colors) +
+      scale_fill_manual(values = colors) +
+      add_risktable(risktable_stats = "{n.risk} ({cum.censor})",
+                    risktable_height = 0.4,
+                    size = 4, # increase font size of risk table statistics
+                    theme =   # increase font size of risk table title and y-axis label
+                      list(
+                        theme_risktable_default(axis.text.y.size = 11,
+                                                plot.title.size = 11),
+                        theme(plot.title = element_text(face = "bold"))
+                      )) +
+      #add_risktable_strata_symbol(symbol = "\U25CF", size = 18) +
+      labs(x = "Follow-up time, years")
+
+  })
+
+  ## Find rds file (with)
+  ttdKMRds2 <- reactive ({
+
+    if (input$cohortNameTtd2 == "hmb") {
+      lineKM <- "1"
+    } else if (input$cohortNameTtd2 == "hmb age_lt_30"){
+      lineKM <- "1001"
+    } else if (input$cohortNameTtd2 == "hmb age_30_45") {
+      lineKM <- "1002"
+    } else if (input$cohortNameTtd2 == "hmb age_45_55") {
+      lineKM <- "1003"
+    }
+
+    ttd_km_name <- here::here(glue::glue('shiny/data_0603/ttd/with/tte_{input$databaseNameTtd2}_{lineKM}.rds'))
+
+    ttd_km <- readr::read_rds(ttd_km_name)
+
+    ttd_km_final <- ttd_km[[glue::glue("{input$strataTtd2}")]]
+
+    return(ttd_km_final)
+  })
+
+
+  output$ttdKmPlot2 <- renderPlot(
+    res = 80,
+    {
+
+      tte <- ttdKMRds2()
+
+      ## Number of colors for lines
+      colors <- colorspace::rainbow_hcl(unique(length(tte$strata)))
+
+      tte |>
+        ggsurvfit(size = 1) +
+        scale_ggsurvfit(x_scales=list(breaks=c(0.5, 0:3))) +  # Breaks at 6m and 1-3 years
+        scale_color_manual(values = colors) +
+        scale_fill_manual(values = colors) +
+        add_risktable(risktable_stats = "{n.risk} ({cum.censor})",
+                      risktable_height = 0.4,
+                      size = 4, # increase font size of risk table statistics
+                      theme =   # increase font size of risk table title and y-axis label
+                        list(
+                          theme_risktable_default(axis.text.y.size = 11,
+                                                  plot.title.size = 11),
+                          theme(plot.title = element_text(face = "bold"))
+                        )) +
+        #add_risktable_strata_symbol(symbol = "\U25CF", size = 18) +
+        labs(x = "Follow-up time, years")
+
+    })
+
+
+  ## get survProb table (without)
   output$ttdSurvTab <- renderReactable(
     makeSurvProbTab(ttdSubset()) %>%
       reactable(
@@ -1259,6 +1509,27 @@ server <- function(input, output, session){
       )
   )
 
+  ## get survProb table (with)
+  output$ttdSurvTab2 <- renderReactable(
+    makeSurvProbTab(ttdSubset2()) %>%
+      reactable(
+        columns = list(
+          database = colDef(name = "Database Name"),
+          `Cohort Name` = colDef(name = "Cohort Name"),
+          strata = colDef(name = "Drug Cohort Name"),
+          `6 month` = colDef(name = "6 month Survival", format = colFormat(digits = 1, percent = TRUE)),
+          `1 year` = colDef(name = "1 year Survival", format = colFormat(digits = 1, percent = TRUE)),
+          `2 year` = colDef(name = "2 year Survival", format = colFormat(digits = 1, percent = TRUE))
+        ),
+        filterable = TRUE,
+        searchable = TRUE,
+        outlined = TRUE,
+        bordered = TRUE,
+        striped = TRUE,
+        defaultPageSize = 20
+      )
+  )
+
 
   # Procedure Analysis ---------------------------------
 
@@ -1317,11 +1588,58 @@ server <- function(input, output, session){
       )
   })
 
-  # make km plot
-  output$ttiKmPlot <- renderPlot({
-    plotKM2(ttiSubset())
+
+  ## Find rds file
+  ttiKMRds <- reactive ({
+
+    if (input$cohortNameTti == "hmb") {
+      lineKM <- "1"
+    } else if (input$cohortNameTti == "hmb age_lt_30"){
+      lineKM <- "1001"
+    } else if (input$cohortNameTti == "hmb age_30_45") {
+      lineKM <- "1002"
+    } else if (input$cohortNameTti == "hmb age_45_55") {
+      lineKM <- "1003"
+    }
+
+    ttd_km_name <- here::here(glue::glue('shiny/data_0603/tti/tti_{input$databaseNameTti}_{lineKM}.rds'))
+
+    ttd_km <- readr::read_rds(ttd_km_name)
+
+    return(ttd_km)
   })
 
+
+  output$ttiKmPlot <- renderPlot(
+    res = 80,
+    {
+
+      tti <- ttiKMRds()
+
+      ## Number of colors for lines
+      colors <- colorspace::rainbow_hcl(unique(length(tti$strata)))
+
+      tti |>
+        ggsurvfit(size = 1) +
+        scale_ggsurvfit(x_scales=list(breaks=c(0.5, 0:3))) +  # Breaks at 6m and 1-3 years
+        scale_color_manual(values = colors) +
+        scale_fill_manual(values = colors) +
+        add_risktable(risktable_stats = "{n.risk} ({cum.censor})",
+                      risktable_height = 0.4,
+                      size = 4, # increase font size of risk table statistics
+                      theme =   # increase font size of risk table title and y-axis label
+                        list(
+                          theme_risktable_default(axis.text.y.size = 11,
+                                                  plot.title.size = 11),
+                          theme(plot.title = element_text(face = "bold"))
+                        )) +
+        #add_risktable_strata_symbol(symbol = "\U25CF", size = 18) +
+        labs(x = "Follow-up time, years")
+
+    })
+
+
+
   ## get survProb table
   output$ttiSurvTab <- renderReactable(
     makeSurvProbTab2(ttiSubset()) %>%
diff --git a/shiny/migration/dataMigration.R b/shiny/migration/dataMigration.R
index 82f94ae..82ad8c8 100644
--- a/shiny/migration/dataMigration.R
+++ b/shiny/migration/dataMigration.R
@@ -7,13 +7,13 @@ library(readr)
 source("shiny/migration/helpers.R")
 
 
-appDataPath <- here::here("shiny", "data_old") # Path to place app data
+appDataPath <- here::here("shiny", "data_0603") # Path to place app data
 
 appDataPath %>% fs::dir_create() # Create new directory
 
-resultsPath <- here::here("results") # Set path to the execution results
+resultsPath <- here::here("results_0603") # Set path to the execution results
 
-listOfDatabase <- fs::dir_ls(resultsPath) %>% #list the databases used in the execution
+listOfDatabase <- fs::dir_ls(resultsPath) %>% # List the databases used in the execution
   basename()
 
 ### List the execution tasks
@@ -24,13 +24,14 @@ listOfTasks <- c("01_buildCohorts",
                  "05_baselineCharacteristics",
                  "06_postIndexPrevalenceConditions",
                  "07_postIndexPrevalenceDrugs",
-                 "08_treatmentHistory",
+                 "07_postIndexPrevalenceDrugs2",
                  "09_treatmentPatterns",
+                 "09_treatmentPatterns2",
                  "10_timeToDiscontinuation",
+                 "10_timeToDiscontinuation2",
                  "11_postIndexPrevalenceProcedures",
-                 "12_timeToIntervention",
-                 "13_treatmentHistory2",
-                 "14_treatmentPatterns")
+                 "12_timeToIntervention"
+                 )
 
 ### Create a data frame of all permutations of paths
 allPaths <- tidyr::expand_grid(listOfDatabase, listOfTasks) %>%
@@ -242,7 +243,6 @@ cts <- bindCsv(allPaths = allPaths,
                task = listOfTasks[5], # baseline char
                file = "continuous_baseline.csv")
 
-
 cts2 <- cts %>%
   dplyr::left_join(
     allCohorts, by = c("cohortDefinitionId" ="id"), relationship = "many-to-many"
@@ -264,6 +264,7 @@ readr::write_csv(cts2, file = fs::path(appDataPath, "baselineContinuous.csv"))
 ## 5. Baseline Concepts ----------------
 
 ### Extract drug concepts
+
 drug <- bindCsv(allPaths = allPaths,
                 task = listOfTasks[5], # baseline char
                 file = "drugs_baseline.csv") %>%
@@ -275,7 +276,7 @@ drug <- bindCsv(allPaths = allPaths,
     domain = "Drugs"
   ) %>%
   dplyr::select(
-    databaseId, domain, cohortDefinitionId, cohortName, conceptId, name,  n, pct
+    databaseId, domain, cohortDefinitionId, cohortName, conceptId, name,  n, pct, timeWindow
   )
 
 ### Extract condition concepts
@@ -290,7 +291,7 @@ cond <- bindCsv(allPaths = allPaths,
     domain = "Conditions"
   ) %>%
   dplyr::select(
-    databaseId, domain, cohortDefinitionId, cohortName, conceptId, name,  n, pct
+    databaseId, domain, cohortDefinitionId, cohortName, conceptId, name,  n, pct, timeWindow
   )
 
 ### Extract procedure concepts (exclude THINBE procedures)
@@ -308,7 +309,7 @@ proc <- bindCsv(allPaths = procPaths,
     domain = "Procedures"
   ) %>%
   dplyr::select(
-    databaseId, domain, cohortDefinitionId, cohortName, conceptId, name,  n, pct
+    databaseId, domain, cohortDefinitionId, cohortName, conceptId, name,  n, pct, timeWindow
   )
 
 conceptTab <- dplyr::bind_rows(
@@ -330,7 +331,7 @@ cohort365 <- bindCsv(allPaths = allPaths,
   ) %>%
   dplyr::mutate(
     covariateName = name,
-    timeWindow = "[-365 : 0]"
+    timeWindow = "-365d to 0d"
     ) %>%
   dplyr::select(databaseId, timeWindow, cohortId, cohortName,
                 covariateId, covariateName, count, pct) %>%
@@ -404,7 +405,7 @@ icd10 <- bindCsv(allPaths = allPaths,
     pct = COUNTVALUE / n
   ) %>%
   dplyr::select(databaseId, COHORT_ID, cohortName,
-                CATEGORY_CODE, categoryName, COUNTVALUE, pct)
+                CATEGORY_CODE, categoryName, COUNTVALUE, pct, timeWindow)
 
 readr::write_csv(icd10, file = fs::path(appDataPath, "baselineChapters.csv"))
 
@@ -416,9 +417,11 @@ readr::write_csv(icd10, file = fs::path(appDataPath, "baselineChapters.csv"))
 piPrevFilesCond <- c("cohort_covariates_1_365.csv",
                      "cohort_covariates_366_730.csv",
                      "cohort_covariates_731_1825.csv")
-piPrevTimeFrameCond <- c("1d - 365d",
-                         "366d - 730d",
-                         "731d - 1825d")
+
+piPrevTimeFrameCond <- c("1d to 365d",
+                         "366d to 730d",
+                         "731d to 1825d")
+
 piPrevCond <- purrr::map2_dfr(piPrevFilesCond,         # files to use
                               piPrevTimeFrameCond,     # time frame column to add
                               ~bindCsv(                # bind csv
@@ -429,6 +432,7 @@ piPrevCond <- purrr::map2_dfr(piPrevFilesCond,         # files to use
                                   timeWindow = .y
                                 ))  %>%
   dplyr::mutate(
+    cat = "-",
     type = "conditions"
   ) %>%
   dplyr::select(-covariateName) %>%
@@ -447,22 +451,30 @@ piPrevCond <- purrr::map2_dfr(piPrevFilesCond,         # files to use
 ### B. Post Index Drugs ----------------
 
 piPrevFilesDrugs <- c(
+  "cohort_covariates_183_1.csv",
+  "cohort_covariates_365_1.csv",
+  "cohort_covariates_730_1.csv",
   "cohort_covariates_1_183.csv",
-  "cohort_covariates_184_365.csv",
   "cohort_covariates_1_365.csv",
+  "cohort_covariates_1_730.csv",
+  "cohort_covariates_184_365.csv",
   "cohort_covariates_366_730.csv",
   "cohort_covariates_731_1825.csv"
 )
 
 piPrevTimeFrameDrugs <- c(
-  "1d - 183d",
-  "184d - 365d",
-  "1d - 365d",
-  "366d - 730d",
-  "731d - 1825d"
+  "-183d to -1d",
+  "-365d to -1d",
+  "-730d to -1d",
+  "1d to 183d",
+  "1d to 365d",
+  "1d to 730d",
+  "184d to 365d",
+  "366d to 730d",
+  "731d to 1825d"
 )
 
-piPrevDrugs <- purrr::map2_dfr(piPrevFilesDrugs,        # files to use
+piPrevDrugsWithin <- purrr::map2_dfr(piPrevFilesDrugs,        # files to use
                                piPrevTimeFrameDrugs,    # time frame column to add
                                ~bindCsv(                # bind csv
                                  allPaths = allPaths,
@@ -471,10 +483,28 @@ piPrevDrugs <- purrr::map2_dfr(piPrevFilesDrugs,        # files to use
                                  dplyr::mutate(         # add timeWindow Column
                                    timeWindow = .y
                                  )) %>%
+  dplyr::rename(cat = type) %>%
+  dplyr::mutate(
+    type = "drugs"
+  )
+
+
+piPrevDrugsFollowUp <- purrr::map2_dfr(piPrevFilesDrugs, # files to use
+                               piPrevTimeFrameDrugs,     # time frame column to add
+                               ~bindCsv(                 # bind csv
+                                 allPaths = allPaths,
+                                 task = listOfTasks[8],  # postindex Drugs
+                                 file = .x) %>%
+                                 dplyr::mutate(          # add timeWindow Column
+                                   timeWindow = .y
+                                 )) %>%
+  dplyr::rename(cat = type) %>%
   dplyr::mutate(
     type = "drugs"
   )
 
+piPrevDrugs <- dplyr::bind_rows(piPrevDrugsWithin, piPrevDrugsFollowUp)
+
 
 ### C. Post Index Procedures ----------------
 # skip THIN no procedures
@@ -490,12 +520,13 @@ piPrevProc <- purrr::map2_dfr(piPrevFilesProc,          # files to use
                               piPrevProcCohorts,        # cohorts
                               ~bindCsv(                 # bind csv
                                 allPaths = procPaths,   # skip THIN no procedures
-                                task = listOfTasks[11], # postindex Proc
+                                task = listOfTasks[13], # postindex Proc
                                 file = .x) %>%
                                 dplyr::mutate(
                                   cohortId = .y
                                 )) %>%
   dplyr::mutate(
+    cat = "-",
     cohortName = dplyr::case_when(
       cohortId == 1 ~ "hmb",
       cohortId == 1001L ~ "hmb_age_lt_30",
@@ -525,7 +556,7 @@ piPrevProc <- purrr::map2_dfr(piPrevFilesProc,          # files to use
     type = "procedures"
   ) %>%
   dplyr::select(databaseId, cohortId, cohortName, covariateId, covariateName,
-                count, pct, timeWindow, type)
+                count, pct, timeWindow, type, cat)
 
 postIndexPrev <- dplyr::bind_rows(
   piPrevCond, piPrevDrugs, piPrevProc
@@ -565,10 +596,41 @@ inic2 <- inic %>%
 
 ## 10. Treatment Patterns ----------------
 
-### A. HMB normal ----------------
+### A. HMB ----------------
+#### All ----------------
+txPath <- allPaths %>%
+  dplyr::filter(listOfTasks == listOfTasks[9]) %>%
+  dplyr::mutate(fullPath = fs::path(fullPath, "/all"))
+
+### Get treatment patterns table
+txPathDat <- purrr::pmap_dfr(
+  txPath,
+  ~bindTxPathTab(path = ..3, database = ..1)
+) %>%
+  tidyr::separate_wider_delim(
+    cols = cohortName,
+    delim = "_",
+    names = c("type", "cohortId")
+  ) %>%
+  dplyr::mutate(
+    cohortName = dplyr::case_when(
+      cohortId == 1 ~ "hmb",
+      cohortId == 1001L ~ "hmb_age_lt_30",
+      cohortId == 1002L ~ "hmb_age_30_45",
+      cohortId == 1003L ~ "hmb_age_45_55"
+    )
+  ) %>%
+  dplyr::select(databaseId, cohortId, cohortName, event_cohort_name1:event_cohort_name5, End, n) %>%
+  dplyr::mutate(time = "All time",
+                type = "Without NSAIDS") %>%
+  dplyr::arrange(databaseId, cohortId, desc(n))
+
+readr::write_csv(txPathDat, file = fs::path(appDataPath, "treatmentPatternsAll.csv"))
 
+#### 6m ----------------
 txPath <- allPaths %>%
-  dplyr::filter(listOfTasks == listOfTasks[9])
+  dplyr::filter(listOfTasks == listOfTasks[9]) %>%
+  dplyr::mutate(fullPath = fs::path(fullPath, "/6m"))
 
 ### Get treatment patterns table
 txPathDat <- purrr::pmap_dfr(
@@ -589,18 +651,50 @@ txPathDat <- purrr::pmap_dfr(
     )
   ) %>%
   dplyr::select(databaseId, cohortId, cohortName, event_cohort_name1:event_cohort_name5, End, n) %>%
+  dplyr::mutate(time = "6 months",
+                type = "Without NSAIDS") %>%
   dplyr::arrange(databaseId, cohortId, desc(n))
 
-readr::write_csv(txPathDat, file = fs::path(appDataPath, "treatmentPatterns.csv"))
+readr::write_csv(txPathDat, file = fs::path(appDataPath, "treatmentPatterns6m.csv"))
+
+#### 1y ----------------
+txPath <- allPaths %>%
+  dplyr::filter(listOfTasks == listOfTasks[9]) %>%
+  dplyr::mutate(fullPath = fs::path(fullPath, "/1y"))
+
+### Get treatment patterns table
+txPathDat <- purrr::pmap_dfr(
+  txPath,
+  ~bindTxPathTab(path = ..3, database = ..1)
+) %>%
+  tidyr::separate_wider_delim(
+    cols = cohortName,
+    delim = "_",
+    names = c("type", "cohortId")
+  ) %>%
+  dplyr::mutate(
+    cohortName = dplyr::case_when(
+      cohortId == 1 ~ "hmb",
+      cohortId == 1001L ~ "hmb_age_lt_30",
+      cohortId == 1002L ~ "hmb_age_30_45",
+      cohortId == 1003L ~ "hmb_age_45_55"
+    )
+  ) %>%
+  dplyr::select(databaseId, cohortId, cohortName, event_cohort_name1:event_cohort_name5, End, n) %>%
+  dplyr::mutate(time = "1 year",
+                type = "Without NSAIDS") %>%
+  dplyr::arrange(databaseId, cohortId, desc(n))
 
+readr::write_csv(txPathDat, file = fs::path(appDataPath, "treatmentPatterns1y.csv"))
 
-### B. HMB 2 ----------------
-txPath2 <- allPaths %>%
-  dplyr::filter(listOfTasks == listOfTasks[14])
+#### 2y ----------------
+txPath <- allPaths %>%
+  dplyr::filter(listOfTasks == listOfTasks[9]) %>%
+  dplyr::mutate(fullPath = fs::path(fullPath, "/2y"))
 
 ### Get treatment patterns table
-txPathDat2 <- purrr::pmap_dfr(
-  txPath2,
+txPathDat <- purrr::pmap_dfr(
+  txPath,
   ~bindTxPathTab(path = ..3, database = ..1)
 ) %>%
   tidyr::separate_wider_delim(
@@ -610,30 +704,145 @@ txPathDat2 <- purrr::pmap_dfr(
   ) %>%
   dplyr::mutate(
     cohortName = dplyr::case_when(
-      cohortId == 44 ~ "hmb2",
-      cohortId == 44001L ~ "hmb2_age_lt_30",
-      cohortId == 44002L ~ "hmb2_age_30_45",
-      cohortId == 44003L ~ "hmb2_age_45_55"
+      cohortId == 1 ~ "hmb",
+      cohortId == 1001L ~ "hmb_age_lt_30",
+      cohortId == 1002L ~ "hmb_age_30_45",
+      cohortId == 1003L ~ "hmb_age_45_55"
     )
   ) %>%
   dplyr::select(databaseId, cohortId, cohortName, event_cohort_name1:event_cohort_name5, End, n) %>%
+  dplyr::mutate(time = "2 years",
+                type = "Without NSAIDS") %>%
   dplyr::arrange(databaseId, cohortId, desc(n))
 
-readr::write_csv(txPathDat2, file = fs::path(appDataPath, "treatmentPatterns2.csv"))
+readr::write_csv(txPathDat, file = fs::path(appDataPath, "treatmentPatterns2y.csv"))
+
 
+### A. HMB (with NSAIDs) ----------------
+#### All ----------------
+txPath <- allPaths %>%
+  dplyr::filter(listOfTasks == listOfTasks[10]) %>%
+  dplyr::mutate(fullPath = fs::path(fullPath, "/all"))
+
+### Get treatment patterns table
+txPathDat <- purrr::pmap_dfr(
+  txPath,
+  ~bindTxPathTab(path = ..3, database = ..1)
+) %>%
+  tidyr::separate_wider_delim(
+    cols = cohortName,
+    delim = "_",
+    names = c("type", "cohortId")
+  ) %>%
+  dplyr::mutate(
+    cohortName = dplyr::case_when(
+      cohortId == 1 ~ "hmb",
+      cohortId == 1001L ~ "hmb_age_lt_30",
+      cohortId == 1002L ~ "hmb_age_30_45",
+      cohortId == 1003L ~ "hmb_age_45_55"
+    )
+  ) %>%
+  dplyr::select(databaseId, cohortId, cohortName, event_cohort_name1:event_cohort_name5, End, n) %>%
+  dplyr::mutate(time = "All time",
+                type = "With NSAIDS") %>%
+  dplyr::arrange(databaseId, cohortId, desc(n))
 
-## get sankey diagram
-# sankey <- purrr::pmap(
-#   txPath,
-#   ~groupSankey(path = ..3, database = ..1)
-# )
-# names(sankey) <- listOfDatabase
-# readr::write_rds(sankey, file = fs::path(appDataPath, "sankey.rds"))
+readr::write_csv(txPathDat, file = fs::path(appDataPath, "treatmentPatternsAllNsaids.csv"))
+
+#### 6m ----------------
+txPath <- allPaths %>%
+  dplyr::filter(listOfTasks == listOfTasks[10]) %>%
+  dplyr::mutate(fullPath = fs::path(fullPath, "/6m"))
+
+### Get treatment patterns table
+txPathDat <- purrr::pmap_dfr(
+  txPath,
+  ~bindTxPathTab(path = ..3, database = ..1)
+) %>%
+  tidyr::separate_wider_delim(
+    cols = cohortName,
+    delim = "_",
+    names = c("type", "cohortId")
+  ) %>%
+  dplyr::mutate(
+    cohortName = dplyr::case_when(
+      cohortId == 1 ~ "hmb",
+      cohortId == 1001L ~ "hmb_age_lt_30",
+      cohortId == 1002L ~ "hmb_age_30_45",
+      cohortId == 1003L ~ "hmb_age_45_55"
+    )
+  ) %>%
+  dplyr::select(databaseId, cohortId, cohortName, event_cohort_name1:event_cohort_name5, End, n) %>%
+  dplyr::mutate(time = "6 months",
+                type = "With NSAIDS") %>%
+  dplyr::arrange(databaseId, cohortId, desc(n))
+
+readr::write_csv(txPathDat, file = fs::path(appDataPath, "treatmentPatterns6mNsaids.csv"))
+
+#### 1y ----------------
+txPath <- allPaths %>%
+  dplyr::filter(listOfTasks == listOfTasks[10]) %>%
+  dplyr::mutate(fullPath = fs::path(fullPath, "/1y"))
+
+### Get treatment patterns table
+txPathDat <- purrr::pmap_dfr(
+  txPath,
+  ~bindTxPathTab(path = ..3, database = ..1)
+) %>%
+  tidyr::separate_wider_delim(
+    cols = cohortName,
+    delim = "_",
+    names = c("type", "cohortId")
+  ) %>%
+  dplyr::mutate(
+    cohortName = dplyr::case_when(
+      cohortId == 1 ~ "hmb",
+      cohortId == 1001L ~ "hmb_age_lt_30",
+      cohortId == 1002L ~ "hmb_age_30_45",
+      cohortId == 1003L ~ "hmb_age_45_55"
+    )
+  ) %>%
+  dplyr::select(databaseId, cohortId, cohortName, event_cohort_name1:event_cohort_name5, End, n) %>%
+  dplyr::mutate(time = "1 year",
+                type = "With NSAIDS") %>%
+  dplyr::arrange(databaseId, cohortId, desc(n))
+
+readr::write_csv(txPathDat, file = fs::path(appDataPath, "treatmentPatterns1yNsaids.csv"))
+
+#### 2y ----------------
+txPath <- allPaths %>%
+  dplyr::filter(listOfTasks == listOfTasks[10]) %>%
+  dplyr::mutate(fullPath = fs::path(fullPath, "/2y"))
+
+### Get treatment patterns table
+txPathDat <- purrr::pmap_dfr(
+  txPath,
+  ~bindTxPathTab(path = ..3, database = ..1)
+) %>%
+  tidyr::separate_wider_delim(
+    cols = cohortName,
+    delim = "_",
+    names = c("type", "cohortId")
+  ) %>%
+  dplyr::mutate(
+    cohortName = dplyr::case_when(
+      cohortId == 1 ~ "hmb",
+      cohortId == 1001L ~ "hmb_age_lt_30",
+      cohortId == 1002L ~ "hmb_age_30_45",
+      cohortId == 1003L ~ "hmb_age_45_55"
+    )
+  ) %>%
+  dplyr::select(databaseId, cohortId, cohortName, event_cohort_name1:event_cohort_name5, End, n) %>%
+  dplyr::mutate(time = "2 years",
+                type = "With NSAIDS") %>%
+  dplyr::arrange(databaseId, cohortId, desc(n))
+
+readr::write_csv(txPathDat, file = fs::path(appDataPath, "treatmentPatterns2yNsaids.csv"))
 
 
 ## 11. Time to event ----------------
 
-### A. Time to discontinuation ----------------
+### A.1 TTD without NSAIDs (Tables) ----------------
 
 ### List files to extract
 ttdFiles <- c("tte_1.csv",
@@ -647,12 +856,13 @@ permutations <- tidyr::expand_grid(
 )
 
 ### Bind all in ttd
+#debug(bindTteData)
 ttd <- purrr::pmap_dfr(
   permutations,
   ~bindTteData(
     path = resultsPath,
     database = ..2,
-    task = listOfTasks[10],
+    task = listOfTasks[11], ## Change number
     file = ..1
   )
 )
@@ -662,8 +872,88 @@ arrow::write_parquet(
   sink = fs::path(appDataPath, "ttd.parquet")
 )
 
+### A.2 TTD with NSAIDs (Tables) ----------------
 
-### B. Time to intervention ----------------
+### List files to extract
+ttdFiles <- c("tte_1.csv",
+              "tte_1001.csv",
+              "tte_1002.csv",
+              "tte_1003.csv")
+
+permutations <- tidyr::expand_grid(
+  ttdFiles,
+  listOfDatabase
+)
+
+### Bind all in ttd
+#debug(bindTteData)
+ttd <- purrr::pmap_dfr(
+  permutations,
+  ~bindTteData(
+    path = resultsPath,
+    database = ..2,
+    task = listOfTasks[12],
+    file = ..1
+  )
+)
+
+arrow::write_parquet(
+  x = ttd,
+  sink = fs::path(appDataPath, "ttd2.parquet")
+)
+
+### B.1 TTD without NSAIDs (survfit) ----------------
+
+## Create output folder
+outputPath <- here::here(appDataPath, "ttd/wo") %>%
+  fs::dir_create()
+
+outputPath <- here::here(appDataPath, "ttd/wo")
+
+for (i in 1:length(listOfDatabase)) {
+
+  db <- listOfDatabase[i]
+
+  path <- allPaths %>%
+    dplyr::filter(listOfTasks == listOfTasks[11] & listOfDatabase == db) %>%
+    dplyr::pull(fullPath)
+
+  listOfFiles <- list.files(path, full.names = FALSE, pattern = ".rds", recursive = TRUE)
+
+  inputPath <- here::here("results_0603", db, listOfTasks[11])
+
+  fs::file_copy(here::here(inputPath, listOfFiles),
+                here::here(outputPath, listOfFiles), overwrite = TRUE)
+
+}
+
+### B.2 TTD with NSAIDs (survfit) ----------------
+
+## Create output folder
+outputPath <- here::here(appDataPath, "ttd/with") %>%
+  fs::dir_create()
+
+outputPath <- here::here(appDataPath, "ttd/with")
+
+for (i in 1:length(listOfDatabase)) {
+
+  db <- listOfDatabase[i]
+
+  path <- allPaths %>%
+    dplyr::filter(listOfTasks == listOfTasks[12] & listOfDatabase == db) %>%
+    dplyr::pull(fullPath)
+
+  listOfFiles <- list.files(path, full.names = FALSE, pattern = ".rds", recursive = TRUE)
+
+  inputPath <- here::here("results_0603", db, listOfTasks[12])
+
+  fs::file_copy(here::here(inputPath, listOfFiles),
+                here::here(outputPath, listOfFiles), overwrite = TRUE)
+
+}
+
+
+### C.1 Time to intervention (Tables) ----------------
 
 ttiFiles <- c("procedure_survival_1.csv",
               "procedure_survival_1001.csv",
@@ -681,7 +971,7 @@ tti <- purrr::pmap_dfr(
   ~bindTteData2(
     path = resultsPath,
     database = ..2,
-    task = listOfTasks[12],
+    task = listOfTasks[14],
     file = ..1
   )
 )
@@ -691,3 +981,29 @@ arrow::write_parquet(
   sink = fs::path(appDataPath, "tti.parquet")
 )
 
+
+### C.2 Time to intervention (survfit) ----------------
+
+## Create output folder
+outputPath <- here::here(appDataPath, "tti") %>%
+  fs::dir_create()
+
+outputPath <- here::here(appDataPath, "tti")
+
+for (i in 1:length(listOfDatabase)) {
+
+  db <- listOfDatabase[i]
+
+  path <- allPaths %>%
+    dplyr::filter(listOfTasks == listOfTasks[14] & listOfDatabase == db) %>%
+    dplyr::pull(fullPath)
+
+  listOfFiles <- list.files(path, full.names = FALSE, pattern = ".rds", recursive = TRUE)
+
+  inputPath <- here::here("results_0603", db, listOfTasks[14])
+
+  fs::file_copy(here::here(inputPath, listOfFiles),
+                here::here(outputPath, listOfFiles), overwrite = TRUE)
+
+}
+
diff --git a/shiny/migration/helpers.R b/shiny/migration/helpers.R
index 78eeca9..f0965e1 100644
--- a/shiny/migration/helpers.R
+++ b/shiny/migration/helpers.R
@@ -155,46 +155,48 @@ bindTteData <- function(path,
     tools::file_path_sans_ext()
 
   #read in data
-  tteData <- readr::read_csv(file = pathToFile, show_col_types = FALSE) %>%
-    dplyr::filter(
-      time <= 3
-    )
-
-  #remove singleLine Strata
-  singleLineStrata <- tteData %>%
-    dplyr::filter(
-      !grepl("\\+", strata)
-    ) %>%
-    dplyr::pull(strata) %>%
-    unique()
-
-  #get top 4 multi lines
-  top4MultiLineStrata <-tteData %>%
-    dplyr::filter(
-      grepl("\\+", strata)
-    ) %>%
-    count(strata) %>%
-    dplyr::arrange(desc(n)) %>%
-    dplyr::slice(1:4) %>%
-    dplyr::pull(strata) %>%
-    unique()
-
-  # combine specified strata lines
-  strataLines <- c(singleLineStrata, top4MultiLineStrata)
+  tteData <- readr::read_csv(file = pathToFile, show_col_types = FALSE)
+
+  # %>%
+  #   dplyr::filter(
+  #     time <= 3
+  #   )
+  #
+  # #remove singleLine Strata
+  # singleLineStrata <- tteData %>%
+  #   dplyr::filter(
+  #     !grepl("\\+", strata)
+  #   ) %>%
+  #   dplyr::pull(strata) %>%
+  #   unique()
+  #
+  # #get top 4 multi lines
+  # top4MultiLineStrata <-tteData %>%
+  #   dplyr::filter(
+  #     grepl("\\+", strata)
+  #   ) %>%
+  #   count(strata) %>%
+  #   dplyr::arrange(desc(n)) %>%
+  #   dplyr::slice(1:4) %>%
+  #   dplyr::pull(strata) %>%
+  #   unique()
+  #
+  # # combine specified strata lines
+  # strataLines <- c(singleLineStrata, top4MultiLineStrata)
 
 
   # subset tted Data to the specified strata lines
   subsetTteData <- tteData %>%
     dplyr::mutate(strata = as.character(strata)) %>%
-    dplyr::filter(
-      strata %in% strataLines
-    ) %>%
+    # dplyr::filter(
+    #   strata %in% strataLines
+    # ) %>%
     dplyr::mutate(
       database = !!database,
       targetId = !!targetId
     ) %>%
     dplyr::select(
-      database, targetId, strata, time, n.risk, n.event, estimate, std.error
+      database, targetId, strata, time, n.risk, n.event, estimate, std.error, line
     )
 
   return(subsetTteData)
@@ -215,10 +217,11 @@ bindTteData2 <- function(path,
     tools::file_path_sans_ext()
 
   #read in data
-  tteData <- readr::read_csv(file = pathToFile, show_col_types = FALSE) %>%
-    dplyr::filter(
-      time <= 3
-    )
+  tteData <- readr::read_csv(file = pathToFile, show_col_types = FALSE)
+  # %>%
+  #   dplyr::filter(
+  #     time <= 3
+  #   )
 
   # subset tted Data to the specified strata lines
   updateTteData <- tteData %>%