From 6cde002f8cbd7f936467f6f46cd29f8a8ffbd227 Mon Sep 17 00:00:00 2001 From: ZorzArg Date: Fri, 8 Mar 2024 11:18:18 +0000 Subject: [PATCH] shiny app updates --- .gitignore | 2 + shiny/R/fn.R | 1 + shiny/R/loadData.R | 104 +++++--- shiny/app.R | 448 +++++++++++++++++++++++++++----- shiny/migration/dataMigration.R | 414 +++++++++++++++++++++++++---- shiny/migration/helpers.R | 71 ++--- 6 files changed, 854 insertions(+), 186 deletions(-) diff --git a/.gitignore b/.gitignore index 2ecf99c..d9444b2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # Project specific files and folders results/ +results_0603/ results_old/ logs/ config.yml @@ -17,6 +18,7 @@ hidden/ results.zip scratchDiagnostics shiny/data/ +shiny/data_0603/ shiny/data_old/ bindResults.R bindResults_Public.R diff --git a/shiny/R/fn.R b/shiny/R/fn.R index a19487c..8d5572b 100644 --- a/shiny/R/fn.R +++ b/shiny/R/fn.R @@ -223,6 +223,7 @@ plotKM2 <- function(dat) { plot_colors <- unname(grafify::graf_palettes$kelly) p <- ggplot(dat, aes(x = time, y = estimate, color = outcomeCohortId)) + + #ggsurvfit::add_risktable(risktable_stats = c("n.risk", "cum.censor", "cum.event")) + ## ADDED geom_step(linewidth = 1.5) + scale_color_manual(values = plot_colors) + #scale colors to kelly scale_y_continuous(labels = scales::percent_format()) + # convert y axis to percent diff --git a/shiny/R/loadData.R b/shiny/R/loadData.R index a793708..8b195e7 100644 --- a/shiny/R/loadData.R +++ b/shiny/R/loadData.R @@ -8,7 +8,7 @@ library(readr) source(here::here("shiny", "R", "fn.R")) source(here::here("shiny", "migration", "helpers.R")) -dataPath <- here::here("shiny", "data") +dataPath <- here::here("shiny", "data_0603") # About ----------------- @@ -50,7 +50,7 @@ ctsChar <- readr::read_csv(fs::path(dataPath, "baselineContinuous.csv"), conceptChar <- readr::read_csv(fs::path(dataPath, "baselineConcepts.csv"), show_col_types = FALSE) %>% dplyr::rename(count = n) %>% - dplyr::select(-cohortDefinitionId) %>% + dplyr::select(-cohortDefinitionId, -timeWindow) %>% maskLowCount() ## Cohort baseline @@ -63,17 +63,13 @@ cohortChar <- readr::read_csv(fs::path(dataPath, "baselineCohorts.csv"), TRUE ~ "Conditions" ) ) %>% - dplyr::select(databaseId, timeWindow, cohortName, domain, covariateName, count, pct) %>% - dplyr::mutate(timeWindow = dplyr::case_when( - timeWindow == "[-365 : 0]" ~ "T(-365d to 0d)", - TRUE ~ timeWindow - )) %>% + dplyr::select(databaseId, cohortName, domain, covariateName, count, pct) %>% maskLowCount() ## Chapters baseline icdChar <- readr::read_csv(fs::path(dataPath, "baselineChapters.csv"), show_col_types = FALSE) %>% - dplyr::select(-COHORT_ID) %>% + dplyr::select(-COHORT_ID, -timeWindow) %>% dplyr::rename(count = COUNTVALUE) %>% maskLowCount() @@ -88,6 +84,7 @@ domainConceptChar <- sort(unique(conceptChar$domain)) incTab <- readr::read_csv(fs::path(dataPath, "incidence.csv"), show_col_types = FALSE) %>% dplyr::select(-OUTCOME_COHORT_DEFINITION_ID) %>% + dplyr::mutate(PERSON_YEARS = as.integer(PERSON_DAYS/365.25), .before =6) %>% maskLowCountInci() ### Incidence Pickers @@ -99,15 +96,6 @@ yearInci <- c("All", as.character(2000:2022)) postIndex <- readr::read_csv(fs::path(dataPath, "postIndexPrevalence.csv"), show_col_types = FALSE) %>% dplyr::select(-cohortId, -covariateId) %>% - dplyr::mutate(timeWindow = dplyr::case_when( - timeWindow == "1d - 365d" ~ "T(1d to 365d)", - timeWindow == "1d - 183d" ~ "T(1d to 183d)", - timeWindow == "184d - 365d" ~ "T(184d to 365d)", - timeWindow == "366d - 730d" ~ "T(366d to 730d)", - timeWindow == "731d - 1825d" ~ "T(731d to 1825d)", - timeWindow == "1d - 9999d" ~ "T(1d to 9999d)", - TRUE ~ timeWindow - )) %>% maskLowCount() @@ -130,14 +118,19 @@ drugPi <- postIndex %>% # dplyr::mutate( # timeWindow = factor(timeWindow, levels = c("1d - 183d", "184d - 365d", "1d - 365d", "366d - 730d", "731d - 1825d")) # ) %>% - dplyr::select(databaseId, timeWindow, cohortName, covariateName, count, pct) %>% + dplyr::select(databaseId, timeWindow, cohortName, covariateName, count, pct, cat) %>% dplyr::arrange(databaseId, timeWindow) %>% - dplyr::mutate(timeWindow = as.character(timeWindow)) + dplyr::mutate(timeWindow = as.character(timeWindow), + cat = dplyr::case_when( + cat == "within" ~ "Within time window", + cat == "followUp" ~ "Complete follow-up" + ) + ) ### Pickers drugCohorts <- unique(drugPi$covariateName) drugTimeWindow <- unique(drugPi$timeWindow) - +drugCategory <- unique(drugPi$cat) ## Procedures ----------------- @@ -154,29 +147,52 @@ procTimeWindow <- unique(procPi$timeWindow) # 5. Treatment Patterns ----------------- ## Load treatment patterns table -txPatDat <- readr::read_csv(fs::path(dataPath, "treatmentPatterns.csv"), +txPatDat <- readr::read_csv(fs::path(dataPath, "treatmentPatternsAll.csv"), show_col_types = FALSE) - -txPatDat2 <- readr::read_csv(fs::path(dataPath, "treatmentPatterns2.csv"), +txPatDat6m <- readr::read_csv(fs::path(dataPath, "treatmentPatterns6m.csv"), show_col_types = FALSE) +txPatDat1y <- readr::read_csv(fs::path(dataPath, "treatmentPatterns1y.csv"), + show_col_types = FALSE) + +txPatDat2y <- readr::read_csv(fs::path(dataPath, "treatmentPatterns2y.csv"), + show_col_types = FALSE) + +txPatDatNsaids <- readr::read_csv(fs::path(dataPath, "treatmentPatternsAllNsaids.csv"), + show_col_types = FALSE) + +txPatDat6mNsaids <- readr::read_csv(fs::path(dataPath, "treatmentPatterns6mNsaids.csv"), + show_col_types = FALSE) + +txPatDat1yNsaids <- readr::read_csv(fs::path(dataPath, "treatmentPatterns1yNsaids.csv"), + show_col_types = FALSE) + +txPatDat2yNsaids <- readr::read_csv(fs::path(dataPath, "treatmentPatterns2yNsaids.csv"), + show_col_types = FALSE) + txPatDatAll <- dplyr::bind_rows( txPatDat, - txPatDat2 + txPatDat6m, + txPatDat1y, + txPatDat2y, + txPatDatNsaids, + txPatDat6mNsaids, + txPatDat1yNsaids, + txPatDat2yNsaids ) ### Sankey pickers -cohortName2 <- c( - cohortName, - "hmb2", "hmb2_age_lt_30", "hmb2_age_30_45", "hmb2_age_45_55" -) +cohortName2 <- c(cohortName) sankeyCohorts <- tibble::tibble( - id = c(1L, 1001L, 1002L, 1003L, 44L, 44001L, 44002L, 44003L), + id = c(1L, 1001L, 1002L, 1003L), name = cohortName2 ) +txTime <- unique(txPatDatAll$time) +txType <- unique(txPatDatAll$type) + # 6. Time to Event ----------------- @@ -192,13 +208,23 @@ ttd <- arrow::read_parquet(file = fs::path(here::here(dataPath ,"ttd.parquet"))) TRUE ~ `Cohort Name`) ) +ttd2 <- arrow::read_parquet(file = fs::path(here::here(dataPath ,"ttd2.parquet"))) %>% + dplyr::mutate(targetId = as.double(targetId)) %>% + dplyr::left_join(strataCounts, by = c("targetId" = "Strata Cohort Id", "database" = "Database")) %>% + dplyr::rename(`Cohort Name` = `Strata Cohort Name`) %>% + dplyr::select(database:`Cohort Name`) %>% + dplyr::mutate(`Cohort Name` = dplyr::case_when( + targetId == 1 ~ "hmb", + TRUE ~ `Cohort Name`) + ) + ### Relabel strata -ttd <- relabelStrata( - ttd, - oldLabels = as.character(c(27:29, 31:35)), - newLabels = c("oc", "danazol", "grha", "lglIUD", - "nsaids", "progestin", "tranexamicAcid", "ulipristalAcetate") -) +# ttd <- relabelStrata( +# ttd, +# oldLabels = as.character(c(27:29, 31:35)), +# newLabels = c("oc", "danazol", "grha", "lglIUD", +# "nsaids", "progestin", "tranexamicAcid", "ulipristalAcetate") +# ) ### TTE cohort pickers ttdCohorts <- unique(ttd$`Cohort Name`) @@ -209,6 +235,9 @@ tteCohorts <- tibble::tibble( name = cohortName ) +## TTD Line pickers +ttdLine <- unique(ttd$line) + ## Time to Intervention ----------------- @@ -222,15 +251,14 @@ tti <- arrow::read_parquet(file = fs::path(here::here(dataPath ,"tti.parquet"))) TRUE ~ `Cohort Name`) ) - - ### Relabel strata tti <- relabelOutcome( tti, - oldLabels = as.character(36:43), + oldLabels = as.character(38:45), newLabels = c("bloodTransfusion", "copperIUD", "endometrialAblation", "hormonalIUD", "hysterectomy", "myomectomy", "uae", "undefinedIUD") ) ### TTI cohort pickers ttiCohorts <- unique(tti$`Cohort Name`) + diff --git a/shiny/app.R b/shiny/app.R index b272bbc..4795f75 100644 --- a/shiny/app.R +++ b/shiny/app.R @@ -11,6 +11,7 @@ library(reactable) library(ggplot2) library(grafify) library(markdown) +library(ggsurvfit) options(shiny.fullstacktrace = FALSE) @@ -19,15 +20,16 @@ title <- "EHDEN HMB" underlyingDescription <- "Counts equal to 5 and below have been masked and replaced with '<5'." drugUtilizationDescription <- "Counts equal to 5 and below have been masked and replaced with '<5." -clinicalCharacteristicsDescription <- "Counts equal to 5 and below have been masked and replaced with '<5'." +drugUtilizationDescription2 <- "Category 'Within time window' means that a drug exposure started and ended within the specified time window." +drugUtilizationDescription3 <- "Category 'Complete follow-up' means that a drug exposure spanned throughout the specified time window." +clinicalCharacteristicsDescription <- "Counts equal to 5 and below have been masked and replaced with '<5'. Characteristics are assessed within a time window of 365 to 0 days prior to index date" -incidenceDescription <- "Incidence rate is calculated by 'Outcome Count'/'Person Days' * 100." +incidenceDescription <- "Incidence rate is defined as Outcome Count/Person Years * 1000." -treatmentPatternsDescription <- "Treatment Patterns counts (Sequences) are restricted to 30. HMB cohort is the population whose index event is HMB and are censored by hysterectomy. Thus we consider only drugs. -HMB2 cohort are those whose index event is HMB where there is no censoring for hysterectomy. Therefore we consider both drugs and procedures." +treatmentPatternsDescription <- "Treatment Patterns counts (Sequences) are restricted to 30. HMB cohort is the population whose index event is HMB and are censored by hysterectomy. Thus we consider only drugs." procedureAnalysisDescription <- "Counts equal to 5 and below have been masked and replaced with '<5'. -In the Time to Intervention tab we consider the time for only those who experienced the event, not the entire cohort population." +In the 'Time to Intervention' tab we consider the time for only those who experienced the event, not the entire cohort population." dashboardVersion <- "0.3.0" dashboardDate <- Sys.Date() @@ -79,6 +81,7 @@ body <- dashboardBody( ### About Tab------ tabItem( tabName = "about", + #### Study description fluidRow( box( @@ -88,6 +91,7 @@ body <- dashboardBody( includeMarkdown("StudyDescription.md") ) ), + #### Study Information fluidRow( box( @@ -99,6 +103,7 @@ body <- dashboardBody( "SAP Link:",a(href= "https://odyosg.github.io/ehden_hmb/sap.html", "https://odyosg.github.io/ehden_hmb/sap.html") ) ), + #### Database Information fluidRow( box( @@ -125,9 +130,10 @@ body <- dashboardBody( fluidRow( tabBox( - #title = "Cohort Counts", id = "cohortCounts", width = 12, + + #### Cohort Counts tabPanel("Cohort Counts", fluidRow( box( @@ -151,6 +157,8 @@ body <- dashboardBody( ) ) ), + + #### Strata Counts tabPanel("Strata Counts", fluidRow( box( @@ -199,6 +207,7 @@ body <- dashboardBody( box( status = "success", column(width = 6, + pickerInput( inputId = "databaseNameDemo", label = "Database Name", @@ -233,6 +242,7 @@ body <- dashboardBody( box( status = "success", column(width = 6, + pickerInput( inputId = "databaseNameCts", label = "Database Name", @@ -267,6 +277,7 @@ body <- dashboardBody( box( status = "success", column(width = 6, + pickerInput( inputId = "databaseNameConcept", label = "Database Name", @@ -286,6 +297,7 @@ body <- dashboardBody( ), ), column(width = 6, + pickerInput( inputId = "domainConcept", label = "Domain", @@ -310,6 +322,7 @@ body <- dashboardBody( box( status = "success", column(width = 6, + pickerInput( inputId = "databaseNameCohortCov", label = "Database Name", @@ -329,6 +342,7 @@ body <- dashboardBody( ) ), column(width = 6, + pickerInput( inputId = "domainCohortCov", label = "Domain", @@ -354,6 +368,7 @@ body <- dashboardBody( box( status = "success", column(width = 6, + pickerInput( inputId = "databaseNameIcd", label = "Database Name", @@ -407,14 +422,12 @@ body <- dashboardBody( id = "baselineChar", width = 12, - tabPanel( - "Table", + tabPanel("Table", fluidRow( box( status = "success", column(width = 6, - # pick database pickerInput( inputId = "databaseNameInci", label = "Database Name", @@ -424,7 +437,6 @@ body <- dashboardBody( multiple = TRUE ), - # pick cohortName pickerInput( inputId = "cohortNameInci", label = "Cohort Name", @@ -438,7 +450,6 @@ body <- dashboardBody( ), column(width = 6, - # pick year pickerInput( inputId = "yearInci", label = "Year", @@ -462,10 +473,7 @@ body <- dashboardBody( fluidRow( column(width = 6, - - box( - - status = "success", + box(status = "success", pickerInput( inputId = "databaseNameYrInci", @@ -489,7 +497,6 @@ body <- dashboardBody( ), fluidRow( - box( width = 12, plotOutput("inciYearPlot") @@ -518,7 +525,7 @@ body <- dashboardBody( box( status = "success", column(width = 6, - # pick database + pickerInput( inputId = "databaseNameCondPi", label = "Database Name", @@ -527,7 +534,7 @@ body <- dashboardBody( options = shinyWidgets::pickerOptions(actionsBox = TRUE), multiple = TRUE ), - # pick cohort + pickerInput( inputId = "cohortNameCondPi", label = "Cohort Name", @@ -539,7 +546,7 @@ body <- dashboardBody( ), column(width = 6, - # pick outcome + pickerInput( inputId = "conditionNameCondPi", label = "Condition Name", @@ -549,7 +556,7 @@ body <- dashboardBody( multiple = TRUE ), - # pick year + pickerInput( inputId = "timeWindowCondPi", label = "Time Window", @@ -582,6 +589,8 @@ body <- dashboardBody( width = 12, background = "light-blue", textOutput("treatmentPatternsDescription"), + textOutput("drugUtilizationDescription2"), + textOutput("drugUtilizationDescription3"), textOutput("drugUtilizationDescription") ) ), @@ -595,7 +604,8 @@ body <- dashboardBody( fluidRow( box( status = "success", - column(width = 6, + column(width = 4, + pickerInput( inputId = "databaseNameDuPi", label = "Database Name", @@ -604,6 +614,7 @@ body <- dashboardBody( options = shinyWidgets::pickerOptions(actionsBox = TRUE), multiple = TRUE ), + pickerInput( inputId = "cohortNameDuPi", label = "Cohort Name", @@ -613,7 +624,8 @@ body <- dashboardBody( multiple = TRUE ) ), - column(width = 6, + column(width = 4, + pickerInput( inputId = "drugNameDuPi", label = "Drug Name", @@ -622,6 +634,7 @@ body <- dashboardBody( options = shinyWidgets::pickerOptions(actionsBox = TRUE), multiple = TRUE ), + pickerInput( inputId = "timeWindowDuPi", label = "Time Window", @@ -630,6 +643,17 @@ body <- dashboardBody( options = shinyWidgets::pickerOptions(actionsBox = TRUE), multiple = TRUE ) + ), + column(width = 4, + + pickerInput( + inputId = "drugCat", + label = "Category", + choices = drugCategory, + selected = drugCategory, + options = shinyWidgets::pickerOptions(actionsBox = TRUE), + multiple = TRUE + ) ) ), fluidRow( @@ -646,18 +670,36 @@ body <- dashboardBody( tabPanel("Sequences", fluidRow( box( - status = "success", - width = 4, + status = "success", + width = 4, + column(width = 6, + pickerInput( inputId = "databaseNameSankey", label = "Database Name", choices = databaseName ), + pickerInput( inputId = "cohortNameSankey", label = "Cohort Name", choices = cohortName2 + ), + ), + column(width = 6, + + pickerInput( + inputId = "txTime", + label = "Time from index date", + choices = txTime + ), + + pickerInput( + inputId = "txType", + label = "Type", + choices = txType ) + ) ) ), fluidRow( @@ -673,40 +715,102 @@ body <- dashboardBody( reactableOutput("txPatDat")) ) ), - ### TTD Panel - tabPanel("Time to Discontinuation", + + ### TTD Panel (without) + tabPanel("Time to Discontinuation (without NSAIDs)", + fluidRow( + box( + status = "success", + column(width = 6, + height = "230px", + background = "light-blue", + + pickerInput( + inputId = "databaseNameTtd", + label = "Database Name", + choices = databaseName + ), + + pickerInput( + inputId = "cohortNameTtd", + label = "Cohort Name", + choices = ttdCohorts + ), + ), + column(width = 6, + + pickerInput( + inputId = "strataTtd", + label = "Drugs", + choices = c("Single", "All") + ) + ) + ) + ), fluidRow( - column(width = 9, - status = "success", - plotOutput("ttdKmPlot") + column(width = 12, + box( + width = NULL, + plotOutput(height = "1000px", + "ttdKmPlot") + ) ), - column(width = 3, - box( - width = 9, + + ), + fluidRow( + box( + width = 12, + reactableOutput("ttdSurvTab") + ) + ) + ), + + ### TTD Panel (with) + tabPanel("Time to Discontinuation (with NSAIDs)", + fluidRow( + box( + status = "success", + column(width = 6, height = "230px", background = "light-blue", + pickerInput( - inputId = "databaseNameTtd", + inputId = "databaseNameTtd2", label = "Database Name", choices = databaseName ), + pickerInput( - inputId = "cohortNameTtd", + inputId = "cohortNameTtd2", label = "Cohort Name", choices = ttdCohorts ), + ), + column(width = 6, + pickerInput( - inputId = "strataTtd", + inputId = "strataTtd2", label = "Drugs", choices = c("Single", "All") ) + ) + ) + ), + fluidRow( + column(width = 12, + box( + width = NULL, + #height = "1000px", + plotOutput(height = "1000px", + "ttdKmPlot2") ) ), + ), fluidRow( box( width = 12, - reactableOutput("ttdSurvTab") + reactableOutput("ttdSurvTab2") ) ) ) @@ -737,6 +841,7 @@ body <- dashboardBody( box( status = "success", column(width = 6, + pickerInput( inputId = "databaseNameProcPi", label = "Database Name", @@ -745,6 +850,7 @@ body <- dashboardBody( options = shinyWidgets::pickerOptions(actionsBox = TRUE), multiple = TRUE ), + pickerInput( inputId = "cohortNameProcPi", label = "Cohort Name", @@ -755,6 +861,7 @@ body <- dashboardBody( ) ), column(width = 6, + pickerInput( inputId = "procNameProcPi", label = "Procedure Name", @@ -763,6 +870,7 @@ body <- dashboardBody( options = shinyWidgets::pickerOptions(actionsBox = TRUE), multiple = TRUE ), + pickerInput( inputId = "timeWindowProcPi", label = "Time Window", @@ -782,36 +890,46 @@ body <- dashboardBody( ) ) ), + + ## Time to intervention tabPanel("Time to Intervention", fluidRow( - column(width = 9, - status = "success", - plotOutput("ttiKmPlot") - ), - column(width = 3, - box( - width = 9, + box( + status = "success", + column(width = 6, height = "180px", background = "light-blue", + pickerInput( inputId = "databaseNameTti", label = "Database Name", choices = databaseName ), + pickerInput( inputId = "cohortNameTti", label = "Cohort Name", choices = ttiCohorts ) - ) - ), + ), + ), ), fluidRow( - box( - width = 12, - reactableOutput("ttiSurvTab") + column(width = 12, + box( + width = NULL, + plotOutput(height = "1000px", + "ttiKmPlot") + ) ) + ), + fluidRow( + box( + width = 12, + reactableOutput("ttiSurvTab") + ) + ) ) ) ) @@ -839,12 +957,6 @@ server <- function(input, output, session){ description }) - ## Database Information - # output$databaseInformation <- renderReactable( - # databaseMeta %>% reactable() - # ) - - # Cohorts ---------------- ## Cohort Counts @@ -949,6 +1061,7 @@ server <- function(input, output, session){ columns = list( databaseId = colDef(name = "Database Name"), domain = colDef(name = "Domain"), + #timeWindow = colDef(name = "Time Window"), cohortName = colDef(name = "Cohort Name"), conceptId = colDef(name = "Concept Id"), name = colDef(name = "Concept Name"), @@ -974,7 +1087,7 @@ server <- function(input, output, session){ reactable( columns = list( databaseId = colDef(name = "Database Name"), - timeWindow = colDef(name = "Time Window"), + #timeWindow = colDef(name = "Time Window"), cohortName = colDef(name = "Cohort Name"), domain = colDef(name = "Domain"), covariateName = colDef(name = "Covariate Name"), @@ -999,6 +1112,7 @@ server <- function(input, output, session){ reactable( columns = list( databaseId = colDef(name = "Database Name"), + #timeWindow = colDef(name = "Time Window"), cohortName = colDef(name = "Cohort Name"), CATEGORY_CODE = colDef(name = "Concept Id"), categoryName = colDef(name = "Concept Name"), @@ -1028,7 +1142,7 @@ server <- function(input, output, session){ incTab %>% dplyr::filter(databaseId %in% input$databaseNameInci, START_YEAR %in% input$yearInci, - OUTCOME_NAME %in% snakecase::to_snake_case(input$cohortNameInci)) %>% + OUTCOME_NAME %in% input$cohortNameInci) %>% reactable( columns = list( databaseId = colDef(name = "Database Name"), @@ -1036,6 +1150,7 @@ server <- function(input, output, session){ OUTCOME_NAME = colDef(name = "Cohort Name"), PERSONS_AT_RISK = colDef(name = "Persons at Risk", format = colFormat(separators = TRUE)), PERSON_DAYS = colDef(name = "Person Days", format = colFormat(separators = TRUE)), + PERSON_YEARS = colDef(name = "Person Years", format = colFormat(separators = TRUE)), OUTCOMES = colDef(name = "Outcome Count", format = colFormat(separators = TRUE)), INCIDENCE_PROPORTION_P100P = colDef(name = "Incidence Proportion (per 100p)", format = colFormat(digits = 2)), INCIDENCE_RATE_P1000PY = colDef(name = "Incidence Rate (per 1000yrs)", format = colFormat(digits = 2)) @@ -1055,7 +1170,8 @@ server <- function(input, output, session){ dplyr::filter( START_YEAR != "All", databaseId %in% input$databaseNameYrInci, - OUTCOME_NAME %in% snakecase::to_snake_case(input$cohortNameYrInci) + OUTCOME_NAME %in% input$cohortNameYrInci + #OUTCOME_NAME %in% snakecase::to_snake_case(input$cohortNameYrInci) ) }) @@ -1109,11 +1225,20 @@ server <- function(input, output, session){ drugUtilizationDescription }) + output$drugUtilizationDescription2 <- renderText({ + drugUtilizationDescription2 + }) + + output$drugUtilizationDescription3 <- renderText({ + drugUtilizationDescription3 + }) + ## Utilization duPiRe <- reactive({ drugPi %>% dplyr::filter( + cat %in% input$drugCat, databaseId %in% input$databaseNameDuPi, cohortName %in% input$cohortNameDuPi, covariateName %in% input$drugNameDuPi, @@ -1126,6 +1251,7 @@ server <- function(input, output, session){ columns = list( databaseId = colDef(name = "Database Name"), timeWindow = colDef(name = "Time Window"), + cat = colDef(name = "Category"), cohortName = colDef(name = "Cohort Name"), covariateName = colDef(name = "Drug Cohort Name"), count = colDef(name = "Person Count", format = colFormat(separators = TRUE)), @@ -1154,6 +1280,8 @@ server <- function(input, output, session){ txPatDatAll %>% dplyr::filter( databaseId == input$databaseNameSankey, + time == input$txTime, + type == input$txType, cohortId == sankeyPick()) %>% dplyr::slice(1:20) %>% buildSankeyData() %>% @@ -1165,6 +1293,8 @@ server <- function(input, output, session){ txPatDatAll %>% dplyr::filter( databaseId == input$databaseNameSankey, + time == input$txTime, + type == input$txType, cohortId == sankeyPick() ) %>% dplyr::arrange(desc(n)) %>% @@ -1214,7 +1344,7 @@ server <- function(input, output, session){ dplyr::pull(name) }) - # subset to those with strata + # subset to those with strata (without) ttdSubset <- reactive({ dt1 <- ttd %>% @@ -1222,6 +1352,7 @@ server <- function(input, output, session){ database == input$databaseNameTtd, `Cohort Name` == input$cohortNameTtd ) + if (input$strataTtd == "Single") { dt1 <- dt1 %>% dplyr::filter( @@ -1233,12 +1364,131 @@ server <- function(input, output, session){ }) - # make km plot - output$ttdKmPlot <- renderPlot({ - plotKM(ttdSubset()) + # subset to those with strata (with) + ttdSubset2 <- reactive({ + + dt1 <- ttd2 %>% + dplyr::filter( + database == input$databaseNameTtd2, + `Cohort Name` == input$cohortNameTtd2 + ) + + if (input$strataTtd2 == "Single") { + dt1 <- dt1 %>% + dplyr::filter( + !grepl("\\+", strata) + ) + } + + dt1 + }) - ## get survProb table + + ## Find rds file (without) + ttdKMRds <- reactive ({ + + if (input$cohortNameTtd == "hmb") { + lineKM <- "1" + } else if (input$cohortNameTtd == "hmb age_lt_30"){ + lineKM <- "1001" + } else if (input$cohortNameTtd == "hmb age_30_45") { + lineKM <- "1002" + } else if (input$cohortNameTtd == "hmb age_45_55") { + lineKM <- "1003" + } + + ttd_km_name <- here::here(glue::glue('shiny/data_0603/ttd/wo/tte_{input$databaseNameTtd}_{lineKM}.rds')) + + ttd_km <- readr::read_rds(ttd_km_name) + + ttd_km_final <- ttd_km[[glue::glue("{input$strataTtd}")]] + + return(ttd_km_final) + }) + + + output$ttdKmPlot <- renderPlot( + res = 80, + { + + tte <- ttdKMRds() + + ## Number of colors for lines + colors <- colorspace::rainbow_hcl(unique(length(tte$strata))) + + tte |> + ggsurvfit(size = 1) + + scale_ggsurvfit(x_scales=list(breaks=c(0.5, 0:3))) + # Breaks at 6m and 1-3 years + scale_color_manual(values = colors) + + scale_fill_manual(values = colors) + + add_risktable(risktable_stats = "{n.risk} ({cum.censor})", + risktable_height = 0.4, + size = 4, # increase font size of risk table statistics + theme = # increase font size of risk table title and y-axis label + list( + theme_risktable_default(axis.text.y.size = 11, + plot.title.size = 11), + theme(plot.title = element_text(face = "bold")) + )) + + #add_risktable_strata_symbol(symbol = "\U25CF", size = 18) + + labs(x = "Follow-up time, years") + + }) + + ## Find rds file (with) + ttdKMRds2 <- reactive ({ + + if (input$cohortNameTtd2 == "hmb") { + lineKM <- "1" + } else if (input$cohortNameTtd2 == "hmb age_lt_30"){ + lineKM <- "1001" + } else if (input$cohortNameTtd2 == "hmb age_30_45") { + lineKM <- "1002" + } else if (input$cohortNameTtd2 == "hmb age_45_55") { + lineKM <- "1003" + } + + ttd_km_name <- here::here(glue::glue('shiny/data_0603/ttd/with/tte_{input$databaseNameTtd2}_{lineKM}.rds')) + + ttd_km <- readr::read_rds(ttd_km_name) + + ttd_km_final <- ttd_km[[glue::glue("{input$strataTtd2}")]] + + return(ttd_km_final) + }) + + + output$ttdKmPlot2 <- renderPlot( + res = 80, + { + + tte <- ttdKMRds2() + + ## Number of colors for lines + colors <- colorspace::rainbow_hcl(unique(length(tte$strata))) + + tte |> + ggsurvfit(size = 1) + + scale_ggsurvfit(x_scales=list(breaks=c(0.5, 0:3))) + # Breaks at 6m and 1-3 years + scale_color_manual(values = colors) + + scale_fill_manual(values = colors) + + add_risktable(risktable_stats = "{n.risk} ({cum.censor})", + risktable_height = 0.4, + size = 4, # increase font size of risk table statistics + theme = # increase font size of risk table title and y-axis label + list( + theme_risktable_default(axis.text.y.size = 11, + plot.title.size = 11), + theme(plot.title = element_text(face = "bold")) + )) + + #add_risktable_strata_symbol(symbol = "\U25CF", size = 18) + + labs(x = "Follow-up time, years") + + }) + + + ## get survProb table (without) output$ttdSurvTab <- renderReactable( makeSurvProbTab(ttdSubset()) %>% reactable( @@ -1259,6 +1509,27 @@ server <- function(input, output, session){ ) ) + ## get survProb table (with) + output$ttdSurvTab2 <- renderReactable( + makeSurvProbTab(ttdSubset2()) %>% + reactable( + columns = list( + database = colDef(name = "Database Name"), + `Cohort Name` = colDef(name = "Cohort Name"), + strata = colDef(name = "Drug Cohort Name"), + `6 month` = colDef(name = "6 month Survival", format = colFormat(digits = 1, percent = TRUE)), + `1 year` = colDef(name = "1 year Survival", format = colFormat(digits = 1, percent = TRUE)), + `2 year` = colDef(name = "2 year Survival", format = colFormat(digits = 1, percent = TRUE)) + ), + filterable = TRUE, + searchable = TRUE, + outlined = TRUE, + bordered = TRUE, + striped = TRUE, + defaultPageSize = 20 + ) + ) + # Procedure Analysis --------------------------------- @@ -1317,11 +1588,58 @@ server <- function(input, output, session){ ) }) - # make km plot - output$ttiKmPlot <- renderPlot({ - plotKM2(ttiSubset()) + + ## Find rds file + ttiKMRds <- reactive ({ + + if (input$cohortNameTti == "hmb") { + lineKM <- "1" + } else if (input$cohortNameTti == "hmb age_lt_30"){ + lineKM <- "1001" + } else if (input$cohortNameTti == "hmb age_30_45") { + lineKM <- "1002" + } else if (input$cohortNameTti == "hmb age_45_55") { + lineKM <- "1003" + } + + ttd_km_name <- here::here(glue::glue('shiny/data_0603/tti/tti_{input$databaseNameTti}_{lineKM}.rds')) + + ttd_km <- readr::read_rds(ttd_km_name) + + return(ttd_km) }) + + output$ttiKmPlot <- renderPlot( + res = 80, + { + + tti <- ttiKMRds() + + ## Number of colors for lines + colors <- colorspace::rainbow_hcl(unique(length(tti$strata))) + + tti |> + ggsurvfit(size = 1) + + scale_ggsurvfit(x_scales=list(breaks=c(0.5, 0:3))) + # Breaks at 6m and 1-3 years + scale_color_manual(values = colors) + + scale_fill_manual(values = colors) + + add_risktable(risktable_stats = "{n.risk} ({cum.censor})", + risktable_height = 0.4, + size = 4, # increase font size of risk table statistics + theme = # increase font size of risk table title and y-axis label + list( + theme_risktable_default(axis.text.y.size = 11, + plot.title.size = 11), + theme(plot.title = element_text(face = "bold")) + )) + + #add_risktable_strata_symbol(symbol = "\U25CF", size = 18) + + labs(x = "Follow-up time, years") + + }) + + + ## get survProb table output$ttiSurvTab <- renderReactable( makeSurvProbTab2(ttiSubset()) %>% diff --git a/shiny/migration/dataMigration.R b/shiny/migration/dataMigration.R index 82f94ae..82ad8c8 100644 --- a/shiny/migration/dataMigration.R +++ b/shiny/migration/dataMigration.R @@ -7,13 +7,13 @@ library(readr) source("shiny/migration/helpers.R") -appDataPath <- here::here("shiny", "data_old") # Path to place app data +appDataPath <- here::here("shiny", "data_0603") # Path to place app data appDataPath %>% fs::dir_create() # Create new directory -resultsPath <- here::here("results") # Set path to the execution results +resultsPath <- here::here("results_0603") # Set path to the execution results -listOfDatabase <- fs::dir_ls(resultsPath) %>% #list the databases used in the execution +listOfDatabase <- fs::dir_ls(resultsPath) %>% # List the databases used in the execution basename() ### List the execution tasks @@ -24,13 +24,14 @@ listOfTasks <- c("01_buildCohorts", "05_baselineCharacteristics", "06_postIndexPrevalenceConditions", "07_postIndexPrevalenceDrugs", - "08_treatmentHistory", + "07_postIndexPrevalenceDrugs2", "09_treatmentPatterns", + "09_treatmentPatterns2", "10_timeToDiscontinuation", + "10_timeToDiscontinuation2", "11_postIndexPrevalenceProcedures", - "12_timeToIntervention", - "13_treatmentHistory2", - "14_treatmentPatterns") + "12_timeToIntervention" + ) ### Create a data frame of all permutations of paths allPaths <- tidyr::expand_grid(listOfDatabase, listOfTasks) %>% @@ -242,7 +243,6 @@ cts <- bindCsv(allPaths = allPaths, task = listOfTasks[5], # baseline char file = "continuous_baseline.csv") - cts2 <- cts %>% dplyr::left_join( allCohorts, by = c("cohortDefinitionId" ="id"), relationship = "many-to-many" @@ -264,6 +264,7 @@ readr::write_csv(cts2, file = fs::path(appDataPath, "baselineContinuous.csv")) ## 5. Baseline Concepts ---------------- ### Extract drug concepts + drug <- bindCsv(allPaths = allPaths, task = listOfTasks[5], # baseline char file = "drugs_baseline.csv") %>% @@ -275,7 +276,7 @@ drug <- bindCsv(allPaths = allPaths, domain = "Drugs" ) %>% dplyr::select( - databaseId, domain, cohortDefinitionId, cohortName, conceptId, name, n, pct + databaseId, domain, cohortDefinitionId, cohortName, conceptId, name, n, pct, timeWindow ) ### Extract condition concepts @@ -290,7 +291,7 @@ cond <- bindCsv(allPaths = allPaths, domain = "Conditions" ) %>% dplyr::select( - databaseId, domain, cohortDefinitionId, cohortName, conceptId, name, n, pct + databaseId, domain, cohortDefinitionId, cohortName, conceptId, name, n, pct, timeWindow ) ### Extract procedure concepts (exclude THINBE procedures) @@ -308,7 +309,7 @@ proc <- bindCsv(allPaths = procPaths, domain = "Procedures" ) %>% dplyr::select( - databaseId, domain, cohortDefinitionId, cohortName, conceptId, name, n, pct + databaseId, domain, cohortDefinitionId, cohortName, conceptId, name, n, pct, timeWindow ) conceptTab <- dplyr::bind_rows( @@ -330,7 +331,7 @@ cohort365 <- bindCsv(allPaths = allPaths, ) %>% dplyr::mutate( covariateName = name, - timeWindow = "[-365 : 0]" + timeWindow = "-365d to 0d" ) %>% dplyr::select(databaseId, timeWindow, cohortId, cohortName, covariateId, covariateName, count, pct) %>% @@ -404,7 +405,7 @@ icd10 <- bindCsv(allPaths = allPaths, pct = COUNTVALUE / n ) %>% dplyr::select(databaseId, COHORT_ID, cohortName, - CATEGORY_CODE, categoryName, COUNTVALUE, pct) + CATEGORY_CODE, categoryName, COUNTVALUE, pct, timeWindow) readr::write_csv(icd10, file = fs::path(appDataPath, "baselineChapters.csv")) @@ -416,9 +417,11 @@ readr::write_csv(icd10, file = fs::path(appDataPath, "baselineChapters.csv")) piPrevFilesCond <- c("cohort_covariates_1_365.csv", "cohort_covariates_366_730.csv", "cohort_covariates_731_1825.csv") -piPrevTimeFrameCond <- c("1d - 365d", - "366d - 730d", - "731d - 1825d") + +piPrevTimeFrameCond <- c("1d to 365d", + "366d to 730d", + "731d to 1825d") + piPrevCond <- purrr::map2_dfr(piPrevFilesCond, # files to use piPrevTimeFrameCond, # time frame column to add ~bindCsv( # bind csv @@ -429,6 +432,7 @@ piPrevCond <- purrr::map2_dfr(piPrevFilesCond, # files to use timeWindow = .y )) %>% dplyr::mutate( + cat = "-", type = "conditions" ) %>% dplyr::select(-covariateName) %>% @@ -447,22 +451,30 @@ piPrevCond <- purrr::map2_dfr(piPrevFilesCond, # files to use ### B. Post Index Drugs ---------------- piPrevFilesDrugs <- c( + "cohort_covariates_183_1.csv", + "cohort_covariates_365_1.csv", + "cohort_covariates_730_1.csv", "cohort_covariates_1_183.csv", - "cohort_covariates_184_365.csv", "cohort_covariates_1_365.csv", + "cohort_covariates_1_730.csv", + "cohort_covariates_184_365.csv", "cohort_covariates_366_730.csv", "cohort_covariates_731_1825.csv" ) piPrevTimeFrameDrugs <- c( - "1d - 183d", - "184d - 365d", - "1d - 365d", - "366d - 730d", - "731d - 1825d" + "-183d to -1d", + "-365d to -1d", + "-730d to -1d", + "1d to 183d", + "1d to 365d", + "1d to 730d", + "184d to 365d", + "366d to 730d", + "731d to 1825d" ) -piPrevDrugs <- purrr::map2_dfr(piPrevFilesDrugs, # files to use +piPrevDrugsWithin <- purrr::map2_dfr(piPrevFilesDrugs, # files to use piPrevTimeFrameDrugs, # time frame column to add ~bindCsv( # bind csv allPaths = allPaths, @@ -471,10 +483,28 @@ piPrevDrugs <- purrr::map2_dfr(piPrevFilesDrugs, # files to use dplyr::mutate( # add timeWindow Column timeWindow = .y )) %>% + dplyr::rename(cat = type) %>% + dplyr::mutate( + type = "drugs" + ) + + +piPrevDrugsFollowUp <- purrr::map2_dfr(piPrevFilesDrugs, # files to use + piPrevTimeFrameDrugs, # time frame column to add + ~bindCsv( # bind csv + allPaths = allPaths, + task = listOfTasks[8], # postindex Drugs + file = .x) %>% + dplyr::mutate( # add timeWindow Column + timeWindow = .y + )) %>% + dplyr::rename(cat = type) %>% dplyr::mutate( type = "drugs" ) +piPrevDrugs <- dplyr::bind_rows(piPrevDrugsWithin, piPrevDrugsFollowUp) + ### C. Post Index Procedures ---------------- # skip THIN no procedures @@ -490,12 +520,13 @@ piPrevProc <- purrr::map2_dfr(piPrevFilesProc, # files to use piPrevProcCohorts, # cohorts ~bindCsv( # bind csv allPaths = procPaths, # skip THIN no procedures - task = listOfTasks[11], # postindex Proc + task = listOfTasks[13], # postindex Proc file = .x) %>% dplyr::mutate( cohortId = .y )) %>% dplyr::mutate( + cat = "-", cohortName = dplyr::case_when( cohortId == 1 ~ "hmb", cohortId == 1001L ~ "hmb_age_lt_30", @@ -525,7 +556,7 @@ piPrevProc <- purrr::map2_dfr(piPrevFilesProc, # files to use type = "procedures" ) %>% dplyr::select(databaseId, cohortId, cohortName, covariateId, covariateName, - count, pct, timeWindow, type) + count, pct, timeWindow, type, cat) postIndexPrev <- dplyr::bind_rows( piPrevCond, piPrevDrugs, piPrevProc @@ -565,10 +596,41 @@ inic2 <- inic %>% ## 10. Treatment Patterns ---------------- -### A. HMB normal ---------------- +### A. HMB ---------------- +#### All ---------------- +txPath <- allPaths %>% + dplyr::filter(listOfTasks == listOfTasks[9]) %>% + dplyr::mutate(fullPath = fs::path(fullPath, "/all")) + +### Get treatment patterns table +txPathDat <- purrr::pmap_dfr( + txPath, + ~bindTxPathTab(path = ..3, database = ..1) +) %>% + tidyr::separate_wider_delim( + cols = cohortName, + delim = "_", + names = c("type", "cohortId") + ) %>% + dplyr::mutate( + cohortName = dplyr::case_when( + cohortId == 1 ~ "hmb", + cohortId == 1001L ~ "hmb_age_lt_30", + cohortId == 1002L ~ "hmb_age_30_45", + cohortId == 1003L ~ "hmb_age_45_55" + ) + ) %>% + dplyr::select(databaseId, cohortId, cohortName, event_cohort_name1:event_cohort_name5, End, n) %>% + dplyr::mutate(time = "All time", + type = "Without NSAIDS") %>% + dplyr::arrange(databaseId, cohortId, desc(n)) + +readr::write_csv(txPathDat, file = fs::path(appDataPath, "treatmentPatternsAll.csv")) +#### 6m ---------------- txPath <- allPaths %>% - dplyr::filter(listOfTasks == listOfTasks[9]) + dplyr::filter(listOfTasks == listOfTasks[9]) %>% + dplyr::mutate(fullPath = fs::path(fullPath, "/6m")) ### Get treatment patterns table txPathDat <- purrr::pmap_dfr( @@ -589,18 +651,50 @@ txPathDat <- purrr::pmap_dfr( ) ) %>% dplyr::select(databaseId, cohortId, cohortName, event_cohort_name1:event_cohort_name5, End, n) %>% + dplyr::mutate(time = "6 months", + type = "Without NSAIDS") %>% dplyr::arrange(databaseId, cohortId, desc(n)) -readr::write_csv(txPathDat, file = fs::path(appDataPath, "treatmentPatterns.csv")) +readr::write_csv(txPathDat, file = fs::path(appDataPath, "treatmentPatterns6m.csv")) + +#### 1y ---------------- +txPath <- allPaths %>% + dplyr::filter(listOfTasks == listOfTasks[9]) %>% + dplyr::mutate(fullPath = fs::path(fullPath, "/1y")) + +### Get treatment patterns table +txPathDat <- purrr::pmap_dfr( + txPath, + ~bindTxPathTab(path = ..3, database = ..1) +) %>% + tidyr::separate_wider_delim( + cols = cohortName, + delim = "_", + names = c("type", "cohortId") + ) %>% + dplyr::mutate( + cohortName = dplyr::case_when( + cohortId == 1 ~ "hmb", + cohortId == 1001L ~ "hmb_age_lt_30", + cohortId == 1002L ~ "hmb_age_30_45", + cohortId == 1003L ~ "hmb_age_45_55" + ) + ) %>% + dplyr::select(databaseId, cohortId, cohortName, event_cohort_name1:event_cohort_name5, End, n) %>% + dplyr::mutate(time = "1 year", + type = "Without NSAIDS") %>% + dplyr::arrange(databaseId, cohortId, desc(n)) +readr::write_csv(txPathDat, file = fs::path(appDataPath, "treatmentPatterns1y.csv")) -### B. HMB 2 ---------------- -txPath2 <- allPaths %>% - dplyr::filter(listOfTasks == listOfTasks[14]) +#### 2y ---------------- +txPath <- allPaths %>% + dplyr::filter(listOfTasks == listOfTasks[9]) %>% + dplyr::mutate(fullPath = fs::path(fullPath, "/2y")) ### Get treatment patterns table -txPathDat2 <- purrr::pmap_dfr( - txPath2, +txPathDat <- purrr::pmap_dfr( + txPath, ~bindTxPathTab(path = ..3, database = ..1) ) %>% tidyr::separate_wider_delim( @@ -610,30 +704,145 @@ txPathDat2 <- purrr::pmap_dfr( ) %>% dplyr::mutate( cohortName = dplyr::case_when( - cohortId == 44 ~ "hmb2", - cohortId == 44001L ~ "hmb2_age_lt_30", - cohortId == 44002L ~ "hmb2_age_30_45", - cohortId == 44003L ~ "hmb2_age_45_55" + cohortId == 1 ~ "hmb", + cohortId == 1001L ~ "hmb_age_lt_30", + cohortId == 1002L ~ "hmb_age_30_45", + cohortId == 1003L ~ "hmb_age_45_55" ) ) %>% dplyr::select(databaseId, cohortId, cohortName, event_cohort_name1:event_cohort_name5, End, n) %>% + dplyr::mutate(time = "2 years", + type = "Without NSAIDS") %>% dplyr::arrange(databaseId, cohortId, desc(n)) -readr::write_csv(txPathDat2, file = fs::path(appDataPath, "treatmentPatterns2.csv")) +readr::write_csv(txPathDat, file = fs::path(appDataPath, "treatmentPatterns2y.csv")) + +### A. HMB (with NSAIDs) ---------------- +#### All ---------------- +txPath <- allPaths %>% + dplyr::filter(listOfTasks == listOfTasks[10]) %>% + dplyr::mutate(fullPath = fs::path(fullPath, "/all")) + +### Get treatment patterns table +txPathDat <- purrr::pmap_dfr( + txPath, + ~bindTxPathTab(path = ..3, database = ..1) +) %>% + tidyr::separate_wider_delim( + cols = cohortName, + delim = "_", + names = c("type", "cohortId") + ) %>% + dplyr::mutate( + cohortName = dplyr::case_when( + cohortId == 1 ~ "hmb", + cohortId == 1001L ~ "hmb_age_lt_30", + cohortId == 1002L ~ "hmb_age_30_45", + cohortId == 1003L ~ "hmb_age_45_55" + ) + ) %>% + dplyr::select(databaseId, cohortId, cohortName, event_cohort_name1:event_cohort_name5, End, n) %>% + dplyr::mutate(time = "All time", + type = "With NSAIDS") %>% + dplyr::arrange(databaseId, cohortId, desc(n)) -## get sankey diagram -# sankey <- purrr::pmap( -# txPath, -# ~groupSankey(path = ..3, database = ..1) -# ) -# names(sankey) <- listOfDatabase -# readr::write_rds(sankey, file = fs::path(appDataPath, "sankey.rds")) +readr::write_csv(txPathDat, file = fs::path(appDataPath, "treatmentPatternsAllNsaids.csv")) + +#### 6m ---------------- +txPath <- allPaths %>% + dplyr::filter(listOfTasks == listOfTasks[10]) %>% + dplyr::mutate(fullPath = fs::path(fullPath, "/6m")) + +### Get treatment patterns table +txPathDat <- purrr::pmap_dfr( + txPath, + ~bindTxPathTab(path = ..3, database = ..1) +) %>% + tidyr::separate_wider_delim( + cols = cohortName, + delim = "_", + names = c("type", "cohortId") + ) %>% + dplyr::mutate( + cohortName = dplyr::case_when( + cohortId == 1 ~ "hmb", + cohortId == 1001L ~ "hmb_age_lt_30", + cohortId == 1002L ~ "hmb_age_30_45", + cohortId == 1003L ~ "hmb_age_45_55" + ) + ) %>% + dplyr::select(databaseId, cohortId, cohortName, event_cohort_name1:event_cohort_name5, End, n) %>% + dplyr::mutate(time = "6 months", + type = "With NSAIDS") %>% + dplyr::arrange(databaseId, cohortId, desc(n)) + +readr::write_csv(txPathDat, file = fs::path(appDataPath, "treatmentPatterns6mNsaids.csv")) + +#### 1y ---------------- +txPath <- allPaths %>% + dplyr::filter(listOfTasks == listOfTasks[10]) %>% + dplyr::mutate(fullPath = fs::path(fullPath, "/1y")) + +### Get treatment patterns table +txPathDat <- purrr::pmap_dfr( + txPath, + ~bindTxPathTab(path = ..3, database = ..1) +) %>% + tidyr::separate_wider_delim( + cols = cohortName, + delim = "_", + names = c("type", "cohortId") + ) %>% + dplyr::mutate( + cohortName = dplyr::case_when( + cohortId == 1 ~ "hmb", + cohortId == 1001L ~ "hmb_age_lt_30", + cohortId == 1002L ~ "hmb_age_30_45", + cohortId == 1003L ~ "hmb_age_45_55" + ) + ) %>% + dplyr::select(databaseId, cohortId, cohortName, event_cohort_name1:event_cohort_name5, End, n) %>% + dplyr::mutate(time = "1 year", + type = "With NSAIDS") %>% + dplyr::arrange(databaseId, cohortId, desc(n)) + +readr::write_csv(txPathDat, file = fs::path(appDataPath, "treatmentPatterns1yNsaids.csv")) + +#### 2y ---------------- +txPath <- allPaths %>% + dplyr::filter(listOfTasks == listOfTasks[10]) %>% + dplyr::mutate(fullPath = fs::path(fullPath, "/2y")) + +### Get treatment patterns table +txPathDat <- purrr::pmap_dfr( + txPath, + ~bindTxPathTab(path = ..3, database = ..1) +) %>% + tidyr::separate_wider_delim( + cols = cohortName, + delim = "_", + names = c("type", "cohortId") + ) %>% + dplyr::mutate( + cohortName = dplyr::case_when( + cohortId == 1 ~ "hmb", + cohortId == 1001L ~ "hmb_age_lt_30", + cohortId == 1002L ~ "hmb_age_30_45", + cohortId == 1003L ~ "hmb_age_45_55" + ) + ) %>% + dplyr::select(databaseId, cohortId, cohortName, event_cohort_name1:event_cohort_name5, End, n) %>% + dplyr::mutate(time = "2 years", + type = "With NSAIDS") %>% + dplyr::arrange(databaseId, cohortId, desc(n)) + +readr::write_csv(txPathDat, file = fs::path(appDataPath, "treatmentPatterns2yNsaids.csv")) ## 11. Time to event ---------------- -### A. Time to discontinuation ---------------- +### A.1 TTD without NSAIDs (Tables) ---------------- ### List files to extract ttdFiles <- c("tte_1.csv", @@ -647,12 +856,13 @@ permutations <- tidyr::expand_grid( ) ### Bind all in ttd +#debug(bindTteData) ttd <- purrr::pmap_dfr( permutations, ~bindTteData( path = resultsPath, database = ..2, - task = listOfTasks[10], + task = listOfTasks[11], ## Change number file = ..1 ) ) @@ -662,8 +872,88 @@ arrow::write_parquet( sink = fs::path(appDataPath, "ttd.parquet") ) +### A.2 TTD with NSAIDs (Tables) ---------------- -### B. Time to intervention ---------------- +### List files to extract +ttdFiles <- c("tte_1.csv", + "tte_1001.csv", + "tte_1002.csv", + "tte_1003.csv") + +permutations <- tidyr::expand_grid( + ttdFiles, + listOfDatabase +) + +### Bind all in ttd +#debug(bindTteData) +ttd <- purrr::pmap_dfr( + permutations, + ~bindTteData( + path = resultsPath, + database = ..2, + task = listOfTasks[12], + file = ..1 + ) +) + +arrow::write_parquet( + x = ttd, + sink = fs::path(appDataPath, "ttd2.parquet") +) + +### B.1 TTD without NSAIDs (survfit) ---------------- + +## Create output folder +outputPath <- here::here(appDataPath, "ttd/wo") %>% + fs::dir_create() + +outputPath <- here::here(appDataPath, "ttd/wo") + +for (i in 1:length(listOfDatabase)) { + + db <- listOfDatabase[i] + + path <- allPaths %>% + dplyr::filter(listOfTasks == listOfTasks[11] & listOfDatabase == db) %>% + dplyr::pull(fullPath) + + listOfFiles <- list.files(path, full.names = FALSE, pattern = ".rds", recursive = TRUE) + + inputPath <- here::here("results_0603", db, listOfTasks[11]) + + fs::file_copy(here::here(inputPath, listOfFiles), + here::here(outputPath, listOfFiles), overwrite = TRUE) + +} + +### B.2 TTD with NSAIDs (survfit) ---------------- + +## Create output folder +outputPath <- here::here(appDataPath, "ttd/with") %>% + fs::dir_create() + +outputPath <- here::here(appDataPath, "ttd/with") + +for (i in 1:length(listOfDatabase)) { + + db <- listOfDatabase[i] + + path <- allPaths %>% + dplyr::filter(listOfTasks == listOfTasks[12] & listOfDatabase == db) %>% + dplyr::pull(fullPath) + + listOfFiles <- list.files(path, full.names = FALSE, pattern = ".rds", recursive = TRUE) + + inputPath <- here::here("results_0603", db, listOfTasks[12]) + + fs::file_copy(here::here(inputPath, listOfFiles), + here::here(outputPath, listOfFiles), overwrite = TRUE) + +} + + +### C.1 Time to intervention (Tables) ---------------- ttiFiles <- c("procedure_survival_1.csv", "procedure_survival_1001.csv", @@ -681,7 +971,7 @@ tti <- purrr::pmap_dfr( ~bindTteData2( path = resultsPath, database = ..2, - task = listOfTasks[12], + task = listOfTasks[14], file = ..1 ) ) @@ -691,3 +981,29 @@ arrow::write_parquet( sink = fs::path(appDataPath, "tti.parquet") ) + +### C.2 Time to intervention (survfit) ---------------- + +## Create output folder +outputPath <- here::here(appDataPath, "tti") %>% + fs::dir_create() + +outputPath <- here::here(appDataPath, "tti") + +for (i in 1:length(listOfDatabase)) { + + db <- listOfDatabase[i] + + path <- allPaths %>% + dplyr::filter(listOfTasks == listOfTasks[14] & listOfDatabase == db) %>% + dplyr::pull(fullPath) + + listOfFiles <- list.files(path, full.names = FALSE, pattern = ".rds", recursive = TRUE) + + inputPath <- here::here("results_0603", db, listOfTasks[14]) + + fs::file_copy(here::here(inputPath, listOfFiles), + here::here(outputPath, listOfFiles), overwrite = TRUE) + +} + diff --git a/shiny/migration/helpers.R b/shiny/migration/helpers.R index 78eeca9..f0965e1 100644 --- a/shiny/migration/helpers.R +++ b/shiny/migration/helpers.R @@ -155,46 +155,48 @@ bindTteData <- function(path, tools::file_path_sans_ext() #read in data - tteData <- readr::read_csv(file = pathToFile, show_col_types = FALSE) %>% - dplyr::filter( - time <= 3 - ) - - #remove singleLine Strata - singleLineStrata <- tteData %>% - dplyr::filter( - !grepl("\\+", strata) - ) %>% - dplyr::pull(strata) %>% - unique() - - #get top 4 multi lines - top4MultiLineStrata <-tteData %>% - dplyr::filter( - grepl("\\+", strata) - ) %>% - count(strata) %>% - dplyr::arrange(desc(n)) %>% - dplyr::slice(1:4) %>% - dplyr::pull(strata) %>% - unique() - - # combine specified strata lines - strataLines <- c(singleLineStrata, top4MultiLineStrata) + tteData <- readr::read_csv(file = pathToFile, show_col_types = FALSE) + + # %>% + # dplyr::filter( + # time <= 3 + # ) + # + # #remove singleLine Strata + # singleLineStrata <- tteData %>% + # dplyr::filter( + # !grepl("\\+", strata) + # ) %>% + # dplyr::pull(strata) %>% + # unique() + # + # #get top 4 multi lines + # top4MultiLineStrata <-tteData %>% + # dplyr::filter( + # grepl("\\+", strata) + # ) %>% + # count(strata) %>% + # dplyr::arrange(desc(n)) %>% + # dplyr::slice(1:4) %>% + # dplyr::pull(strata) %>% + # unique() + # + # # combine specified strata lines + # strataLines <- c(singleLineStrata, top4MultiLineStrata) # subset tted Data to the specified strata lines subsetTteData <- tteData %>% dplyr::mutate(strata = as.character(strata)) %>% - dplyr::filter( - strata %in% strataLines - ) %>% + # dplyr::filter( + # strata %in% strataLines + # ) %>% dplyr::mutate( database = !!database, targetId = !!targetId ) %>% dplyr::select( - database, targetId, strata, time, n.risk, n.event, estimate, std.error + database, targetId, strata, time, n.risk, n.event, estimate, std.error, line ) return(subsetTteData) @@ -215,10 +217,11 @@ bindTteData2 <- function(path, tools::file_path_sans_ext() #read in data - tteData <- readr::read_csv(file = pathToFile, show_col_types = FALSE) %>% - dplyr::filter( - time <= 3 - ) + tteData <- readr::read_csv(file = pathToFile, show_col_types = FALSE) + # %>% + # dplyr::filter( + # time <= 3 + # ) # subset tted Data to the specified strata lines updateTteData <- tteData %>%