From 0b969e6566939649d71b1cfa2ecad1920403aedf Mon Sep 17 00:00:00 2001
From: carriewright11 <cwright2@fredhutch.org>
Date: Tue, 9 Jan 2024 23:19:10 -0500
Subject: [PATCH 01/16] adding to summary

---
 modules/Data_Cleaning/Data_Cleaning.Rmd | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/Data_Cleaning/Data_Cleaning.Rmd b/modules/Data_Cleaning/Data_Cleaning.Rmd
index e9a484e6b..2475125e9 100644
--- a/modules/Data_Cleaning/Data_Cleaning.Rmd
+++ b/modules/Data_Cleaning/Data_Cleaning.Rmd
@@ -364,7 +364,8 @@ Pay attention to your data and your `NA` values!
 ## Summary
 
 -   `is.na()`,`any(is.na())`, `count()`, and functions from `naniar`
-    like `gg_miss_var()` can help determine if we have `NA` values
+    like `gg_miss_var()` and `miss_var_summary` can help determine if we have `NA` values
+-   `miss_var_which` can help you drop columns that have any missing values.
 -   `filter()` automatically removes `NA` values - can't confirm or deny
     if condition is met (need `| is.na()` to keep them)
 -   `drop_na()` can help you remove `NA` values from a variable or an

From 96af35fbb0ebaf957c545ffb2f224ea51419c412 Mon Sep 17 00:00:00 2001
From: carriewright11 <cwright2@fredhutch.org>
Date: Tue, 9 Jan 2024 23:22:05 -0500
Subject: [PATCH 02/16] making one of part 1 questions a bonus

---
 modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd     | 4 +++-
 modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd b/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd
index d257a54aa..921954eec 100644
--- a/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd
+++ b/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd
@@ -53,7 +53,9 @@ Hint: You first need to `pull` out the vector version of this variable to use th
 ```
 
 
-4. What percentage of the `subType` variable is complete of `bike` ? Hint: use another `naniar` function.
+**Bonus / Extra practice**
+
+What percentage of the `subType` variable of `bike` is complete? Hint: use another `naniar` function.
  
 ```{r}
 
diff --git a/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd b/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd
index 4e344da72..6bd5bbe05 100644
--- a/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd
+++ b/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd
@@ -53,7 +53,9 @@ gg_miss_var(bike)
 ```
 
 
-4. What percentage of the `subType` variable is complete of `bike` ? Hint: use another `naniar` function.
+**Bonus / Extra practice**
+
+What percentage of the `subType` variable of `bike` is complete? Hint: use another `naniar` function.
  
 ```{r}
 pull(bike, subType) %>% pct_complete() # this

From 2189e265f9014cdb4055a03b0f947557bed58b2e Mon Sep 17 00:00:00 2001
From: carriewright11 <cwright2@fredhutch.org>
Date: Tue, 9 Jan 2024 23:31:48 -0500
Subject: [PATCH 03/16] no more gender

---
 .../Data_Cleaning/lab/Data_Cleaning_Lab.Rmd   | 14 ++++----
 .../lab/Data_Cleaning_Lab_Key.Rmd             | 33 ++++++++++---------
 2 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd b/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd
index 921954eec..57f35db42 100644
--- a/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd
+++ b/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd
@@ -70,11 +70,11 @@ Let's say we the data like so:
 
 ```{r}
 BloodType <- tibble(
-  gender =
+  weight_loss =
     c(
-      "M", "Male", "Female", "F", "M",
-      "Male", "Other", "M", "F", "Other",
-      "F", "Male", NA, "Male", "Female"
+      "Y", "No", "Yes", "y", "no",
+      "n", "No", "N", "yes", "Yes",
+      "No", "N", NA, "N", "Other"
     ),
   type = c(
     "A.-", "AB.+", "O.-", "O.+", "AB.-",
@@ -93,13 +93,13 @@ BloodType
 
 There are some issues with this data that we need to figure out!
 
-1. Determine how many `NA` values there are for `gender`.
+1. Determine how many `NA` values there are for `weight_loss` (assume you know that `N` and `n` are for no).
 .
 ```{r}
 
 ```
 
-2. Recode the `gender` variable of the `BloodType` data so that it is consistent. Use `case_when()`. Don't forget to use quotes!
+2. Recode the `weight_loss` variable of the `BloodType` data so that it is consistent. Use `case_when()`. Don't forget to use quotes!
 
 ```{r}
 
@@ -108,7 +108,7 @@ There are some issues with this data that we need to figure out!
 
 ```
 
-Check to see how many values `gender` has for each category (hint: use `count`). It's good practice to regularly check your data throughout the data wrangling process.
+Check to see how many values `weight_loss` has for each category (hint: use `count`). It's good practice to regularly check your data throughout the data wrangling process.
 
 3.
 ```{r}
diff --git a/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd b/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd
index 6bd5bbe05..516c6682a 100644
--- a/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd
+++ b/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd
@@ -71,11 +71,11 @@ Let's say we the data like so:
 
 ```{r}
 BloodType <- tibble(
-  gender =
+  weight_loss =
     c(
-      "M", "Male", "Female", "F", "M",
-      "Male", "Other", "M", "F", "Other",
-      "F", "Male", NA, "Male", "Female"
+      "Y", "No", "Yes", "y", "no",
+      "n", "No", "N", "yes", "Yes",
+      "No", "N", NA, "N", "Other"
     ),
   type = c(
     "A.-", "AB.+", "O.-", "O.+", "AB.-",
@@ -94,39 +94,40 @@ BloodType
 
 There are some issues with this data that we need to figure out!
 
-1. Determine how many `NA` values there are for `gender`. 
+1. Determine how many `NA` values there are for `weight_loss` (assume you know that `N` and `n` are for no).
 
 ```{r}
-count(BloodType, gender) # the simple way
-sum(is.na(pull(BloodType, gender))) # another way
+count(BloodType, weight_loss) # the simple way
+sum(is.na(pull(BloodType, weight_loss))) # another way
 BloodType %>% # another way
-  pull(gender) %>%
+  pull(weight_loss) %>%
   is.na() %>%
   sum()
 ```
 
-2. Recode the `gender` variable of the `BloodType` data so that it is consistent. Use `case_when()`. Don't forget to use quotes!
+2. Recode the `weight_loss` variable of the `BloodType` data so that it is consistent. Use `case_when()`. Keep "Other" as "Other". Don't forget to use quotes!
 
 ```{r}
 
 BloodType <- BloodType %>%
-  mutate(gender = case_when(
-    gender %in% c("M", "m", "Male") ~ "Male",
-    gender %in% c("F", "female", "Female") ~ "Female",
-    gender %in% c("Other") ~ "Other"
+  mutate(weight_loss = case_when(
+    weight_loss %in% c("N", "n", "No", "no") ~ "No",
+    weight_loss %in% c("Y", "y", "Yes", "yes") ~ "Yes",
+    weight_loss %in% c("Other") ~ "Other"
   ))
 
-count(BloodType, gender)
+count(BloodType, weight_loss)
 ```
 
-Check to see how many values `gender` has for each category (hint: use `count`). It's good practice to regularly check your data throughout the data wrangling process.
+Check to see how many values `weight_loss` has for each category (hint: use `count`). It's good practice to regularly check your data throughout the data wrangling process.
 
 3.
 ```{r}
-BloodType %>% count(gender)
+BloodType %>% count(wight_loss)
 ```
 
 4. Recode the `type` variable of the `BloodType` data to be consistent. Use `recode`. Hint: the inconsistency has to do with lower case `o` and capital `O`. Don't forget to use quotes!
+
 ```{r}
 BloodType <- BloodType %>%
   mutate(type = recode(type,

From 1c854b1412da37e11e27506c1aee13a32baa0e60 Mon Sep 17 00:00:00 2001
From: carriewright11 <cwright2@fredhutch.org>
Date: Tue, 9 Jan 2024 23:33:03 -0500
Subject: [PATCH 04/16] remove word filter from lab

---
 modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd     | 2 +-
 modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd b/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd
index 57f35db42..c616de34f 100644
--- a/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd
+++ b/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd
@@ -39,7 +39,7 @@ Hint: You first need to `pull` out the vector version of this variable to use th
 
 
 
-2.  Filter rows of bike, so that only rows remain that do NOT have missing values for the `route` variable,  using `drop_na`. Assign this to the object `have_route.`
+2.  Clean rows of bike, so that only rows remain that do NOT have missing values for the `route` variable,  using `drop_na`. Assign this to the object `have_route`.
 
 ```{r}
 
diff --git a/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd b/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd
index 516c6682a..67b11dae3 100644
--- a/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd
+++ b/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd
@@ -40,7 +40,7 @@ bike %>%
 
 
 
-2.  Filter rows of bike, so that only rows remain that do NOT have missing values for the `route` variable,  using `drop_na`. Assign this to the object `have_route.`
+2.  Clean rows of bike, so that only rows remain that do NOT have missing values for the `route` variable,  using `drop_na`. Assign this to the object `have_route`.
 
 ```{r}
 have_rout <- bike %>% drop_na(route)

From 9c1d0744479ea3bd5d7478020ea68c76e8cd6477 Mon Sep 17 00:00:00 2001
From: carriewright11 <cwright2@fredhutch.org>
Date: Tue, 9 Jan 2024 23:38:48 -0500
Subject: [PATCH 05/16] fixing typo

---
 modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd b/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd
index 67b11dae3..9d3aac516 100644
--- a/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd
+++ b/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd
@@ -123,7 +123,7 @@ Check to see how many values `weight_loss` has for each category (hint: use `cou
 
 3.
 ```{r}
-BloodType %>% count(wight_loss)
+BloodType %>% count(weight_loss)
 ```
 
 4. Recode the `type` variable of the `BloodType` data to be consistent. Use `recode`. Hint: the inconsistency has to do with lower case `o` and capital `O`. Don't forget to use quotes!

From 382c94516a890facead98c26ec7abbe5a45f92da Mon Sep 17 00:00:00 2001
From: carriewright11 <cwright2@fredhutch.org>
Date: Wed, 10 Jan 2024 00:02:41 -0500
Subject: [PATCH 06/16] treatment instead of gender

---
 modules/Data_Cleaning/Data_Cleaning.Rmd | 147 +++++++++++++-----------
 1 file changed, 77 insertions(+), 70 deletions(-)

diff --git a/modules/Data_Cleaning/Data_Cleaning.Rmd b/modules/Data_Cleaning/Data_Cleaning.Rmd
index 2475125e9..f31e45839 100644
--- a/modules/Data_Cleaning/Data_Cleaning.Rmd
+++ b/modules/Data_Cleaning/Data_Cleaning.Rmd
@@ -388,17 +388,17 @@ Pay attention to your data and your `NA` values!
 set.seed(124)
 data_diet <- tibble(Diet = rep(c("A", "B", "B"),
                                times = 4), 
-                    Gender = c("Male", 
-                               "m",
+                    Treatment = c("Ginger", 
+                               "g",
                                "Other",
-                               "F", 
-                               "Female",
-                               "M", 
-                               "f",
+                               "peppermint", 
+                               "Peppermint",
+                               "G", 
+                               "Mint",
                                "O", 
-                               "Man",
-                               "f",
-                               "F",
+                               "ginger",
+                               "mint",
+                               "Mint",
                                "O"), 
                     Weight_start = sample(100:250, size = 12),
                     Weight_change = sample(-10:20, size = 12))
@@ -418,12 +418,12 @@ This needs lots of recoding.
 
 ```{r}
 data_diet %>%
-  count(Gender)
+  count(Treatment)
 ```
 
 ## `dplyr` can help!{.codesmall}
 
-Using Excel to find all of the different ways `gender` has been coded,
+Using Excel to find all of the different ways `Treatment` has been coded,
 could be hectic! In `dplyr` you can use the `recode` function.
 
 ::: {style="color: red;"}
@@ -448,13 +448,14 @@ Need quotes for new values! Tolerates quotes for old values.
 ```{r, eval = FALSE}
 
 data_diet %>% 
-  mutate(Gender = recode(Gender, M = "Male", 
-                                 m = "Male", 
-                               Man = "Male",
-                                 O = "Other",
-                                 f = "Female",
-                                 F = "Female")) %>%
-  count(Gender, Diet)
+  mutate(Treatment = recode(Treatment, G = "Ginger", 
+                                       g = "Ginger", 
+                                  ginger = "Ginger",
+                                       O = "Other",
+                                    Mint = "Peppermint",
+                                    mint = "Peppermint",
+                              peppermint = "Peppermint")) %>%
+  count(Treatment, Diet)
 ```
 
 ## `recode()`
@@ -462,13 +463,14 @@ data_diet %>%
 ```{r, eval = TRUE}
 
 data_diet %>% 
-  mutate(Gender = recode(Gender, M = "Male", 
-                                 m = "Male", 
-                               Man = "Male",
-                                 O = "Other",
-                                 f = "Female",
-                                 F = "Female")) %>%
-  count(Gender)
+  mutate(Treatment = recode(Treatment, G = "Ginger", 
+                                       g = "Ginger", 
+                                  ginger = "Ginger",
+                                       O = "Other",
+                                    Mint = "Peppermint",
+                                    mint = "Peppermint",
+                              peppermint = "Peppermint")) %>%
+  count(Treatment, Diet)
 ```
 
 
@@ -490,26 +492,28 @@ Need quotes for conditions and new values!
 
 ```{r}
 data_diet %>% 
-  mutate(Gender = case_when(Gender == "M" ~ "Male", 
-                            Gender == "m" ~ "Male", 
-                            Gender == "Man" ~ "Male",
-                            Gender == "O" ~ "Other",
-                            Gender == "f" ~ "Female",
-                            Gender == "F" ~ "Female"))  %>% 
-  count(Gender)
+  mutate(Treatment = case_when(Treatment == "G" ~ "Ginger", 
+                               Treatment == "g" ~ "Ginger", 
+                               Treatment == "ginger" ~ "Ginger",
+                               Treatment == "O" ~ "Other",
+                               Treatment == "Mint" ~ "Peppermint",
+                               Treatment == "mint" ~ "Peppermint",
+                               Treatment == "peppermint" ~ "Peppermint"))  %>% 
+  count(Treatment)
 
 ```
 
 ## What happened?{.codesmall}
 
-```{r}
+```{r, eval = FALSE}
 data_diet %>% 
-  mutate(Gender = case_when(Gender == "M" ~ "Male", 
-                            Gender == "m" ~ "Male", 
-                            Gender == "Man" ~ "Male",
-                            Gender == "O" ~ "Other",
-                            Gender == "f" ~ "Female",
-                            Gender == "F" ~ "Female"))
+  mutate(Treatment = case_when(Treatment == "G" ~ "Ginger", 
+                               Treatment == "g" ~ "Ginger", 
+                               Treatment == "ginger" ~ "Ginger",
+                               Treatment == "O" ~ "Other",
+                               Treatment == "Mint" ~ "Peppermint",
+                               Treatment == "mint" ~ "Peppermint",
+                               Treatment == "peppermint" ~ "Peppermint")) 
 ```
 
 ## Original data
@@ -540,14 +544,15 @@ or it can be the original values of the column
 
 ```{r}
 data_diet %>% 
-  mutate(Gender = case_when(Gender == "M" ~ "Male", 
-                            Gender == "m" ~ "Male", 
-                            Gender == "Man" ~ "Male",
-                            Gender == "O" ~ "Other",
-                            Gender == "f" ~ "Female",
-                            Gender == "F" ~ "Female",
-                            TRUE ~ Gender))  %>% 
-  count(Gender)
+  mutate(Treatment = case_when(Treatment == "G" ~ "Ginger", 
+                               Treatment == "g" ~ "Ginger", 
+                               Treatment == "ginger" ~ "Ginger",
+                               Treatment == "O" ~ "Other",
+                               Treatment == "Mint" ~ "Peppermint",
+                               Treatment == "mint" ~ "Peppermint",
+                               Treatment == "peppermint" ~ "Peppermint",
+                                TRUE ~ Treatment)) %>%
+  count(Treatment)
 ```
 
 
@@ -555,10 +560,12 @@ data_diet %>%
 
 ```{r}
 data_diet %>% 
-  mutate(Gender = case_when(
-    Gender %in% c("M", "male", "Man", "m", "Male") ~ "Male",
-    Gender %in% c("F", "Female", "f", "female") ~ "Female",
-    Gender %in% c("O", "Other") ~ "Other"))
+  mutate(Treatment = case_when(
+    Treatment %in% c("G", "g", "Ginger", "ginger") ~ "Ginger",
+    Treatment %in% c("Mint", "mint", "Peppermint", "peppermint") ~ "Peppermint",
+    Treatment %in% c("O", "Other") ~ "Other")) %>%
+
+  count(Treatment)
 
 ```
 
@@ -682,13 +689,13 @@ str_sub(string = Effect, start = 1, end = 3)
 ```{r}
 head(data_diet, n = 4)
 data_diet %>% 
-  filter(str_detect(string = Gender,
-                    pattern = "M"))
+  filter(str_detect(string = Treatment,
+                    pattern = "int"))
 ```
 
 ## OK back to our original problem
 ```{r}
-count(data_diet, Gender)
+count(data_diet, Treatment)
 ```
 
 ## Recode was nice but what if miss something?
@@ -696,13 +703,13 @@ count(data_diet, Gender)
 ```{r, eval = FALSE}
 
 data_diet %>% 
-  mutate(Gender = recode(Gender, M = "Male", 
-                                 m = "Male", 
-                               Man = "Male",
-                                 O = "Other",
-                                 f = "Female",
-                                 F = "Female")) %>%
-  count(Gender, Diet)
+  mutate(Treatment = recode(Treatment, G = "Ginger", 
+                                       g = "Ginger", 
+                                  ginger = "Ginger",
+                                       O = "Other",
+                                    Mint = "Peppermint",
+                                    mint = "Peppermint",
+                              peppermint = "Peppermint"))
 ```
 
 ## `case_when()` was an improvement
@@ -711,10 +718,10 @@ But we still might miss a strange value
 
 ```{r, eval = FALSE}
 data_diet %>% 
-  mutate(Gender = case_when(
-    Gender %in% c("M", "male", "Man", "m", "Male") ~ "Male",
-    Gender %in% c("F", "Female", "f", "female") ~ "Female",
-    Gender %in% c("O", "Other") ~ "Other"))
+  mutate(Treatment = case_when(
+    Treatment %in% c("G", "g", "Ginger", "ginger") ~ "Ginger",
+    Treatment %in% c("Mint", "mint", "Peppermint", "peppermint") ~ "Peppermint",
+    Treatment %in% c("O", "Other") ~ "Other"))
 ```
 
 ## `case_when()` improved with `stringr`
@@ -723,11 +730,11 @@ data_diet %>%
 
 ```{r}
 data_diet %>% 
-  mutate(Gender = case_when(
-    str_detect(string = Gender, pattern = "^m|^M") ~ "Male",
-    str_detect(string = Gender, pattern = "^f|^F") ~ "Female",
-    str_detect(string = Gender, pattern = "^o|^O") ~ "Other")) %>%
-  count(Gender)
+  mutate(Treatment = case_when(
+    str_detect(string = Treatment, pattern = "^g|^G") ~ "Ginger",
+    str_detect(string = Treatment, pattern = "int") ~ "Peppermint",
+    str_detect(string = Treatment, pattern = "^o|^O") ~ "Other")) %>%
+  count(Treatment)
 ```
 
 This is a more robust solution! It will catch typos as long as first letter is correct.

From fd0c7095bc3349bece4a65b6080c4c0aeca7f803 Mon Sep 17 00:00:00 2001
From: carriewright11 <cwright2@fredhutch.org>
Date: Wed, 10 Jan 2024 00:22:14 -0500
Subject: [PATCH 07/16] adding all to summary

---
 modules/Data_Cleaning/Data_Cleaning.Rmd | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/modules/Data_Cleaning/Data_Cleaning.Rmd b/modules/Data_Cleaning/Data_Cleaning.Rmd
index f31e45839..6cf344bf6 100644
--- a/modules/Data_Cleaning/Data_Cleaning.Rmd
+++ b/modules/Data_Cleaning/Data_Cleaning.Rmd
@@ -363,8 +363,7 @@ Pay attention to your data and your `NA` values!
 
 ## Summary
 
--   `is.na()`,`any(is.na())`, `count()`, and functions from `naniar`
-    like `gg_miss_var()` and `miss_var_summary` can help determine if we have `NA` values
+-   `is.na()`, `any(is.na())`, `all(is.na())`, `count()`, and functions from `naniar` like `gg_miss_var()` and `miss_var_summary()` can help determine if we have `NA` values
 -   `miss_var_which` can help you drop columns that have any missing values.
 -   `filter()` automatically removes `NA` values - can't confirm or deny
     if condition is met (need `| is.na()` to keep them)

From 41f44bd36cad1116e46af6c21b75a68a53200266 Mon Sep 17 00:00:00 2001
From: carriewright11 <cwright2@fredhutch.org>
Date: Wed, 10 Jan 2024 00:58:12 -0500
Subject: [PATCH 08/16] adding some emo package stuff

---
 modules/Data_Cleaning/Data_Cleaning.Rmd | 32 ++++++++-----------------
 1 file changed, 10 insertions(+), 22 deletions(-)

diff --git a/modules/Data_Cleaning/Data_Cleaning.Rmd b/modules/Data_Cleaning/Data_Cleaning.Rmd
index 6cf344bf6..dce470929 100644
--- a/modules/Data_Cleaning/Data_Cleaning.Rmd
+++ b/modules/Data_Cleaning/Data_Cleaning.Rmd
@@ -4,19 +4,19 @@ output:
   ioslides_presentation:
     css: ../../docs/styles.css
     widescreen: yes
-editor_options: 
-  markdown: 
-    wrap: 72
 ---
 
-```{r, echo = FALSE, message = FALSE}
+```{r, echo = FALSE, message = FALSE, results='hide'}
 library(knitr)
 opts_chunk$set(comment = "")
 library(readr)
 suppressPackageStartupMessages(library(dplyr))
 library(tidyverse)
+devtools::install_github("hadley/emo")
+library(emo)
 ```
 
+
 ## Recap on summarization
 
 -   `summary(x)`: quantile information
@@ -85,29 +85,17 @@ is.nan(test)
 is.infinite(test)
 ```
 
-## Useful checking functions {.small}
 
-Do we have any `NA`s? (`any` can help)
+## Useful checking functions
+
+`any()` can help you check if there are any `NA` values in a vector
 
 ```{r}
-A <- c(1, 2, 3, NA)
-B <- c(1, 2, 3, 4)
-any(is.na(A)) # are there any NAs - YES/TRUE
-any(is.na(B)) # are there any NAs- NO/FALSE
+test
+any(is.na(test))
 ```
 
 
-## Useful checking functions {.small}
-
-Are all the values `NA`? (`all` can help)
-
-```{r isna}
-A <- c(1, 2, 3, NA)
-B <- c(1, 2, 3, 4)
-all(is.na(A)) # are there any NAs - YES/TRUE
-all(is.na(B)) # are there any NAs- NO/FALSE
-```
-
 ## Finding `NA` values with `count()`{.codesmall}
 
 ::: {style="color: red;"}
@@ -123,7 +111,7 @@ Check if rare values make sense.
 
 ```{r, message=FALSE}
 library(jhur)
-bike <- read_bike()
+bike <- read_csv(file = "http://jhudatascience.org/intro_to_r/data/Bike_Lanes.csv")
 bike %>% count(subType)
 ```
 

From a3109a5e1be607c888e71378259e02583693442e Mon Sep 17 00:00:00 2001
From: carriewright11 <cwright2@fredhutch.org>
Date: Wed, 10 Jan 2024 01:17:01 -0500
Subject: [PATCH 09/16] adding cleaning data to github

---
 data/cleaning_diet_data.xlsx            | Bin 0 -> 5366 bytes
 modules/Data_Cleaning/Data_Cleaning.Rmd |   4 ++++
 2 files changed, 4 insertions(+)
 create mode 100644 data/cleaning_diet_data.xlsx

diff --git a/data/cleaning_diet_data.xlsx b/data/cleaning_diet_data.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..0af7a01603e23a0d2b04322a8e79a194d387399a
GIT binary patch
literal 5366
zcmZ`-2UL?=)(ug5M@s0u_g<6|dNC9M=_*2i&_eGZ0>YysO+k<nKtKTr5}I`B2$3qi
z3L-+N0@D6C&W!W;XOeZlyOOo`UdcY^o^$sx)Vp?_8UO$gU>7?;E;stK5)J@>e;ok0
zf&Iox4ess(b@#b%66gW-wh;+%bFD}laPMTJ&^S6L`hb$)vN18G;f+_ybfewCHw5+Y
zfhZ(((pp7PY1$Pdx6<7c24@@A$s#`4&&i=8qGXO!$-BZOcN9RI>)e;mn`y~&*!kh<
z#4NfkIIC&9ADvLK;X<;(LV*x!)?#%VHH2Xlfs$Ey3WLUp>_@0cP{A4-1}`$WERuJ}
z#Bj5{2jN&cHbNO}vA(6G4UEZAJnYCc^`b%4SQ=FhWXQJib!D=7IlgM2)_ngFC_+v(
z-M<j-wh*k8Wl#FHO!uH9`%Nm(R~Qi`bRvF51pi@lIg$a{gX|LqPOISErmW%SLcSdi
zNv}|g7+7kObZ#2vs?1fNmTP;h)%9G3{tZs4XQ8YjxFjSm|B|d~&0mH${64#|lA8J1
z(c3Kn2Ix8cnSKJ&sxt4VV@mU-NX+;7-D5_>zsbFqr!&2Qm01r601#vM`(992Z&8t7
z&x+J`{Z26w6-<c6>S?ySHbFsg6I;<7kCajympfx^BH}~jHYPSjg(MM%pXB*{-Em;^
z<E_MRiFbZ5kqEtJpvD~-iWzDfxPW<*3lQVQaN<jN6p_4Gb2z`vsaw|9oNPg(-HR9W
z^f=!ar3iU0m_*%gFaT5KA0FHI%daizpu7^}^Rq8}EvgJPq%z&eU3xA)BJJvK1>bnO
zWn0MRXnEGpXMdYH_Hp1CCMVi#tw6vKy>4@Nvgu6m)k|%3K|U(SiPE@Ce|f;N)t70;
zhH!m2K~J^rvVkf4Nd%|A1)<%U#0ak}It1`{MZTyrBF3>w2w@dr!0rLAqQ6D)hkLm^
zgu`8arQmOYo({(W#Yti?N*aBkyplv54|@iXi>3!Sy1e`Nj?*Ld{<(E^e2s#C1?*6V
z5uWUU?{2Q3U$e%|2EHoj^)g(2Q>8HJ^cmBlM4NA(@cK4}83u~Awb8$8^)0c8*}bVv
z0L<j#q|rh3)J-pPX%OEFcIQ+Mq$4f^N>F?r?UB9iop4ESI4?e4GD0D1isLMc7xZLn
zv$5u2W9vvopCza5J}dbwl}<$V=M<;<TF`KDFBQ#B_25CSM5^D0!77vJ$5&)E{t9ms
zLW90SmQ$<Ncene#5dK|^t^k*>`B)*Wi2wl5Z!vz!;Ozv3`gn`}V-x?SM@FU<{Dl~G
z&%zNu1%?phMOUCpS39FN?!1NS>0`%#O9YzIuL(V+znzmlZ4?MTwhjrqIAxeS6DD3K
z1@XSKY;yYEL}o%bmnLA<q<nu{lX!9oXclaZCYsn*UfhP9aVy;U(eDjd>Ln9s@$+W?
zma)Fe6+^WzUQv+xG95`pJY}EN<AN8kz?1QW?)YL*P`u1~yfZGeggm>1>#;C|jN3%j
zw<Etzb$LH$Y&H?0__W307FndvmOI)xUUf1afjSTcYK84$2;K)r1F3L=`NKD*IA@H0
zPRq6Gr7aW%v3g-F%X!znvv<X)p5^zzRgl)b`X!Ciw8TtW2`tPLenT^Guf>SW@|bQ4
zEuEExuK*agDOezY_I6OLkifDq4rQQfhm@P>2GYICv03|*24^Pw;pXAk!dAdV?}lT^
z_pMjRG$u3p_XbBrY?c`+<jQXk-iaqp(}sweiHMCpc%cdx&xUYdw!A>RO){;W#M1Yi
z$^u+EYH%HSkcO1Up%PG5$e{fVhzz}|192F2gGHT=9%Ynmvi_8ox8$73C?w|gYvKCn
zM!dUJ`wA}waMnXV47u~<z&fTV&RyFL%m^KIKj@%2O@0(-c~lavs}Z=hAMw0xNOjsc
zYD=F(^3yW7irag+wUg6+qKzXJ4tDKV%FbTOWN?>Pm$uqL#JZOC0>?Ow{T4`~&-QgN
z<lq`j$BE9$3T`@U^kk)Xg3t?~&8#LWZBDLk!f7VSNl=<W|1_#!oJcroo%q_62u@8L
z%4ud#ZSLcaQRk2?=dA^LCv>rGqD&>2+Ru1a*;oPp8-^UStaE3Dgheck_^+F|MaXjH
z%ym3t(dR$zqZ`lDwDyvSP9DhDdYvR(n|Q|s*DaHi&W=e919e~G{5`89LC)OT*dD`!
z&1~{NS^clWooF<GEqoO3_h}~PVJyy^meOS(^h{L4)nUUt^HO)2Zz7pTE)+LS_&rwM
zNv_O&8VYJqkckT|@E!Tel&V8Q;tPu!J7rRmNuHE#%UQSdfJ&`-sN%m~cAjDkK9*W*
zbGp~W<cyiopl(2RM>oa7j7IgKuAPcTP07|SPl?Sqe`;#Lm>i8di=%5gJ1%-UO)`Xb
z=hJN>r}H2}0^6`)vw;b4^_!xOEqRI+x6=+r%hS?u{_V5K6c!CK<HC|2<%q568U1n<
zhh37wWvac85|3#x=?iFQ#LZm6+!7eAg4*QWhnTQvAhaiUtm8nw$hh%4%)XDrTzN5<
zx`l^5ew}WM@Jq`N`N0|?vb9g5M@wOL&>(^%77x`+>TlUUcwOmI!)@a2JwQFWmAbk&
z`q9tz9s8Kf$N7D=reBB2goqKS*`+RD=`_lpG3?d20Dv*$@5YX;t3FOpH|X!@KdQ)<
z1tfV?6V$RVsM!*^@_}rM#^PNc54xknS=P`q=ktx=-p_U)Z~OICMA#MUn!?8Qisyq$
zr#T*v5uZ!X+^IEL7CkuBO5Jrp+S|US1(U@(p1ogS4eM#}Is^}**(p=^irnO%In#2)
z4qwZTa@!ore^d=+FY};!(0yYBRibWbRwrWJ{z8C59IW!P7cz_DC?0{#WGcDZ1;)PO
zdK}Ct^6<?Ae$|^4#{{3d2P-F?Ave3FstqIT7K4JN3YEZ-3B|S2cJp>S8GKo8$G4e|
z8`N_-dhqOcPoEl3L4nK<Cr~Q-ZOM46kC7CeciIzCrUS~E&AwePZ;01M+(m4z=cQJ1
z5p*Bs=tp(R7mWJLieLs$CW$Ww>hD!eZpnLx$pcQ6%)bfQ&V>zZ3<fyoFMDPA*R74u
z3fx;+Fz<l-C@{kj5pbcMZK@Vim&TbxqIsLeX6X6NnK}2Gh$GtLHL8|JfryQ}yYhSw
zrRYTqY66BhC1Vogn%tx60Q1@0Z9u5Ylqs6m*|f6jScxtau-e`g$D<7~mgrDgY?Nam
zij%|%NPDoQU;SmExZ*Z#YD<2ZvlZ(rkibKnz{9g1v*El#>|#<5@jKgjwxX4s#CqY%
z_C6`ikT>MyobJ)1Q~sZu8~&oKkD%Rp%$KJIdFMYipE=}@JQi&U+v};}6>T{=8)ju;
zT$}-t`yPjPs~1|W5^U{*TqOzE)I4S4z)`R#K&@~oK(IhL01gqCTv90d_Pz<o5Kb82
z=G9ub3FhL7V+fZmtOw41$?>fh%l76>D}EuVqgvx3{>k$c#F}mIZxDr&)nQL7i-pQ^
z&`2MF)EQ&A*omM=$ImIQO)2UMhRqeXET$$?H)<K-8zc1ad`bt^Ct)O>S=rQ4hGro(
z!F0ZIlGO|BF{WJ9*By8=qfBQ5NDZiy5*b#WZ}h$}*WlqMFX^a~vljTvM;oNo9Ig#3
zY!EJ?@r6J5s^0w;gM?XY5NY)AtU?AGl4h4y+@n^;N+ls2%%@M8)Z!k#>&Y-8yjPLp
zO6u9x*YB}Vte#u(27?xp;MMkX<mj)o*=QAq=(}_$sLEcT6C8Kk0;IBZm+gt~JqgBV
z#E`w@3cl`juC{QF_Fw2}X)4d0+(O0UxomoS4vF5A=St?Y!*}lQg=Ez^5bENGejcQg
z{}jSwz*#TUW$IDTb|Ot>9ja>TL$O5tL9QXnF!POC>>ZIMt_KyXxvaeWpgc@qjU-!s
z_4F5T^&EOQ+FRyCAiXhPv-^RKZQ=l&caT5aMc4aQjTFWNhFXJ^;ziX&wvN7`D`dA5
z;{>lAMYIbEt+Q~@FNLRzdnH86B}&gt1T{}e!59<k(op&-KAzh|p7ZkRWhiRy;07j(
zJ2nNJ=>FHiw0oY@C`~b)c4!VzIZK*tu%pQBq+IGtZCA+118+-(+6+81^&-`BW+6m`
zK&y1HZ>rEiRj~V7%)WRVI&osO@RQetn(+6u0&WE5?s!~o7xH}%Z`@yzd~&L4GL)yF
z1Hbt1M5R%VnxQBP!vgoLfbo9O*Ndxy*+>O3Jx=Q%MVYjxz?b)jeS6KagMJXrEuho8
z<+GQYTTAS-i0xcBd&y12@CXu+Om0@zrGKf>>2iKM!X+VV5JD4!E$ju@Gn?T<KHM?!
zEv7qs+18-4PzDZg!DyK_1@e{d&HJ5$Wt+p3^a$ke;CKdX_a;|N6e;qXo5K$)zHHyL
z5lZ=Od$g;uFKOq+Wx$@N*=?m)$@s+JZ3_oVGnb#!hzBcMCUPqXlsl?^yW+eQvb6IU
zDe5$9a_lO;Kz6L8&iav8_j+tVwQ1%;h&O>koz#lN-OFuIEdkg?<NdJr@mS8B`O^2%
zuI7ds#mK7WQ5!eWXQdU)_Iu+atOg+gtoE2TFsvh8Xr4zg9O2;q;~CCQHC>Sv-Nlu*
zS~Wyv(}``Xcd*ut^mkkJ_6c-_djGm1Hk&NO=ZH~+VeV7z&3n@X&C(q5J$a&PMdnT%
zn#dq6ro%-~*W7x+qd8~eB>z?I{@|dj<ieRp>KSaLZD=GA3TYp0y5=}ChB7J<Jgyqu
zUAgAS@&tC>oRe5dZ#b`?yrGAFRZo;#;-Of}qT=$7yiUlRp72>Na}m3qOD?T;`6iXJ
z96tkVgcR4*l1H9X0g34T1jE4_6rw%CQP6PhA|n^q(1PEf|2^sfvs&RR1=<e;!iXL-
zoN_OL@Cr218i6B@xq#*QJBbNgu~_=$I^7Gc$}c<AMF95~-1&p}kNI-tQ#<aSNRg+{
zqZc_pE*~ZNuYbsK$g2cqM(*96kUl`v4e~GKOW;p&;L433E~($-YX8egnlO<nR~iWX
zLR(FwZlxFg>@8{Q4C8hg)L~33aUnZm)bh!;>{BAkF6|o`ZI3?EHdHy)JSv15H|<{v
zD}*X%$J&OUIN3ciIt7+XTDl9zL!iyR-!iN_S;nsS))N>vq3ZqX4uT}>n4z{`h%O{&
z+3L#?3^e%?kDk+e%8Wv-<7qTB=ni>^J*AK|oQp77+B1&JlthNZmF>efL$qd9@n?!c
zdO@uw9)YC&vVXrs>Et?jb7P}zh`mHnVxw&jw=?vDdw7f5!M&irBA%Q2(HVP<io%2v
zA)18yU}}`8Yi@mIk5!E=y>5)~P}YW}j41@33g|C$EgC1k&N}z=Gm@U!kU7O=O_!D~
z#0ArXY>OpcFoX)NpK$i8u4?X0FqD2`0cK14XL#H;Cx_1~>==Q2sd}&TSSO;y6{0;U
ztkAcp`)pI|QTO(r>$bKh#lu^0tf?fr7&xBY@(Ka0xw94CxD$u75f(CYt*8whfp_Ct
z32ql1{CZWVlUkLvd~IAXK4PLquHGL^+FiLB=RRU(92VC~TZK6<Z;*&FCRBlN<KAEi
zc-K<4<NK6sa*u*;<AV$C9xU(RY27GCk5Y;SQCpJdaR2)12~=JSqSJqxgU(6i&XV}q
zri)Zac)!sKIE3}K#yYl%A8qkEU0m^=_ZnY~`eQYvxQ1Q-sj01p$8Sm7^*6<^lMk#V
zSM<HZeqbW6SsHG>qh3sF&geK|E{FO;Uzl0dqcffY6ht;kdi#b-pz5b<Wj5IgcPs6?
zgdFMRwggvD+`-ecxtDhIsa_e#d2rv4@b9`WW3k4SgT76OTTaMAjhZ$G;ht|Qr)T$!
zr#88w(4<@V$k$D3HsE|df>UXDUytl2&<yO$(yijBZS8X~iSFNgNuDQrnzC0^Pu9**
z!FOJC2KPDm>eBD-&+uk(v_Ko|g2sG8-&>0?(5s&4?~uS9u(Y_|-pa7dV$xcZAwgOE
z)#=k?9#%;yR+x)-X0t#P%PafbI8FJ7_%j8dccj`zEyu^vq=14&JMR=@jL8dP3zBFq
z{lt*#IzOpB?RtSq6M{4wbjuHQZ;8DnTxnbv4BKHg_?g*!;ASWyoRa*ly<Rqii187E
zAZhcKTbwU~!(;=m+c%liGq1jI>wG}7LD)oVco_WXWH<(2B>qImV8LU(=jGykxJ4My
zP!9*2<o|m{g^d&d8?}GF{>#MbD#6vM%P-__0yuX2j~UEW;ML*8AK+u`FZ|!5imN<V
zef=MvBy8$odH$=*zshpeNBv>3$NM+Szg*Q-@KrzX2Rw!U5AaoYaFyU{yZJ-VgzY}q
z|L~{zT!mh(U4NiuSU>pxD%n-|)nobxevUo0|Aznb&>HIDV$}cu2(fP^Y?KPAetr5s
DFEl6K

literal 0
HcmV?d00001

diff --git a/modules/Data_Cleaning/Data_Cleaning.Rmd b/modules/Data_Cleaning/Data_Cleaning.Rmd
index dce470929..4556c4980 100644
--- a/modules/Data_Cleaning/Data_Cleaning.Rmd
+++ b/modules/Data_Cleaning/Data_Cleaning.Rmd
@@ -393,6 +393,10 @@ data_diet <- tibble(Diet = rep(c("A", "B", "B"),
 ```
 
 
+```{r, include = FALSE, echo = FALSE}
+#writexl::write_xlsx(data_diet, here::here("data", "cleaning_diet_data.xlsx")
+```
+
 ## Say we have some data about samples in a diet study:
 
 ```{r}

From 4e49a64343dc100a2f1358d43cf341b2bde919ff Mon Sep 17 00:00:00 2001
From: carriewright11 <cwright2@fredhutch.org>
Date: Wed, 10 Jan 2024 01:24:47 -0500
Subject: [PATCH 10/16] add reading in from excel

---
 modules/Data_Cleaning/Data_Cleaning.Rmd | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/modules/Data_Cleaning/Data_Cleaning.Rmd b/modules/Data_Cleaning/Data_Cleaning.Rmd
index 4556c4980..fee5b0d85 100644
--- a/modules/Data_Cleaning/Data_Cleaning.Rmd
+++ b/modules/Data_Cleaning/Data_Cleaning.Rmd
@@ -396,6 +396,15 @@ data_diet <- tibble(Diet = rep(c("A", "B", "B"),
 ```{r, include = FALSE, echo = FALSE}
 #writexl::write_xlsx(data_diet, here::here("data", "cleaning_diet_data.xlsx")
 ```
+## Reading in the data if it were an Excel sheet
+Data is also here: 
+
+http://jhudatascience.org/intro_to_r/data/cleaning_diet_data.xlsx
+
+```{r}
+library(readxl)
+data_diet<- read_excel(here::here("data", "cleaning_diet_data.xlsx"))
+```
 
 ## Say we have some data about samples in a diet study:
 

From 723726e912e52f18fc8e73a606bede56db333c4d Mon Sep 17 00:00:00 2001
From: carriewright11 <cwright2@fredhutch.org>
Date: Wed, 10 Jan 2024 01:28:26 -0500
Subject: [PATCH 11/16] adding bike readin of data

---
 modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd     | 2 +-
 modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd b/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd
index c616de34f..ae923a7d4 100644
--- a/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd
+++ b/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd
@@ -22,7 +22,7 @@ library(tidyverse)
 library(broom)
 library(naniar)
 
-bike <- read_bike()
+bike <- read_csv(file = "http://jhudatascience.org/intro_to_r/data/Bike_Lanes.csv")
 
 ```
 
diff --git a/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd b/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd
index 9d3aac516..afef625e2 100644
--- a/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd
+++ b/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd
@@ -22,7 +22,7 @@ library(tidyverse)
 library(broom)
 library(naniar)
 
-bike <- read_bike()
+bike <- read_csv(file = "http://jhudatascience.org/intro_to_r/data/Bike_Lanes.csv")
 ```
 
 # Part 1

From 7e4dd149626ccee4347dbe2411e3619488422e9a Mon Sep 17 00:00:00 2001
From: carriewright11 <cwright2@fredhutch.org>
Date: Wed, 10 Jan 2024 01:30:42 -0500
Subject: [PATCH 12/16] remove emojis from emo

---
 modules/Data_Cleaning/Data_Cleaning.Rmd | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/modules/Data_Cleaning/Data_Cleaning.Rmd b/modules/Data_Cleaning/Data_Cleaning.Rmd
index fee5b0d85..59db4c50c 100644
--- a/modules/Data_Cleaning/Data_Cleaning.Rmd
+++ b/modules/Data_Cleaning/Data_Cleaning.Rmd
@@ -12,8 +12,7 @@ opts_chunk$set(comment = "")
 library(readr)
 suppressPackageStartupMessages(library(dplyr))
 library(tidyverse)
-devtools::install_github("hadley/emo")
-library(emo)
+
 ```
 
 
@@ -50,7 +49,7 @@ inaccuracies, or recoding it in a way that makes it more manageable.
 
 ::: {style="color: red;"}
 
-`r emo::ji("warning")` MOST IMPORTANT RULE - LOOK `r emo::ji("eyes")` AT YOUR DATA! `r emo::ji("warning")`
+ MOST IMPORTANT RULE - LOOK  AT YOUR DATA!
 
 :::
 
@@ -299,9 +298,9 @@ bike %>%
 ::: 
 
 
-`r emo::ji("warning")` Sometimes removing `NA` values leads to distorted math - be careful!
+ Sometimes removing `NA` values leads to distorted math - be careful!
 
-`r emo::ji("warning")` Think about what your `NA` means for your data (are you sure ?).
+ Think about what your `NA` means for your data (are you sure ?).
 
 - Is an `NA` for values so low they could not be reported? 
 
@@ -317,13 +316,13 @@ survey reports 0 if student has tried cigarettes but did not smoke that
 week
 
 
-`r emo::ji("warning")` You might want to keep the `NA` values so that you know the original sample size. 
+You might want to keep the `NA` values so that you know the original sample size. 
 
 
 ## Word of caution {.codesmall}
 
 
-`r emo::ji("warning")` Calculating percentages will give you a different result depending on your choice to include NA values.!
+ Calculating percentages will give you a different result depending on your choice to include NA values.!
 
 This is because the denominator changes.
 

From ddb4a73df7d2c7838d7156376d9de8bf6255937c Mon Sep 17 00:00:00 2001
From: carriewright11 <cwright2@fredhutch.org>
Date: Wed, 10 Jan 2024 01:35:45 -0500
Subject: [PATCH 13/16] swap out emo for emoji

---
 modules/Data_Cleaning/Data_Cleaning.Rmd | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/modules/Data_Cleaning/Data_Cleaning.Rmd b/modules/Data_Cleaning/Data_Cleaning.Rmd
index 59db4c50c..41825a91b 100644
--- a/modules/Data_Cleaning/Data_Cleaning.Rmd
+++ b/modules/Data_Cleaning/Data_Cleaning.Rmd
@@ -12,7 +12,8 @@ opts_chunk$set(comment = "")
 library(readr)
 suppressPackageStartupMessages(library(dplyr))
 library(tidyverse)
-
+install.packages("emoji", repos='http://cran.us.r-project.org')
+library(emoji)
 ```
 
 
@@ -49,7 +50,7 @@ inaccuracies, or recoding it in a way that makes it more manageable.
 
 ::: {style="color: red;"}
 
- MOST IMPORTANT RULE - LOOK  AT YOUR DATA!
+`r emoji("warning")` MOST IMPORTANT RULE - LOOK `r emoji("eyes")` AT YOUR DATA! `r emoji("warning")`
 
 :::
 
@@ -298,9 +299,9 @@ bike %>%
 ::: 
 
 
- Sometimes removing `NA` values leads to distorted math - be careful!
+`r emoji("warning")` Sometimes removing `NA` values leads to distorted math - be careful!
 
- Think about what your `NA` means for your data (are you sure ?).
+`r emoji("warning")` Think about what your `NA` means for your data (are you sure ?).
 
 - Is an `NA` for values so low they could not be reported? 
 
@@ -316,13 +317,13 @@ survey reports 0 if student has tried cigarettes but did not smoke that
 week
 
 
-You might want to keep the `NA` values so that you know the original sample size. 
+`r emoji("warning")` You might want to keep the `NA` values so that you know the original sample size. 
 
 
 ## Word of caution {.codesmall}
 
 
- Calculating percentages will give you a different result depending on your choice to include NA values.!
+`r emoji("warning")` Calculating percentages will give you a different result depending on your choice to include `NA` values!
 
 This is because the denominator changes.
 

From bebd92a1393cc219133d679c13a5f9e41b4ea4c0 Mon Sep 17 00:00:00 2001
From: carriewright11 <cwright2@fredhutch.org>
Date: Wed, 10 Jan 2024 01:41:16 -0500
Subject: [PATCH 14/16] requiring that they read in the data for the cleaning
 lab

---
 .../Data_Cleaning/lab/Data_Cleaning_Lab.Rmd   | 15 ++++++++++++---
 .../lab/Data_Cleaning_Lab_Key.Rmd             | 19 +++++++++++--------
 2 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd b/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd
index ae923a7d4..194ab7032 100644
--- a/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd
+++ b/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd
@@ -21,13 +21,22 @@ library(jhur)
 library(tidyverse)
 library(broom)
 library(naniar)
-
-bike <- read_csv(file = "http://jhudatascience.org/intro_to_r/data/Bike_Lanes.csv")
-
 ```
 
 # Part 1
 
+0. Read in the bike data. You can use the URL or download the data.
+
+Bike Lanes Dataset: BikeBaltimore is the Department of Transportation's bike program. 
+The data is from http://data.baltimorecity.gov/Transportation/Bike-Lanes/xzfj-gyms
+
+You can download it as a CSV into your current working directory. Note that it's also available at: http://jhudatascience.org/intro_to_r/data/Bike_Lanes.csv
+
+```{r}
+
+
+```
+
 
 1.  Use the `is.na()`  and `any()` functions to check if the bike `dateInstalled` variable has any `NA` values. Use the pipe between each step. 
 Hint: You first need to `pull` out the vector version of this variable to use the `is.na()` function.
diff --git a/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd b/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd
index afef625e2..b9233f5ff 100644
--- a/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd
+++ b/modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd
@@ -5,12 +5,6 @@ editor_options:
   chunk_output_type: console
 ---
 
-# Data used
-
-Bike Lanes Dataset: BikeBaltimore is the Department of Transportation's bike program. 
-The data is from http://data.baltimorecity.gov/Transportation/Bike-Lanes/xzfj-gyms
-
-You can Download as a CSV in your current working directory.  Note its also available at: 	http://jhudatascience.org/intro_to_r/data/Bike_Lanes.csv 
 
 ```{r}
 library(readr)
@@ -21,12 +15,21 @@ library(jhur)
 library(tidyverse)
 library(broom)
 library(naniar)
-
-bike <- read_csv(file = "http://jhudatascience.org/intro_to_r/data/Bike_Lanes.csv")
 ```
 
+
 # Part 1
 
+0. Read in the bike data. You can use the URL or download the data.
+
+Bike Lanes Dataset: BikeBaltimore is the Department of Transportation's bike program. 
+The data is from http://data.baltimorecity.gov/Transportation/Bike-Lanes/xzfj-gyms
+
+You can download it as a CSV into your current working directory. Note that it's also available at: http://jhudatascience.org/intro_to_r/data/Bike_Lanes.csv
+
+```{r}
+bike <- read_csv(file = "http://jhudatascience.org/intro_to_r/data/Bike_Lanes.csv")
+```
 
 1.  Use the `is.na()`  and `any()` functions to check if the bike `dateInstalled` variable has any `NA` values. 
 Hint: You first need to `pull` out the vector version of this variable to use the `is.na()` function.

From 152c3dc9d3a7dfd39a2e59ad50813e883c6646da Mon Sep 17 00:00:00 2001
From: carriewright11 <cwright2@fredhutch.org>
Date: Wed, 10 Jan 2024 02:13:35 -0500
Subject: [PATCH 15/16] adding example of why you might want NA values

---
 data/cleaning_diet_data.xlsx            | Bin 5366 -> 5361 bytes
 modules/Data_Cleaning/Data_Cleaning.Rmd |  89 +++++++++++++-----------
 2 files changed, 48 insertions(+), 41 deletions(-)

diff --git a/data/cleaning_diet_data.xlsx b/data/cleaning_diet_data.xlsx
index 0af7a01603e23a0d2b04322a8e79a194d387399a..cf1e3058205df6c3b4a9ee60e23ab7a5d5c899d2 100644
GIT binary patch
delta 1084
zcmV-C1jGCGDe)<=oC6L@+e%iP0ssJJ3IG5Y0F#gt8h@3RTW^~%6oB9ND-iD~7;>!$
zRlBI$9;!}jw>?e{IRvYKk?l6yzn{UxO{q^*UJyRl?;Kx~tG`w0y?~I6*K40#guVxL
z#&@jVul=8ox09vsNk!`&EqM)V{~t*ID*cokxOkEUfbw*JTCV*<spc{WWL7{$<ppn`
z)|#A)ihn9S6#GCn0_d)Ds>&b=!+B6qR{JKxrT9n;p64us4bPq{sFevKprlIU%Yrr1
zPm>+fy0#uqK)&{`$>oN`evl?z>R${F^5oxBbo&4$WD0h=0lwE(w&nb(ResZ=uyqaI
z`QCO^--Wk>oIaQ82S0oTwl9<pGPBt>RM9jM{D0tyF1YNz*|Su<G7(xqnq;m1+8Ssn
zt4mhHgA!WHbW)XW7^oyr8m1iwS?_go-s@2}UA4_}xPCtG`n#*=kDh+v^4@&NJ1c0t
zhqvy5ZiwwlWXq3`NY}2aG2<_K<$`b(+a`KR$$XjwFQ;W3<yQO#FSd9wlJUlchZ*B|
zv46n`hj>OlnY!_&7H>KR4>P`3g_~~LQABY(pfmEx!c8}`bh9yYiy1f2G$1PGUd%Jo
zz{L=;m%9n)mT*3%f0*%k!ntYUG#U^Z`N~bWu!M^-gqZOjU4RSIr&TzhGje2mEG^w~
z%nW13cSz@!hK;NSY(_qrx#3n8ZZ)QLn1AuTi)>}cA~GN|^3V-N!q+oD9D{~h<I~#Z
z(yJ~S_O4e<kexwJ?j`Jucg`(pjctoAQ`^JYkkBY5F+%uybr9@2aCZ^2#+D!H?}+z3
z>=hW{>+L~CT_32ma}AQXpW@-c(ksYte*`CwlZNi$jtaeiq*p?&vxXP?@DS!L(<S{Y
z-gJNZT;6h}pDxEyp&xW0+L7kXIakL)`((0D#PmN<O9KRxuoD`yf(A7S1utDiDI}A;
z4=WuhZ4kBtZON&DLQ`6D6VfITwSJJ)I(hplj^iYdlLQbTe-vdQawB<b4b*540nM7b
zN;oF0hc|B>_^2Rc31k8ju14D!9m6<?EodH!<S<EGTd#RcrQcx3#W&voE(v+S8X}iG
za(n>hZX}`sDaD9%Kfdh&QebF7gNXstg5t9`g4f|@L$6#owTgQOH?1debnk*1ja}Cj
z-K4js{1ksJSzh1!RdZzyrxJjgN(v@C@MwKg;maZ&uH_*v(uvm!<kLRsH{OOtj(s4r
z6o4J$o|MnY^UHFMnv#_FIiWcvOY+DFWn{WKjJ)bU1NAWE-^3H(nEUz@G=H=G5O)Fz
zO4~|SoB{v<W|MsrR{>y?)e}4c)sqqwKs>h%004MwFLQKxY-MvUcx`M@O927^02BZK
z00;m803iS`T}3G*0RR9{0ssIJ0000000001000000Hu?T6g~m?lg$((2AvWB0001u
Ca_`ju

delta 1077
zcmV-51j_sIDfTI_oC6NUcx&jO0ssJ93IG5Y0F#Ro8h@3RTW^~%6vyB9DG=`o7;+b(
zN*A@;L)B{Sw#UgK2e1kl*-o>4`x#7}l-f~wLHJyJe*XI!-TW;J=M5Mqv|77p?7I$B
z3Eh(_Tf0AB?nVpO;R08CT+j;E?myt}P5dP~QufMo0O80ED!z7eA?lUq@g#>5^D(WV
zlAe^Z5`PQXWSPfn26*2lm4z4h{>&>esa(~=ihZUJnx-UyElmz3R6=#apuj@n^PJS&
zjiWu0zGgfQgLLg~&}xf9*NdaJ^e+NOe(~=JynBWM5&?TDfa^3P+fn-3IKNAi-=umU
za_?HIC+6%S#fL)tpvP}OvRuecrn=e|3miv`9)BIycFx;xa^QlNs)xo9M@i$~=m2RY
zRY59v7EF4HEGptH0TFpZ!Zd9!>9TIGXT3<#MI)9W_3QQ2-|cw*$mwUM?CqzrlN?tW
zyifO}AX=3`4?nQ<v@wm^7JM&PE-=$zn?i3fnoT0_?Q$3=yEebW3pHM7sd#I`vjt}+
zvVTw!_<b^EA5F|;6HPW5LS_qo6uDWqNlOugVUJGPM{_gXRMSm|Osg%phPnn;67xEq
zDgozx#LjLeoN2<@5dCbyPge<NO2kRfBUJXKnQ*QN=R*i>!S^Pxxsqt<_rR3hvMClC
zY%yeVZNU!~(H4pgEqiRrKAM{0mKtt3M1M6~@S}-rsmKD<BU5(Y42JykN$(Fqvqopu
z!>+9*7hKTSu5*kmn*v?jMz)jPIJeel+m2`}H5yL)gvv1rErjO_1KBPCvx>H8+ww!*
z&G1pf&S4`w-x+AorD2UWu0a%bQ`|pOItS|S5AWhhQsWFBFp~?2I|Zb&sy~+J1|?H3
znD$@Ly8V+!@{S7mU^%sN`HTbBv?OmzsW>&7hmw96#{U7cU<OkO1xcM~b0w3w4=WvM
zLlL$EZON&DLQ`6D6VfITwSJJ)I(hplPU0lsk`N$&Bg#VLM)KAgsL>t*nl*Qoa7<Va
zZ{9laQ9;NO$OI-_jkYm5hH(&E&^#2$VUoDEUh|krzrl`+Z@vLs5;DaaB9}aJd;sQd
zAfgjeiUH|<eA@%0z|evQ69c9N#TRb`ufu3VuUt8`ihBp6*0VUccR>xtuIq|!(pyu0
zioX_GukZb;xiW`S2|!IH1rr{4w7#kEb&(F&@(>s4#A^leX`l3wx6>lWK9E@oz>aax
z%ID<yWjRMpNy_`2(43MbnKJsoO0qhPyy?FH^=ZhziD$qu_w6TW{s6Q15O)Fz#dvGz
zpaK8@T$6YcR{>;`&=WiY*$0#H2Njd06A+Wd4G;?e00001000000K1b;6d)W)ooI6<
v0RR9{0ssIJ0000000001000000I3g?!4DIYofIko{gcQPBL<=p00000Ib-Oa

diff --git a/modules/Data_Cleaning/Data_Cleaning.Rmd b/modules/Data_Cleaning/Data_Cleaning.Rmd
index 41825a91b..aeabb2732 100644
--- a/modules/Data_Cleaning/Data_Cleaning.Rmd
+++ b/modules/Data_Cleaning/Data_Cleaning.Rmd
@@ -376,14 +376,14 @@ set.seed(124)
 data_diet <- tibble(Diet = rep(c("A", "B", "B"),
                                times = 4), 
                     Treatment = c("Ginger", 
-                               "g",
+                               "Ginger",
                                "Other",
                                "peppermint", 
-                               "Peppermint",
-                               "G", 
+                               "peppermint",
+                               "Ginger", 
                                "Mint",
                                "O", 
-                               "ginger",
+                               "Ginger",
                                "mint",
                                "Mint",
                                "O"), 
@@ -394,7 +394,7 @@ data_diet <- tibble(Diet = rep(c("A", "B", "B"),
 
 
 ```{r, include = FALSE, echo = FALSE}
-#writexl::write_xlsx(data_diet, here::here("data", "cleaning_diet_data.xlsx")
+#writexl::write_xlsx(data_diet, here::here("data", "cleaning_diet_data.xlsx"))
 ```
 ## Reading in the data if it were an excel sheet
 Data is also here: 
@@ -448,14 +448,12 @@ Need quotes for new values! Tolerates quotes for old values.
 ```{r, eval = FALSE}
 
 data_diet %>% 
-  mutate(Treatment = recode(Treatment, G = "Ginger", 
-                                       g = "Ginger", 
-                                  ginger = "Ginger",
+  mutate(Treatment = recode(Treatment,  
                                        O = "Other",
                                     Mint = "Peppermint",
                                     mint = "Peppermint",
                               peppermint = "Peppermint")) %>%
-  count(Treatment, Diet)
+  count(Treatment)
 ```
 
 ## `recode()`
@@ -463,14 +461,12 @@ data_diet %>%
 ```{r, eval = TRUE}
 
 data_diet %>% 
-  mutate(Treatment = recode(Treatment, G = "Ginger", 
-                                       g = "Ginger", 
-                                  ginger = "Ginger",
+  mutate(Treatment = recode(Treatment,
                                        O = "Other",
                                     Mint = "Peppermint",
                                     mint = "Peppermint",
                               peppermint = "Peppermint")) %>%
-  count(Treatment, Diet)
+  count(Treatment)
 ```
 
 
@@ -492,9 +488,7 @@ Need quotes for conditions and new values!
 
 ```{r}
 data_diet %>% 
-  mutate(Treatment = case_when(Treatment == "G" ~ "Ginger", 
-                               Treatment == "g" ~ "Ginger", 
-                               Treatment == "ginger" ~ "Ginger",
+  mutate(Treatment = case_when(
                                Treatment == "O" ~ "Other",
                                Treatment == "Mint" ~ "Peppermint",
                                Treatment == "mint" ~ "Peppermint",
@@ -505,11 +499,13 @@ data_diet %>%
 
 ## What happened?{.codesmall}
 
+We seem to have `NA` values!
+
+We didn't specify what happens to values that were already `Other` or `Ginger`.
+
 ```{r, eval = FALSE}
 data_diet %>% 
-  mutate(Treatment = case_when(Treatment == "G" ~ "Ginger", 
-                               Treatment == "g" ~ "Ginger", 
-                               Treatment == "ginger" ~ "Ginger",
+  mutate(Treatment = case_when(
                                Treatment == "O" ~ "Other",
                                Treatment == "Mint" ~ "Peppermint",
                                Treatment == "mint" ~ "Peppermint",
@@ -524,7 +520,7 @@ data_diet
 ## `case_when()` drops unspecified values
 
 Note that automatically values not reassigned explicitly by
-`case_when()` will be `NA` unless otherwise specified.
+`case_when()` will be `NA` unless otherwise specified. 
 
 ::: codeexample
 ```{r, eval = FALSE}
@@ -544,9 +540,20 @@ or it can be the original values of the column
 
 ```{r}
 data_diet %>% 
-  mutate(Treatment = case_when(Treatment == "G" ~ "Ginger", 
-                               Treatment == "g" ~ "Ginger", 
-                               Treatment == "ginger" ~ "Ginger",
+  mutate(Treatment = case_when(
+                               Treatment == "O" ~ "Other",
+                               Treatment == "Mint" ~ "Peppermint",
+                               Treatment == "mint" ~ "Peppermint",
+                               Treatment == "peppermint" ~ "Peppermint",
+                                TRUE ~ Treatment)) %>%
+  count(Treatment)
+```
+
+## Typically it is good practice to include the TRUE statement
+
+```{r, eval = FALSE}
+data_diet %>% 
+  mutate(Treatment = case_when(
                                Treatment == "O" ~ "Other",
                                Treatment == "Mint" ~ "Peppermint",
                                Treatment == "mint" ~ "Peppermint",
@@ -555,13 +562,29 @@ data_diet %>%
   count(Treatment)
 ```
 
+You never know if you might be missing something - and if a value already was an NA it will stay that way. 
+
+## But maybe we want NA?
+
+Perhaps we want values that are O or Other to actually be NA, then `case_when` can be helpful for this. We simply specify everything else.
+
+```{r}
+data_diet %>% 
+  mutate(Treatment = case_when(Treatment == "Ginger" ~ "Ginger", 
+                               Treatment == "Mint" ~ "Peppermint",
+                               Treatment == "mint" ~ "Peppermint",
+                               Treatment == "peppermint" ~ "Peppermint")) %>%
+  count(Treatment)
+```
 
 ## More complicated case_when()
 
+`case_when` can do more complicated statements than `recode` and can match many patterns at a time.
+
 ```{r}
 data_diet %>% 
   mutate(Treatment = case_when(
-    Treatment %in% c("G", "g", "Ginger", "ginger") ~ "Ginger",
+    Treatment == "Ginger" ~ "Ginger", # keep it the same!
     Treatment %in% c("Mint", "mint", "Peppermint", "peppermint") ~ "Peppermint",
     Treatment %in% c("O", "Other") ~ "Other")) %>%
 
@@ -573,7 +596,7 @@ data_diet %>%
 
 ## Another reason for `case_when()`
 
-`case_when` can do very sophisticated comparisons
+`case_when` can do very sophisticated comparisons!
 
 ```{r}
 
@@ -592,22 +615,6 @@ head(data_diet)
 data_diet %>% 
   count(Diet, Effect)
 ```
-## Taking a look at the data
-
-```{r, echo = FALSE}
-diet_effect <- data_diet %>% 
-  count(Diet, Effect)
-
-data_diet %>% 
-  count(Diet, Effect)%>%
-  ggplot(aes(x = Effect,y = n, fill = Diet)) + 
-  geom_col(position = position_dodge()) +
-  labs(y = "Individuals", 
-       title = "Effect of diet A & B on participants")
-
-
-```
-
 
 # Working with strings
 

From 6db4ad095295573c6dc1d5396560eb65efd62716 Mon Sep 17 00:00:00 2001
From: carriewright11 <cwright2@fredhutch.org>
Date: Wed, 10 Jan 2024 02:27:42 -0500
Subject: [PATCH 16/16] adding showing more of what is happening in recoding
 and case_when so we dont overwrite only

---
 modules/Data_Cleaning/Data_Cleaning.Rmd | 78 ++++++++++++++++---------
 1 file changed, 52 insertions(+), 26 deletions(-)

diff --git a/modules/Data_Cleaning/Data_Cleaning.Rmd b/modules/Data_Cleaning/Data_Cleaning.Rmd
index aeabb2732..f9cf6c12f 100644
--- a/modules/Data_Cleaning/Data_Cleaning.Rmd
+++ b/modules/Data_Cleaning/Data_Cleaning.Rmd
@@ -433,7 +433,7 @@ could be hectic! In `dplyr` you can use the `recode` function.
 ```{r, eval = FALSE}
 # General Format - this is not code!
 {data_input} %>%
-  mutate({variable_to_fix} = recode({Variable_fixing}, {old_value} = {new_value},
+  mutate({variable_to_fix_or_new} = recode({Variable_fixing}, {old_value} = {new_value},
                                        {another_old_value} = {new_value}))
 
 ```
@@ -448,12 +448,12 @@ Need quotes for new values! Tolerates quotes for old values.
 ```{r, eval = FALSE}
 
 data_diet %>% 
-  mutate(Treatment = recode(Treatment,  
+  mutate(Treatment_recoded = recode(Treatment,  
                                        O = "Other",
                                     Mint = "Peppermint",
                                     mint = "Peppermint",
                               peppermint = "Peppermint")) %>%
-  count(Treatment)
+  count(Treatment, Treatment_recoded)
 ```
 
 ## `recode()`
@@ -461,14 +461,27 @@ data_diet %>%
 ```{r, eval = TRUE}
 
 data_diet %>% 
-  mutate(Treatment = recode(Treatment,
+  mutate(Treatment_recoded = recode(Treatment,
                                        O = "Other",
                                     Mint = "Peppermint",
                                     mint = "Peppermint",
                               peppermint = "Peppermint")) %>%
-  count(Treatment)
+  count(Treatment, Treatment_recoded)
 ```
+## Can update or overwrite variables with recode too!
+
+Just use the same variable name on the left-hand side within `mutate()` to change the existing variable.
+
+```{r, eval = TRUE}
 
+data_diet %>% 
+  mutate(Treatment= recode(Treatment,
+                                       O = "Other",
+                                    Mint = "Peppermint",
+                                    mint = "Peppermint",
+                              peppermint = "Peppermint")) %>%
+  count(Treatment)
+```
 
 ## Or you can use `case_when()`
 
@@ -488,12 +501,12 @@ Need quotes for conditions and new values!
 
 ```{r}
 data_diet %>% 
-  mutate(Treatment = case_when(
+  mutate(Treatment_recoded = case_when(
                                Treatment == "O" ~ "Other",
                                Treatment == "Mint" ~ "Peppermint",
                                Treatment == "mint" ~ "Peppermint",
                                Treatment == "peppermint" ~ "Peppermint"))  %>% 
-  count(Treatment)
+  count(Treatment, Treatment_recoded)
 
 ```
 
@@ -512,10 +525,6 @@ data_diet %>%
                                Treatment == "peppermint" ~ "Peppermint")) 
 ```
 
-## Original data
-```{r}
-data_diet
-```
 
 ## `case_when()` drops unspecified values
 
@@ -540,34 +549,47 @@ or it can be the original values of the column
 
 ```{r}
 data_diet %>% 
-  mutate(Treatment = case_when(
+  mutate(Treatment_recoded = case_when(
                                Treatment == "O" ~ "Other",
                                Treatment == "Mint" ~ "Peppermint",
                                Treatment == "mint" ~ "Peppermint",
                                Treatment == "peppermint" ~ "Peppermint",
                                 TRUE ~ Treatment)) %>%
-  count(Treatment)
+  count(Treatment, Treatment_recoded)
 ```
 
 ## Typically it is good practice to include the TRUE statement
 
+You never know if you might be missing something - and if a value was already `NA`, it will stay that way.
+
 ```{r, eval = FALSE}
 data_diet %>% 
-  mutate(Treatment = case_when(
+  mutate(Treatment_recoded = case_when(
                                Treatment == "O" ~ "Other",
                                Treatment == "Mint" ~ "Peppermint",
                                Treatment == "mint" ~ "Peppermint",
                                Treatment == "peppermint" ~ "Peppermint",
                                 TRUE ~ Treatment)) %>%
-  count(Treatment)
+  count(Treatment, Treatment_recoded)
 ```
 
-You never know if you might be missing something - and if a value already was an NA it will stay that way. 
 
 ## But maybe we want NA?
 
 Perhaps we want values that are O or Other to actually be NA, then `case_when` can be helpful for this. We simply specify everything else.
 
+```{r}
+data_diet %>% 
+  mutate(Treatment_recoded = case_when(Treatment == "Ginger" ~ "Ginger", 
+                               Treatment == "Mint" ~ "Peppermint",
+                               Treatment == "mint" ~ "Peppermint",
+                               Treatment == "peppermint" ~ "Peppermint")) %>%
+  count(Treatment, Treatment_recoded)
+```
+## case_when() can also overwrite/update a variable
+
+Just like with `recode()`, we just need to reuse the existing variable name on the left-hand side inside `mutate()`.
+
 ```{r}
 data_diet %>% 
   mutate(Treatment = case_when(Treatment == "Ginger" ~ "Ginger", 
@@ -575,6 +597,7 @@ data_diet %>%
                                Treatment == "mint" ~ "Peppermint",
                                Treatment == "peppermint" ~ "Peppermint")) %>%
   count(Treatment)
+
 ```
 
 ## More complicated case_when()
@@ -583,12 +606,12 @@ data_diet %>%
 
 ```{r}
 data_diet %>% 
-  mutate(Treatment = case_when(
+  mutate(Treatment_recode = case_when(
     Treatment == "Ginger" ~ "Ginger", # keep it the same!
     Treatment %in% c("Mint", "mint", "Peppermint", "peppermint") ~ "Peppermint",
     Treatment %in% c("O", "Other") ~ "Other")) %>%
 
-  count(Treatment)
+  count(Treatment, Treatment_recode)
 
 ```
 
@@ -598,6 +621,8 @@ data_diet %>%
 
 `case_when` can do very sophisticated comparisons!
 
+Here we create a new variable called `Effect`.
+
 ```{r}
 
 data_diet <- data_diet %>% 
@@ -710,7 +735,7 @@ count(data_diet, Treatment)
 ```{r, eval = FALSE}
 
 data_diet %>% 
-  mutate(Treatment = recode(Treatment, G = "Ginger", 
+  mutate(Treatment_recoded = recode(Treatment, G = "Ginger", 
                                        g = "Ginger", 
                                   ginger = "Ginger",
                                        O = "Other",
@@ -725,10 +750,11 @@ But we still might miss a strange value
 
 ```{r, eval = FALSE}
 data_diet %>% 
-  mutate(Treatment = case_when(
+  mutate(Treatment_recoded = case_when(
     Treatment %in% c("G", "g", "Ginger", "ginger") ~ "Ginger",
     Treatment %in% c("Mint", "mint", "Peppermint", "peppermint") ~ "Peppermint",
-    Treatment %in% c("O", "Other") ~ "Other"))
+    Treatment %in% c("O", "Other") ~ "Other",
+    TRUE ~ Treatment))
 ```
 
 ## `case_when()` improved with `stringr`
@@ -737,14 +763,14 @@ data_diet %>%
 
 ```{r}
 data_diet %>% 
-  mutate(Treatment = case_when(
-    str_detect(string = Treatment, pattern = "^g|^G") ~ "Ginger",
+  mutate(Treatment_recoded = case_when(
     str_detect(string = Treatment, pattern = "int") ~ "Peppermint",
-    str_detect(string = Treatment, pattern = "^o|^O") ~ "Other")) %>%
-  count(Treatment)
+    str_detect(string = Treatment, pattern = "^o|^O") ~ "Other",
+    TRUE ~ Treatment)) %>%
+  count(Treatment, Treatment_recoded)
 ```
 
-This is a more robust solution! It will catch typos as long as first letter is correct.
+This is a more robust solution! It will catch typos as long as the first letter is correct or the value contains part of the word mint (here, `int`).
 
 ## That's better!