-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscripts_texto_nota.R
180 lines (131 loc) · 6.23 KB
/
scripts_texto_nota.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
library(palmerpenguins)
library(tidyverse)
# export as csv -----------------------------------------------------------
# tidyverse: write_csv() never writes row names
write_csv(penguins, "penguins1.csv")
# base R: write.csv() writes row names by default, and read.csv() would
# then import them as an extra first column named "X"; switch them off so
# both files hold the same columns
write.csv(penguins, "penguins2.csv", row.names = FALSE)
# readr -------------------------------------------------------------------
# tidyverse: import the first exported file
penguins_tb <- read_csv(file = "penguins1.csv")
# base R: import the second exported file
penguins_df <- read.csv(file = "penguins2.csv")
# tibble ------------------------------------------------------------------
# first three rows of the tidyverse import, which is a tibble
head(x = penguins_tb, n = 3L)
# first three rows of the base R import, which is a plain data frame
head(x = penguins_df, n = 3L)
# dplyr -------------------------------------------------------------------
# mean bill length and mean bill depth for every
# island / species / year combination
penguins_tb_mean <- penguins_tb |>
  group_by(island, species, year) |>
  summarise(
    across(
      c(bill_length_mm, bill_depth_mm),
      \(x) mean(x, na.rm = TRUE),
      .names = "{.col}_mean"
    ),
    .groups = "drop"
  )
# base R: the same group means with aggregate(), computed from the data
# frame imported with read.csv(); cbind() on the left-hand side of the
# formula summarises both bill columns in one call
penguins_df_mean <- aggregate(
  cbind(bill_length_mm, bill_depth_mm) ~ island + species + year,
  data = penguins_df,
  FUN = function(x) mean(x, na.rm = TRUE)
)
# the two value columns come after the three grouping columns; give them
# the same "_mean" names used in the tidyverse result
colnames(penguins_df_mean)[4:5] <- c(
  "bill_length_mm_mean", "bill_depth_mm_mean"
)
# tidyr -------------------------------------------------------------------
# reshape to long format: the two bill mean columns become one
# name column (bill_variable) and one value column (bill_value)
penguins_tb_mean_long <- pivot_longer(
  data = penguins_tb_mean,
  cols = all_of(c("bill_length_mm_mean", "bill_depth_mm_mean")),
  names_to = "bill_variable",
  values_to = "bill_value"
)
# base R: reshape() stacks the two mean columns into "bill_value" and
# stores the name of the source column in "bill_variable"
penguins_df_mean_long <- reshape(
  penguins_df_mean,
  direction = "long",
  varying = list(c("bill_length_mm_mean", "bill_depth_mm_mean")),
  times = c("bill_length_mm_mean", "bill_depth_mm_mean"),
  timevar = "bill_variable",
  v.names = "bill_value"
)
# remove the "id" column and reset the row names of the reshaped table
penguins_df_mean_long[["id"]] <- NULL
row.names(penguins_df_mean_long) <- NULL
# stringr -----------------------------------------------------------------
# keep the first two "_"-separated words of bill_variable, the column
# that holds the names of the bill length and bill depth means
penguins_tb_mean_long_str <- mutate(
  penguins_tb_mean_long,
  bill_str = word(bill_variable, 1L, 2L, sep = "_")
)
# base R: work on a copy so the long table from the previous step is left
# untouched
penguins_df_mean_long_str <- penguins_df_mean_long
# split every name on "_" and paste the first two pieces back together;
# vapply() guarantees a character vector with one value per row
penguins_df_mean_long_str$bill_str <- vapply(
  strsplit(penguins_df_mean_long_str$bill_variable, "_"),
  function(x) paste(x[1:2], collapse = "_"),
  character(1)
)
# forcats -----------------------------------------------------------------
# put the levels of the species factor in a hand-picked order
penguins_tb_mean_long_str_for <- mutate(
  penguins_tb_mean_long_str,
  species = fct_relevel(species, "Chinstrap", "Adelie", "Gentoo")
)
# fct_relevel() does not reorder the rows of the table; it only changes the
# internal order of the levels, which matters for some operations, for
# example the order in which groups appear in plots
# base R: start from a copy so the table from the stringr step keeps its
# original species column
penguins_df_mean_long_str_for <- penguins_df_mean_long_str
# factor() with explicit levels sets the level order by hand
penguins_df_mean_long_str_for$species <- factor(
  penguins_df_mean_long_str_for$species,
  levels = c("Chinstrap", "Adelie", "Gentoo")
)
# ggplot2 -----------------------------------------------------------------
# box plots of the bill length and bill depth means,
# with one outline colour per species
ggplot(
  data = penguins_tb_mean_long_str_for,
  mapping = aes(x = bill_str, y = bill_value, colour = species)
) +
  geom_boxplot() +
  scale_colour_manual(values = c("red", "green", "blue")) +
  labs(x = "variable", y = "mm")
# base R: one box per species-by-variable combination, filled with the
# three colours given in col
boxplot(
  bill_value ~ species * bill_str,
  data = penguins_df_mean_long_str_for,
  col = c("red", "green", "blue"),
  xlab = "species & variable",
  ylab = "mm"
)
# purrr -------------------------------------------------------------------
# build one box plot per species
#
# species_name: species to keep; also used as the plot title
# data:         long table with the columns species, bill_str and
#               bill_value; defaults to the table from the forcats step,
#               so existing one-argument calls behave as before
# returns:      a ggplot object with one box per bill_str value
plot_species_tidy <- function(species_name,
                              data = penguins_tb_mean_long_str_for) {
  data |>
    filter(species == species_name) |>
    ggplot(aes(x = bill_str, y = bill_value)) +
    geom_boxplot() +
    labs(x = "variable", y = "mm") +
    ggtitle(species_name)
}
# build the plot for every species level, in level order
levels(penguins_tb_mean_long_str_for$species) |>
  map(plot_species_tidy)
# base R
# draw a box plot of bill_value by bill_str for a single species
#
# species_name: species to keep; also used as the plot title
# data:         long data frame with the columns species, bill_str and
#               bill_value; defaults to the table from the forcats step,
#               so existing one-argument calls behave as before
# returns:      the list of statistics returned by boxplot()
plot_species_base <- function(species_name,
                              data = penguins_df_mean_long_str_for) {
  # which() drops rows whose species is NA, as subset() would
  species_rows <- which(data$species == species_name)
  species_data <- data[species_rows, , drop = FALSE]
  box_stats <- boxplot(
    bill_value ~ bill_str,
    data = species_data,
    xlab = "variable", ylab = "mm",
    main = species_name
  )
  # boxplot() returns invisibly; return the value visibly instead
  box_stats
}
# draw the box plot for every species level, in level order
levels(penguins_df_mean_long_str_for$species) |>
  lapply(plot_species_base)
# pipe --------------------------------------------------------------------
# line plot of the yearly mean body mass of each species, written as a
# single pipeline from reading the file to building the plot
# NOTE(review): the object name mentions "island", but the data are
# grouped by year and species only
plot_mass_island <- read_csv("penguins1.csv") |>
  group_by(year, species) |>
  summarise(
    body_mass_g = mean(body_mass_g, na.rm = TRUE),
    # return an ungrouped table; without this summarise() keeps the result
    # grouped by year and emits a message about it
    .groups = "drop"
  ) |>
  ggplot(aes(x = year, y = body_mass_g, col = species)) +
  geom_line() +
  scale_x_continuous(breaks = c(2007, 2008, 2009)) +
  labs(x = "year", y = "mean body mass (g)")
plot_mass_island
# base R
# the same plot without a pipeline: read, aggregate, then draw step by step
penguins_df <- read.csv("penguins2.csv")
# mean body mass per year and species, computed from the file just read
aggregated_data <- aggregate(
  body_mass_g ~ year + species,
  data = penguins_df,
  FUN = function(x) mean(x, na.rm = TRUE)
)
# widen the right margin and allow drawing outside the plot region so the
# legend can sit beside the plot; keep the previous settings to restore them
old_par <- par(mar = c(5, 4, 4, 8), xpd = TRUE)
# empty canvas covering every year and mean; the x axis is added further down
plot(
  aggregated_data$year, aggregated_data$body_mass_g,
  type = "n", xaxt = "n",
  xlab = "year", ylab = "mean body mass (g)"
)
species_data_list <- split(aggregated_data, aggregated_data$species)
# add the line (with point markers) of one species to the current plot
#
# data:  rows of aggregated_data for a single species
# color: colour used for the line and its markers
plot_species_line <- function(data, color) {
  lines(data$year, data$body_mass_g, col = color, type = "o")
}
species_colors <- c("Adelie" = "red", "Chinstrap" = "green", "Gentoo" = "blue")
# called only for its drawing side effect; invisible() keeps the list of
# NULLs returned by lapply() from being printed
invisible(lapply(names(species_data_list), function(species) {
  plot_species_line(species_data_list[[species]], species_colors[[species]])
}))
axis(1, at = c(2007, 2008, 2009))
# pch = 1 shows the point marker that type = "o" draws on every line
legend(
  "topright", inset = c(-0.3, 0.3),
  legend = names(species_colors), col = species_colors,
  lty = 1, pch = 1, title = "Species"
)
# put back the graphical parameters that were changed for this plot
par(old_par)