-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCIS-dump-NZAC.R
483 lines (367 loc) · 20.9 KB
/
CIS-dump-NZAC.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
## R Script to process data exported from the CIS databases ##
# Author: B.S. Weir (2017-2022)
#============Load all the packages needed================
library(tidyverse)
library(lubridate)
library(RColorBrewer)
library(janitor)
#============Load and subset data================
# Read the raw CIS export. guess_max = Inf lets readr use every row when it
# works out the column types; glimpse() prints an overview and hands the
# table back unchanged.
NZAC.as.imported.df <- glimpse(
  read_csv(
    "NZAC-export-29-jul-2019.csv",
    guess_max = Inf,
    show_col_types = FALSE
  )
)

# Working copy with one row per AccessionNumber (the first row seen is kept).
NZAC.df <- glimpse(
  distinct(NZAC.as.imported.df, AccessionNumber, .keep_all = TRUE)
)
#============Check imported data for issues================
# List the rows that share an AccessionNumber (caused by component
# duplication) and export them as a CSV.
# These may need correcting in CIS when TaxonName_C2 is NA.

# write_csv() cannot create folders, so make sure the output folder exists.
dir.create("./outputs/NZAC", recursive = TRUE, showWarnings = FALSE)

NZAC.dupes <- NZAC.as.imported.df %>%
  get_dupes(AccessionNumber) %>%
  select(
    AccessionNumber, dupe_count, CurrentName,
    TaxonName_C2, Substrate_C2, PartAffected_C2
  ) %>%
  # uncomment the next line to keep only the rows with no TaxonName_C2
  #filter(is.na(TaxonName_C2)) %>%
  write_csv(file = "./outputs/NZAC/NZAC.dupes.csv")
#============Quick data check================
# Have a quick look at the data
head(NZAC.df)

# Save a summary of the data to a text file.
# The export is re-read with base read.csv() and stringsAsFactors = TRUE so
# that summary() tabulates the text columns (up to 25 levels each).
NZAC.string.factors <- read.csv(
  "NZAC-export-29-jul-2019.csv",
  stringsAsFactors = TRUE
)

# capture.output() cannot create folders, so make sure the folder exists.
dir.create("./outputs/NZAC", recursive = TRUE, showWarnings = FALSE)

# Kept as a separate step: capture.output(file = ) returns NULL, so piping
# into it would leave NZAC.string.factors holding NULL instead of the data.
capture.output(
  summary(NZAC.string.factors, maxsum = 25),
  file = "./outputs/NZAC/NZAC-summary.txt"
)
#============Type Specimens================
# Bar charts of the specimens that have a TypeStatus.
# ggplot() looks the columns up in the data frame it is given, so nothing
# needs to be attach()ed; ggplot2 is already loaded with the tidyverse.

# Specimens with a TypeStatus (subset() also drops rows where it is NA).
d <- subset(NZAC.df, !(TypeStatus == ""))

# Number of specimens of each kind of type
print_bars <- ggplot(d, aes(TypeStatus)) +
  labs(
    title = "Types in the NZAC",
    x = "'Kind' of type", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_types.png", plot = print_bars,
  width = 10, height = 10
)

# The same chart with each bar split by specimen type
print_bars <- ggplot(d, aes(TypeStatus, fill = SpecimenType)) +
  labs(
    title = "Types in the NZAC",
    x = "'Kind' of type", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC.types.by.kind.png", plot = print_bars,
  width = 10, height = 10
)
#============Kingdom Level barcharts================
# Horizontal bar charts of specimen counts across the whole collection.
# Every chart is built once, shown, and then saved with ggsave().
# ggplot() reads the columns from NZAC.df directly, so no attach() is needed.

# Specimens per specimen type
print_bars <- ggplot(NZAC.df, aes(SpecimenType)) +
  labs(
    title = "Specimens in the NZAC by Specimen type",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_kingdoms.png", plot = print_bars,
  width = 7, height = 7
)

# Specimen types split by the GenBank column (shown only; saving is disabled)
print_bars <- ggplot(NZAC.df, aes(SpecimenType, fill = GenBank)) +
  labs(
    title = "Specimens in the NZAC in GenBank",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
#ggsave(filename = "NZAC_kingdoms_genbank.png", plot = print_bars, width = 7, height = 7)

# Specimen types split by the Literature column
print_bars <- ggplot(NZAC.df, aes(SpecimenType, fill = Literature)) +
  labs(
    title = "Specimens in the NZAC in Literature",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_kingdoms_Literature.png", plot = print_bars,
  width = 7, height = 7
)

# Specimen types split by the Images column
print_bars <- ggplot(NZAC.df, aes(SpecimenType, fill = Images)) +
  labs(
    title = "Specimens in the NZAC with images",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_kingdoms_images.png", plot = print_bars,
  width = 7, height = 7
)
# could also do a stacked bar chart with images, genbank, literature all on one chart.

# Specimen types split by Occurrence Description
print_bars <- ggplot(NZAC.df, aes(SpecimenType, fill = OccurrenceDescription)) +
  labs(
    title = "Specimens in the NZAC by occurrence in NZ",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_kingdoms_occurrence.png", plot = print_bars,
  width = 7, height = 7
)

# Specimens per CollectionEventMethod
print_bars <- ggplot(NZAC.df, aes(CollectionEventMethod)) +
  labs(
    title = "Specimens in the NZAC by Collection Event Method",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_CollectionEventMethod.png", plot = print_bars,
  width = 7, height = 7
)

# Specimens per Occurrence Description (not split by specimen type)
print_bars <- ggplot(NZAC.df, aes(OccurrenceDescription)) +
  labs(
    title = "Specimens in the NZAC by occurrence in NZ",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_kingdoms_occurrence2.png", plot = print_bars,
  width = 7, height = 7
)

# Specimen types split by LoanStatus
# NOTE(review): the file name contains a space before "LoanStatus"; it is kept
# as is in case something downstream expects it - confirm and tidy up.
print_bars <- ggplot(NZAC.df, aes(SpecimenType, fill = LoanStatus)) +
  labs(
    title = "NZAC Order Status",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_kingdoms_ LoanStatus.png", plot = print_bars,
  width = 7, height = 7
)

# Specimen types split by who last updated the record
print_bars <- ggplot(NZAC.df, aes(SpecimenType, fill = UpdatedBy)) +
  labs(
    title = "NZAC Last updated by",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_kingdoms_updated_by.png", plot = print_bars,
  width = 7, height = 7
)
# need a kingdoms by NZ Specimens??
#============High Taxonomy================
# Specimen counts at each higher taxonomic rank, for the whole collection.
# ggplot() reads the columns from NZAC.df directly, so no attach() is needed.

# Phylum
print_bars <- ggplot(NZAC.df, aes(Phylum)) +
  labs(
    title = "NZAC by phylum",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_phylum.png", plot = print_bars,
  width = 10, height = 10
)

# Class
print_bars <- ggplot(NZAC.df, aes(Class)) +
  labs(
    title = "NZAC by class",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_class.png", plot = print_bars,
  width = 10, height = 10
)

# Order
print_bars <- ggplot(NZAC.df, aes(Order)) +
  labs(
    title = "NZAC by order",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_order.png", plot = print_bars,
  width = 10, height = 10
)

# Order, with each bar split by specimen type
print_bars <- ggplot(NZAC.df, aes(Order, fill = SpecimenType)) +
  labs(
    title = "NZAC by order",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_order-speciemtype.png", plot = print_bars,
  width = 10, height = 10
)

# Family (shown only; saving is disabled)
print_bars <- ggplot(NZAC.df, aes(Family)) +
  labs(
    title = "NZAC by family",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
#ggsave(filename = "NZAC_family.png", plot = print_bars, width = 20, height = 10)
# ----- fungal taxon grouping -----
# The same rank-by-rank charts, limited to rows whose SpecimenType is
# "Fungal Culture". ggplot() reads the columns from `f` directly, so the
# subset does not need to be attach()ed.
f <- subset(NZAC.df, SpecimenType == "Fungal Culture")

# Fungal phylum
print_bars <- ggplot(f, aes(Phylum)) +
  labs(
    title = "NZAC by fungal phylum",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_fungal-phylum.png", plot = print_bars,
  width = 10, height = 10
)

# Fungal class
print_bars <- ggplot(f, aes(Class)) +
  labs(
    title = "NZAC by fungal class",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_fungal-class.png", plot = print_bars,
  width = 10, height = 10
)

# Fungal order
print_bars <- ggplot(f, aes(Order)) +
  labs(
    title = "NZAC by fungal order",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_fungal-order.png", plot = print_bars,
  width = 10, height = 10
)

# Fungal family (wider canvas than the other ranks)
print_bars <- ggplot(f, aes(Family)) +
  labs(
    title = "NZAC by fungal family",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_fungal-family.png", plot = print_bars,
  width = 20, height = 10
)
#============Other names================
# Bar charts of taxon names by phylum, kingdom and family.
#
# NOTE(review): this script never assigns an object called `names`, so unless
# one is created outside this script, `names` below is base R's names()
# function and the four charts that pass it to ggplot() are not expected to
# run as written. Presumably this section was copied from a script that had a
# separate table of names - confirm which data frame these charts should use.
# NOTE(review): `Phlum` looks like a typo for `Phylum`, the spelling used in
# the rest of this script - confirm.
#
# Original author's note: error, Kingdom is missing
# (presumably the Kingdom column is absent from this export - TODO confirm)
# Rows where Kingdom is "Fungi" and OccurrenceDescription is "Present".
names.present.fungi <- subset(NZAC.df,(Kingdom == "Fungi" & OccurrenceDescription == "Present"))
# Print a per-column summary of that subset
summary(names.present.fungi, maxsum=40)
# names by phylum (see the NOTE(review) comments above about `names` and `Phlum`)
require(ggplot2)
p <- ggplot(names, aes(names$Phlum)) + labs(title = "names by phylum") + labs(x = "Taxon", y = "number of names")
p <- p + theme(axis.text.x=element_text(angle=-90, hjust=0))
p + geom_bar()+ coord_flip()
print_bars <- p + geom_bar()+ coord_flip()
ggsave(print_bars, file='names-phylum.png', width=10, height=10)
# names by phylum, with each bar split by OccurrenceDescription
require(ggplot2)
p <- ggplot(names, aes(names$Phlum, fill=OccurrenceDescription)) + labs(title = "names by phylum") + labs(x = "Taxon", y = "number of names")
p <- p + theme(axis.text.x=element_text(angle=-90, hjust=0))
p + geom_bar()+ coord_flip()
print_bars <- p + geom_bar()+ coord_flip()
ggsave(print_bars, file='names-phylum-occurrence.png', width=10, height=10)
# names by Kingdom, with each bar split by OccurrenceDescription
require(ggplot2)
p <- ggplot(names, aes(names$Kingdom, fill=OccurrenceDescription)) + labs(title = "names by Kingdom") + labs(x = "Taxon", y = "number of names")
p <- p + theme(axis.text.x=element_text(angle=-90, hjust=0))
p + geom_bar()+ coord_flip()
print_bars <- p + geom_bar()+ coord_flip()
ggsave(print_bars, file='names-Kindom-occurrence.png', width=10, height=10)
# names by Kingdom, with each bar split by BioStatusDescription
require(ggplot2)
p <- ggplot(names, aes(names$Kingdom, fill=BioStatusDescription)) + labs(title = "names by Kingdom") + labs(x = "Taxon", y = "number of names")
p <- p + theme(axis.text.x=element_text(angle=-90, hjust=0))
p + geom_bar()+ coord_flip()
print_bars <- p + geom_bar()+ coord_flip()
ggsave(print_bars, file='names-Kindom-biostatus.png', width=10, height=10)
# Family counts within names.present.fungi (the subset built at the top of
# this section); saved on a tall 10 x 35 canvas.
require(ggplot2)
p <- ggplot(names.present.fungi, aes(names.present.fungi$Family)) + labs(title = "names by family in NZ") + labs(x = "Taxon", y = "number of species")
p <- p + theme(axis.text.x=element_text(angle=-90, hjust=0))
p + geom_bar()+ coord_flip()
print_bars <- p + geom_bar()+ coord_flip()
ggsave(print_bars, file='names-Family-occurrence-NZ.png', width=10, height=35)
#============Countries================
# New Zealand specimens by area code.
# The chart is built from the New Zealand subset: passing NZAC.df to ggplot()
# would count the specimens from every country, because ggplot() takes its
# columns from the data frame it is given.
NZAC.df.NZ <- subset(NZAC.df, Country == "New Zealand")

print_bars <- ggplot(NZAC.df.NZ, aes(NZAreaCode)) +
  labs(
    title = "NZ Specimens in the NZAC by Area Code",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_NZAreaCode.png", plot = print_bars,
  width = 7, height = 7
)
# Specimens per country: every row that has a Country value is counted
# (the chart title says "Top 10", but no country is filtered out here).
cy <- subset(NZAC.df, !(Country == ""))
print_bars <- ggplot(cy, aes(Country)) +
  labs(title = "Top 10 Countries", x = "Country", y = "number") +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_country.png", plot = print_bars,
  width = 10, height = 10
)

# Top ten countries by specimen type.
# `positions` is both the filter and the order in which the bars are drawn.
# The data frame keeps the name `c` because a later section of this script
# reads it; calls such as c("a", "b") still reach the base function.
positions <- c(
  "New Zealand", "United States", "Australia", "United Kingdom", "Brazil",
  "Japan", "India", "China", "Italy", "France"
)
c <- subset(NZAC.df, Country %in% positions)
print_bars <- ggplot(c, aes(Country, fill = SpecimenType)) +
  labs(
    title = "Top 10 Countries in the NZAC",
    x = "Country", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip() +
  scale_x_discrete(limits = positions)
print_bars
ggsave(
  filename = "NZAC_country_by_kind.png", plot = print_bars,
  width = 6, height = 5
)
ggsave(
  filename = "NZAC_country_by_kind.svg", plot = print_bars,
  width = 6, height = 5
)
ggsave(
  filename = "NZAC_country_by_kind.eps", plot = print_bars,
  width = 6, height = 5
)

# Pacific countries by specimen type
pacific_countries <- c(
  "Fiji", "American Samoa", "Cook Islands", "Solomon Islands", "Micronesia",
  "New Caledonia", "Niue", "Norfolk Island", "Samoa", "Vanuatu"
)
c <- subset(NZAC.df, Country %in% pacific_countries)
print_bars <- ggplot(c, aes(Country, fill = SpecimenType)) +
  labs(
    title = "Pacific Countries Specimens in the NZAC",
    x = "Country", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC-pacific-countries.png", plot = print_bars,
  width = 10, height = 10
)

# Top ten countries by specimen type, leaving New Zealand out
positions <- c(
  "United States", "Australia", "United Kingdom", "Brazil", "Japan",
  "India", "China", "France", "Italy", "Canada"
)
c <- subset(NZAC.df, Country %in% positions)
print_bars <- ggplot(c, aes(Country, fill = SpecimenType)) +
  labs(
    title = "Top 10 Countries in the NZAC (not including NZ)",
    x = "Country", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip() +
  scale_x_discrete(limits = positions)
print_bars
ggsave(
  filename = "NZAC_country_by_kind_not_nz.png", plot = print_bars,
  width = 10, height = 10
)
## could make a pseudo dataset, manually replacing all non-target countries with "other"
#============Over time================
# also do a trend line of growth: do a scatterplot and fit a trend line to project growth.
# Do on received date and check for any blanks
# can do a cumulative graph?

# Histogram of collection dates.
# binwidth is in days on a date axis, so 365.25 gives one-year bins
# (use binwidth = 730 for two-year bins).
dip <- ggplot(NZAC.df, aes(as.Date(CollectionDateISO))) +
  labs(
    title = "Collection dates of NZAC Specimens",
    x = "Date of collection", y = "Number of Specimens", fill = ""
  ) +
  scale_x_date() +
  geom_histogram(binwidth = 365.25)
dip
ggsave(
  filename = "NZAC-collection-dates.png", plot = dip,
  width = 5, height = 5
)
ggsave(
  filename = "NZAC-collection-dates.svg", plot = dip,
  width = 5, height = 5
)
ggsave(
  filename = "NZAC-collection-dates.eps", plot = dip,
  width = 5, height = 5
)
# Label each row "NZP" or "other" by Contributor (NA stays NA).
NZAC.df$topcontrib <- ifelse(NZAC.df$Contributor == "NZP", "NZP", "other")
# Show how many rows fall in each group instead of printing the whole column.
table(NZAC.df$topcontrib, useNA = "ifany")

# Histogram of isolation dates in one-year bins, filled by Contributor.
# `fill` has to be an argument of aes(); inside as.Date() it was silently
# ignored and the bars were never coloured.
# NOTE(review): if Contributor has too many values for a readable legend,
# `topcontrib` (computed just above) may be what was intended - confirm.
ditcp <- ggplot(
  NZAC.df,
  aes(as.Date(IsolationDateISO), fill = Contributor)
) +
  labs(
    title = "Isolation dates of NZAC Specimens",
    x = "Date of isolation", y = "Number of Specimens", fill = ""
  ) +
  scale_x_date() +
  geom_histogram(binwidth = 365.25)
ditcp
# Save the isolation-date chart itself (not the collection-date chart).
ggsave(
  filename = "NZAC-isolation-dates2.png", plot = ditcp,
  width = 4, height = 3
)
# Histograms of received dates in one-year bins (binwidth is in days).
# The horizontal line at 392 is a hard-coded reference level; what it
# represents is not recorded in this script.

# All specimens
drp <- ggplot(NZAC.df, aes(as.Date(ReceivedDateISO))) +
  labs(
    title = "Received dates of NZAC Specimens",
    x = "Date of Receipt", y = "Number of Specimens", fill = ""
  ) +
  scale_x_date() +
  geom_histogram(binwidth = 365.25) +
  geom_hline(yintercept = 392, linetype = 2)
# On-screen preview only: this swaps the date scale for a continuous one with
# about ten breaks (ggplot2 reports the replacement with a message).
drp + scale_x_continuous(breaks = scales::pretty_breaks(n = 10))
ggsave(
  filename = "NZAC-received-dates.png", plot = drp,
  width = 10, height = 10
)

## CAN we do this by organism too?
# Split by specimen type
drp <- ggplot(NZAC.df, aes(as.Date(ReceivedDateISO), fill = SpecimenType)) +
  labs(
    title = "Received dates of NZAC Specimens",
    x = "Date of Receipt", y = "Number of Specimens", fill = ""
  ) +
  scale_x_date() +
  geom_histogram(binwidth = 365.25) +
  geom_hline(yintercept = 392, linetype = 2)
drp
ggsave(
  filename = "NZAC-received-dates-organism.png", plot = drp,
  width = 15, height = 10
)
sum2 <- ggplot_build(drp) # this extracts the values from the histogram
sum2

# Split by main contributor.
# topcontrib is (re)computed here, before the chart that reads it, so this
# chart does not rely on an earlier section having run.
NZAC.df$topcontrib <- ifelse(NZAC.df$Contributor == "NZP", "NZP", "other")
table(NZAC.df$topcontrib, useNA = "ifany")
drp <- ggplot(NZAC.df, aes(as.Date(ReceivedDateISO), fill = topcontrib)) +
  labs(
    title = "Main Contributors to the NZAC collection",
    x = "Date of Receipt", y = "Number of Specimens", fill = ""
  ) +
  scale_x_date() +
  geom_histogram(binwidth = 365.25) +
  geom_hline(yintercept = 392, linetype = 2)
drp
ggsave(
  filename = "NZAC-received-dates-contributor.png", plot = drp,
  width = 15, height = 10
)
# Collections over the years - this section looks unfinished.
# NOTE(review): the heading is about years, but the code charts Country from
# `c`, which at this point is whatever the Countries section last stored in
# it. The title and output file name are those of the Pacific countries chart.
# NOTE(review): Country is compared with text values elsewhere in this
# script, so it is presumably a text column; geom_histogram() bins a
# continuous x, so this chart is expected to fail - confirm. If it did run,
# it would overwrite 'NZAC-pacific-countries.png' written by the Countries
# section.
attach(NZAC.df) # puts the columns of NZAC.df on the search path (the plot below takes its data from `c`, not from here)
require(ggplot2)
con <- ggplot(c, aes(Country, fill=SpecimenType)) + labs(title = "Pacific Countries Specimens in the NZAC") + labs(x = "Country", y = "number of isolates")
con <- con + theme(axis.text.x=element_text(angle=-90, hjust=0))
con + geom_histogram()+ coord_flip()
print_bars <- con + geom_histogram()+ coord_flip()
ggsave(print_bars, file='NZAC-pacific-countries.png', width=10, height=10)
# Collections over the years in NZ - not written yet.
# `c` is replaced by the New Zealand rows; nothing after this line in the
# script reads it.
c <- subset(NZAC.df, (Country == "New Zealand"))
# TODO: also need something that plots monthly, e.g. fungi versus collection month.
#======MAPS========
# Draw a grey world base map with ggplot2 and layer blue points on top.
# NOTE(review): `visit.x` and `visit.y` are not created anywhere in this
# script, so showing `mp` is expected to fail unless they exist in the
# session. Presumably they are placeholders for longitude/latitude values
# from the export - confirm and replace.
mp <- NULL # placeholder only; `mp` is assigned again two lines below
mapWorld <- borders("world", colour="gray50", fill="gray50") # create a layer of borders
mp <- ggplot() + mapWorld
# Now layer the points on top (x = visit.x, y = visit.y)
mp <- mp+ geom_point(aes(x=visit.x, y=visit.y) ,color="blue", size=3)
mp
#======On Hosts========
# Specimens whose TaxonName_C2 is kiwifruit (Actinidia deliciosa)
NZAC.df.kiwifruit <- subset(NZAC.df, TaxonName_C2 == "Actinidia deliciosa")

# Families recorded on kiwifruit.
# The old attach(b) call is gone: `b` is never created in this script, and
# ggplot() reads Family from the kiwifruit subset it is given.
print_bars <- ggplot(NZAC.df.kiwifruit, aes(Family)) +
  labs(
    title = "Family of microbes on kiwifruit in the NZAC",
    x = "Taxon", y = "number of isolates"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  geom_bar() +
  coord_flip()
print_bars
ggsave(
  filename = "NZAC_kiwifruit-family.png", plot = print_bars,
  width = 10, height = 10
)