Skip to content

Commit

Permalink
fungi taxonomy working - tested on vida LCs
Browse files Browse the repository at this point in the history
  • Loading branch information
stevenpbachman committed May 24, 2024
1 parent 1df22ec commit 3388666
Show file tree
Hide file tree
Showing 16 changed files with 4,126 additions and 117 deletions.
2 changes: 1 addition & 1 deletion R/get_name_keys.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#' to get native ranges. To see a wider range of plausible matches adjust 'tax_status' and 'match' to 'any'.

# add option to determine which sources you want to search e.g. WCVP for plants, or IF for fungi
get_name_keys <- function(df, name_column, tax_status = "accepted", match = "single", kingdom = "plantae") {
get_name_keys <- function(df, name_column, tax_status = "any", match = "single", kingdom = "plantae") {
# search terms
search_names <- as.vector(unlist(df[, name_column]))

Expand Down
10 changes: 6 additions & 4 deletions R/make_sis_csvs.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@

make_sis_csvs <-
function(unique_id, wcvp_ipni_id, first_name, second_name, email,
institution, gbif_ref, native_ranges, kingdom = "plantae", powo_ref = FALSE) {
institution, gbif_ref = NULL, native_ranges,
family, genus, species, taxonomicAuthority,
kingdom = "plantae", powo_ref = FALSE) {

if (kingdom == "plantae") {

Expand All @@ -26,9 +28,9 @@ make_sis_csvs <-
plantspecific <- sis_plantspecific(unique_id, kingdom)
habitats <- sis_habitats(unique_id)
credits <- sis_credits(unique_id, first_name, second_name, email, affiliation = institution)
taxonomy <- sis_taxonomy(unique_id, family, genus, species, taxonomicAuthority)

# need to embed map into the function, but these are a bit awkward - try again later
taxonomy <- purrr::map2(unique_id, wcvp_ipni_id, sis_taxonomy) %>% dplyr::bind_rows()
references <- purrr::map_dfr(unique_id, sis_references, powo_ref = powo_ref, gbif_ref)

return(
Expand All @@ -48,14 +50,14 @@ make_sis_csvs <-
plantspecific <- sis_plantspecific(unique_id, kingdom)
habitats <- sis_habitats(unique_id)
credits <- sis_credits(unique_id, first_name, second_name, email, affiliation = institution)
taxonomy <- sis_taxonomy(unique_id, family,genus, species, taxonomicAuthority)

# need to embed map into the function, but these are a bit awkward - try again later
#taxonomy <- purrr::map2(unique_id, wcvp_ipni_id, sis_taxonomy) %>% dplyr::bind_rows()
references <- purrr::map_dfr(unique_id, sis_references, gbif_ref, powo_ref)

return(
list(allfields = allfields, assessments = assessments, plantspecific = plantspecific,
habitats = habitats, credits = credits, references = references
habitats = habitats, credits = credits, taxonomy = taxonomy, references = references
)
)
}
Expand Down
43 changes: 35 additions & 8 deletions R/name_search_gbif.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ name_search_gbif = function(name, species_rank = TRUE, match = "single", gbif_ta
default_tbl = gbif_name_tbl_(name)

# search using verbose to get fuzzy alternatives
matches = rgbif::name_backbone_verbose(name = name,
strict = FALSE)
matches = rgbif::name_backbone_verbose(name = name, strict = FALSE, rank = 'species')

# bind together in case there are missing data
matches = dplyr::bind_rows(matches$alternatives, matches$data)
Expand All @@ -47,26 +46,50 @@ name_search_gbif = function(name, species_rank = TRUE, match = "single", gbif_ta

results$searchName = name # adding the search name back in to results table

results = dplyr::select(results, colnames(default_tbl))
results = dplyr::arrange(results, dplyr::desc(confidence))
}
# check if family column exists (not for some fungi)
if ("family" %in% colnames(results)) {
# family column exists
results = dplyr::select(results, colnames(default_tbl))
results = dplyr::arrange(results, dplyr::desc(confidence))
}
else
# family column does not exist (for some fungi)
{
default_tbl = default_tbl %>% dplyr::select(-family)
results = dplyr::select(results, colnames(default_tbl))
results$family <- "incertae sedis"
results = dplyr::arrange(results, dplyr::desc(confidence))
}

# option to filter on GBIF accepted species only
if (gbif_tax_stat == "any"){
results = results
} else {
if (gbif_tax_stat == "accepted") {
results = dplyr::filter(results, status == "ACCEPTED")}
results = dplyr::filter(results, status == "ACCEPTED")
}
}

# option to filter on maximum confidence from GBIF search - one option only "single"
# or allow list of options "any"
if (match == "single") {
results = dplyr::slice_max(results, confidence, n = 1, with_ties = FALSE)
} else {
if (match == "any")
if (match == "any") {
results = results
}
}

#Check if the 'scientificName' column contains NA values
if (!is.na(results$scientificName)) {
gen_sp_auth <- rgbif::name_parse(results$scientificName) %>%
dplyr::select(genusorabove, specificepithet, authorship) %>%
dplyr::rename(genus = genusorabove,
species = specificepithet,
taxonomicAuthority = authorship)
#results <- results %>% dplyr::select(-genus, -species, -taxonomicAuthority)
results <- dplyr::bind_cols(results, gen_sp_auth)
}

return(results)

Expand All @@ -85,6 +108,10 @@ gbif_name_tbl_ = function(query) {
status = NA_character_,
rank = NA_character_,
confidence = NA_integer_,
#family = NA_character_
family = NA_character_
#genus = NA_character_,
#species = NA_character_,
#taxonomicAuthority = NA_character_
)
}
}
2 changes: 1 addition & 1 deletion R/sis_references.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#' @export
#'

sis_references = function(unique_id, gbif_ref, powo_ref) {
sis_references = function(unique_id, gbif_ref = NULL, powo_ref = NULL) {

LCr_ref <-
tibble::tibble(
Expand Down
18 changes: 6 additions & 12 deletions R/sis_taxonomy.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,21 @@
#' @return Returns an SIS compliant csv file
#' @export

sis_taxonomy = function(unique_id, wcvp_ipni_id){

name_info <- powo_lookup(wcvp_ipni_id)
sis_taxonomy = function(unique_id, family, genus, species, taxonomicAuthority){

tax = tibble::tibble(
internal_taxon_id = unique_id,
family = name_info$family,
genus = name_info$genus,
species = name_info$species,
taxonomicAuthority = name_info$authors)
family = family,
genus = genus,
species = species,
taxonomicAuthority = taxonomicAuthority
)

tax = dplyr::inner_join(tax, iucn_taxonomy, by="family")
tax = tax[c(1, 7:10, 2:5)]

tax$infratype = ""
tax$infra_name = ""
tax$infra_authority = ""

tax <- tax %>%
dplyr::rename("kingdom" = "kingdomName") %>%
dplyr::rename("phylum" = "phylumName")

return(tax)
}
49 changes: 41 additions & 8 deletions data-raw/fungi_workflow.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,51 @@ institution = "Royal Botanic Gardens, Kew"
# species list
lc_species <-
data.frame(sp = c(
"Russula amethystina",
"Lactarius decipiens",
"Poa annua"
"Asplenium longissimum Blume",
"Asplenium macrophyllum Sw.",
"Asplenium nidus L.",
"Boerhavia repens L. ",
"Canavalia cathartica Thouars",
"Cassytha filiformis L.",
"Dactyloctenium ctenoides (Steud.) Bosser",
"Euphorbia stoddartii Fosberg",
"Ipomoea violacea L.",
"test test",
"Lagrezia micrantha (Bak.)Schinz."
#"lntsia bijuga Colebr.) Kuntze",
# "Lepturus repens G.Forst.) R.Br.",
# "Triumfetta procumbens Forst.",
# #"lpomoea pes-caprae (L.) R.Br.",
# #"Pisonia grandis R.Br.",
# "Pteris tripartita Sw.",
# "Scaevola taccada (Gaertn.) Roxb.",
# "Sida pusilla Cav.",
# "Stenotaphrum micranthum (Desv.) C.E. Hubb.",
# #"Thelypteris opulentum (Kaulf.) J.P.Roux",
# "Triumfetta procumbens Forst."
))
print(lc_species)

# gbif_names_out <- purrr::map_dfr(chagos_names$name_in,
# name_search_gbif,
# match = match,
# gbif_tax_stat = "any")

# get the keys
lc_keys <-
get_name_keys(
lc_species,
name_column = "sp",
tax_status = "accepted",
#lc_species,
Confident.LC.fungi.1.3.24_VS,
#name_column = "sp",
name_column = "acceptedNameAuthor",
tax_status = "any",
match = "single",
kingdom = "fungi" # note this needs to change to remove the WCVP name check
kingdom = "fungi" # setting to fungi removes the WCVP check
)

# filter out the not accepted if you like
lc_keys <- lc_keys %>% dplyr::filter(GBIF_status == "ACCEPTED")

# make the LC points csv file
lc_points <- make_LC_points(lc_keys, range_check = FALSE)

Expand All @@ -42,7 +71,11 @@ sis_occs <- make_sis_csvs(unique_id = lc_keys$GBIF_usageKey,
second_name = "Bachman",
email = "[email protected]",
institution = "Royal Botanic Gardens, Kew",
gbif_ref = lc_points$citation,
family = lc_keys$GBIF_family,
genus = lc_keys$GBIF_genus,
species = lc_keys$GBIF_species,
taxonomicAuthority = lc_keys$GBIF_taxonomicAuthority,
#gbif_ref = lc_points$citation,
#native_ranges = lc_points$native_ranges, # remove this for fungi
kingdom = "fungi" # set to fungi
)
Expand Down
Loading

0 comments on commit 3388666

Please sign in to comment.