Skip to content

Commit

Permalink
closes #6. fungi csvs working
Browse files Browse the repository at this point in the history
  • Loading branch information
stevenpbachman committed May 31, 2024
1 parent 45244fb commit 93bd920
Show file tree
Hide file tree
Showing 22 changed files with 232 additions and 171 deletions.
17 changes: 7 additions & 10 deletions R/get_name_keys.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#'
#' @param df character) Data frame with taxon names
#' @param name_col (character) Column for taxon names. Include taxonomic authority for better matching
#' @param tax_status (character) Default `any` provides all taxonomic, otherwise 'accepted'
#' @param match (character) Controls the output of the search. Use `single` to
#' force a single match result that has the highest confidence or `any` to return
#' all possible matches sorted in order of confidence
Expand All @@ -12,24 +11,23 @@
#'
#' @return Returns a data frame with accepted GBIF and POWO identifiers
#' @export
#' @details Designed for batch processing. Default setting (tax_status = "accepted" and match = "single")
#' returns an accepted name with the best single match against GBIF and POWO name backbones.
#' Final list may return fewer names if there are discrepancies e.g. accepted
#' @details Designed for batch processing. Default setting (match = "single", kingdom = "plantae")
#' returns an accepted name with the best single match against GBIF and POWO name backbones (GBIF
#' only for kingdom = "fungi"). Final list may return fewer names if there are discrepancies e.g. accepted
#' in GBIF, but not in POWO. Output data frame includes GBIF 'usageKey' that can be used with
#' [`get_gbif_occs()`] to get occurrences from GBIF, and 'wcvp_ipni_id' that can be used with [`powo_range()`]
#' to get native ranges. To see a wider range of plausible matches adjust 'tax_status' and 'match' to 'any'.
#' to get native ranges. To see a wider range of plausible matches adjust 'match' to 'any'.

# add option to determine which sources you want to search e.g. WCVP for plants, or IF for fungi
get_name_keys <- function(df, name_column, tax_status = "any", match = "single", kingdom = "plantae") {
get_name_keys <- function(df, name_column, match = "single", kingdom = "plantae") {
# search terms
search_names <- as.vector(unlist(df[, name_column]))

# get the GBIF keys
gbif_names_out <-
purrr::map_dfr(search_names,
name_search_gbif,
match = match,
gbif_tax_stat = tax_status)
match = match)
colnames(gbif_names_out) <- paste0("GBIF", "_", colnames(gbif_names_out))

keys_df <- gbif_names_out
Expand All @@ -39,8 +37,7 @@ get_name_keys <- function(df, name_column, tax_status = "any", match = "single",
# get the POWO keys
powo_names_out <-
name_search_powo(df = df,
name_column = name_column,
powo_tax_stat = tax_status)
name_column = name_column)

# bind them together - need to fix when WCVP returns multiple matches
keys_df <-
Expand Down
5 changes: 2 additions & 3 deletions R/get_native_range.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
#' Get native ranges for taxa
#'
#' @param keys (data frame) Contain identifier for the taxon e.g. derived from [`get_name_keys()`]
#' @param keys (name_col) Column name for the identifier
#'
#' @return (data frame) A list of places where a taxon occurs.
#' @return (data frame) A list of 'botanical countries' (World Geogrpahic Scheme for Recording Plant
#' Distributions) where a taxon occurs.
#' @export
#'
#' @details Currently one option to get native ranges from (Plants of the
Expand All @@ -14,7 +14,6 @@
get_native_range <- function(keys) { #,name_col

# get the search ids
#search_ids <- as.vector(unlist(keys[, name_col]))
search_ids <- as.vector(unlist(keys[, "wcvp_ipni_id"]))

# run the powo range function through map_dfr
Expand Down
12 changes: 8 additions & 4 deletions R/get_occs_range.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,26 @@
#'
#' @param sis_points (dataframe) SIS points file
#'
#' @return Occurrence dataframe with ISO two-digit codes added
#' @return Occurrence dataframe with ISO two-digit codes added and internal id

get_occs_range <- function(sis_points) {

sf_use_s2(FALSE)
sf::sf_use_s2(FALSE)

# get unique lat longs
sis_points_sf <- sf::st_as_sf(sis_points, coords = c("dec_long","dec_lat"))

# prepare the tdwg polygon data
st_crs(LCr::tdwg_level3) <- st_crs(tdwg_level3)
st_crs(sis_points_sf) <- st_crs(tdwg_level3)
sf::st_crs(tdwg_level3) <- sf::st_crs(tdwg_level3)
sf::st_crs(sis_points_sf) <- sf::st_crs(tdwg_level3)

# do the intersect
out <- sf::st_join(sis_points_sf, tdwg_level3)

# tidy up the table with just the required fields
out <- out %>% dplyr::select("LEVEL3_COD", "sci_name", "internal_taxon_id") %>%
sf::st_drop_geometry() %>% unique() %>% tidyr::drop_na() %>% dplyr::arrange(sci_name)

return(out)

}
3 changes: 1 addition & 2 deletions R/make_LC_points.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#' @param institution (character) Name of institution or affiliation
#' @param range_check (boolean) TRUE if you want to carry out occurrence cleaning with native range filter
#'
#' @return (list) Includes the GBIF citation, and "points" the cleaned SIS compatible point file
#' @return (list) Includes the GBIF "citation", and "points" the cleaned SIS compatible point file
#' @export
#' @details Designed for batch processing using keys derived from [`get_name_keys()`]

Expand Down Expand Up @@ -52,7 +52,6 @@ make_LC_points <-
res_list <- list("citation" = gbif_ref, "points" = final_points)
}

#res_list <- list("citation" = gbif_ref, "points" = final_points)
return(res_list)

}
92 changes: 68 additions & 24 deletions R/make_sis_csvs.R
Original file line number Diff line number Diff line change
@@ -1,64 +1,108 @@


#' Generate all SIS connect csv files
#'
#' @param keys (data frame) Derived from [`get_name_keys()`] function. Must include at least GBIF_usageKey to obtain GBIF occurrences
#' @param unique_id (character) Unique identifier - default is the GBIF usage key
#' @param first_name (character) First name of assessor
#' @param second_name (character) Second name of assessor
#' @param email (character) Email of assessor
#' @param institution (character) Name of institution or affiliation
#' @param gbif_ref (data frame) A GBIF download citation according to IUCN format.
#' @param powo_ref (data frame) A citation for use of POWO according to IUCN format.
#' @param native_ranges (data frame) Native ranges derived from [`get_native_range()`]
#' @param family (character) Field containing the family
#' @param genus (character) Field containing the genus
#' @param species (character) Field containing the specific epithet
#' @param taxonomicAuthority (character) Field containing the taxonomic authority
#' @param kingdom (character) Default is 'plantae', but can also be 'fungi'
#'
#' @return Returns an SIS compliant zip file
#' @export

make_sis_csvs <-
function(unique_id, wcvp_ipni_id, first_name, second_name, email,
institution, gbif_ref = NULL, native_ranges,
family, genus, species, taxonomicAuthority,
kingdom = "plantae", powo_ref = FALSE) {

function(unique_id,
wcvp_ipni_id,
first_name,
second_name,
email,
institution,
gbif_ref = NULL,
powo_ref = FALSE,
native_ranges = NULL,
family,
genus,
species,
taxonomicAuthority,
kingdom = "plantae")
{
if (kingdom == "plantae") {

# get most of the csvs here
countries <- sis_countries(unique_id, wcvp_ipni_id, native_ranges)
if (!is.null(native_ranges)) {
countries <- sis_countries(native_ranges)
}
allfields <- sis_allfields(unique_id)
assessments <- sis_assessments(unique_id)
plantspecific <- sis_plantspecific(unique_id, kingdom)
habitats <- sis_habitats(unique_id)
credits <- sis_credits(unique_id, first_name, second_name, email, affiliation = institution)
taxonomy <- sis_taxonomy(unique_id, family, genus, species, taxonomicAuthority)

# need to embed map into the function, but these are a bit awkward - try again later
references <- purrr::map_dfr(unique_id, sis_references, powo_ref = powo_ref, gbif_ref)
# need to embed map into the function, but refs a bit awkward - try again later
references <- purrr::map_dfr(unique_id, sis_references, powo_ref = powo_ref, gbif_ref = gbif_ref)

return(
list(allfields = allfields, assessments = assessments, plantspecific = plantspecific,
habitats = habitats, taxonomy = taxonomy, credits = credits, references = references,
countries = countries
# list of default results - these should always be generated
results <-
list(
allfields = allfields,
assessments = assessments,
plantspecific = plantspecific,
habitats = habitats,
credits = credits,
taxonomy = taxonomy,
references = references
)
)

# countries df depends on native ranges, so only add countries if exists
if (exists("countries")) {
results$countries <- countries
}

return(results)
}

if (kingdom == "fungi") {

# get most of the csvs here
#countries <- sis_countries(unique_id, wcvp_ipni_id, native_ranges)
if (!is.null(native_ranges)) {
countries <- sis_countries(native_ranges)
}
allfields <- sis_allfields(unique_id)
assessments <- sis_assessments(unique_id)
plantspecific <- sis_plantspecific(unique_id, kingdom)
habitats <- sis_habitats(unique_id)
credits <- sis_credits(unique_id, first_name, second_name, email, affiliation = institution)
taxonomy <- sis_taxonomy(unique_id, family,genus, species, taxonomicAuthority)
taxonomy <- sis_taxonomy(unique_id, family, genus, species, taxonomicAuthority)

# need to embed map into the function, but these are a bit awkward - try again later
# need to embed map into the function, but refs a bit awkward - try again later
references <- purrr::map_dfr(unique_id, sis_references, gbif_ref, powo_ref)

return(
list(allfields = allfields, assessments = assessments, plantspecific = plantspecific,
habitats = habitats, credits = credits, taxonomy = taxonomy, references = references
# list of default results - these should always be generated
results <-
list(
allfields = allfields,
assessments = assessments,
plantspecific = plantspecific,
habitats = habitats,
credits = credits,
taxonomy = taxonomy,
references = references
)
)
}

# countries df depends on native ranges, so only add countries if exists
if (exists("countries")) {
results$countries <- countries
}

return(results)

}
}
15 changes: 11 additions & 4 deletions R/make_sis_occs.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ make_sis_occs <- function(occs_clean, first_name = "", second_name = "", institu
basisOfRecord,
elevation,
catalogNumber,
speciesKey,
gbifID
)

Expand All @@ -42,6 +43,7 @@ make_sis_occs <- function(occs_clean, first_name = "", second_name = "", institu
)

sis_points <- dplyr::mutate(sis_points,
internal_taxon_id = speciesKey,
source = paste0("https://www.gbif.org/occurrence/", gbifID),
yrcompiled = format(Sys.Date(), "%Y"),
citation = institution,
Expand All @@ -56,15 +58,20 @@ make_sis_occs <- function(occs_clean, first_name = "", second_name = "", institu
basisofrec,
"FOSSIL_SPECIMEN" = "FossilSpecimen",
"HUMAN_OBSERVATION" = "HumanObservation",
"LITERATURE" = "",
"LIVING_SPECIMEN" = "LivingSpecimen",
"MACHINE_OBSERVATION" = "MachineObservation",
"OBSERVATION" = "",
"PRESERVED_SPECIMEN" = "PreservedSpecimen",
"UNKNOWN" = "Unknown"
"UNKNOWN" = "Unknown",
"OCCURRENCE" = "HumanObservation",
"LITERATURE" = "",
"OBSERVATION" = "HumanObservation",
"MATERIAL_ENTITY" = "",
"MATERIAL_SAMPLE" = "PreservedSpecimen",
"MATERIAL_CITATION" = "PreservedSpecimen"
)
)
sis_points <- dplyr::select(sis_points, -gbifID)

sis_points <- dplyr::select(sis_points, -speciesKey,-gbifID)

return(sis_points)
}
22 changes: 10 additions & 12 deletions R/name_search_gbif.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
#' @param match (character) Controls the output of the search. Use `single` to
#' force a single match result that has the highest confidence or `any` to return
#' all possible matches sorted in order of confidence
#' @param gbif_tax_stat (character) Default `any` provides all taxonomic
#' matches, otherwise `accepted` returns only accepted names according to GBIF
#'
#' @return Returns a data frame with initial search term and matching name(s)
#' @export
Expand All @@ -21,8 +19,8 @@

name_search_gbif = function(name,
species_rank = TRUE,
match = "single",
gbif_tax_stat = "any") {
match = "single") {
#gbif_tax_stat = "any") { #removed as was not working well
# set up default results table
default_tbl = gbif_name_tbl_(name)

Expand Down Expand Up @@ -65,14 +63,14 @@ name_search_gbif = function(name,
results = dplyr::arrange(results, dplyr::desc(confidence))
}

# option to filter on GBIF accepted species only
if (gbif_tax_stat == "any") {
results = results
} else {
if (gbif_tax_stat == "accepted") {
results = dplyr::filter(results, status == "ACCEPTED")
}
}
# # option to filter on GBIF accepted species only
# if (gbif_tax_stat == "any") {
# results = results
# } else {
# if (gbif_tax_stat == "accepted") {
# results = dplyr::filter(results, status == "ACCEPTED")
# }
# }

# option to filter on maximum confidence from GBIF search - one option only "single"
# or allow list of options "any"
Expand Down
18 changes: 8 additions & 10 deletions R/name_search_powo.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,21 @@
#'
#' @param df (data frame) Taxon name(s)
#' @param name_column (string) Column that contains the name(s)
#' @param powo_tax_stat (character) Default `any` provides all taxonomic
#' matches, otherwise `accepted` returns only accepted names according to POWO
#'
#' @return Returns a data frame with initial search term and matching name(s)
#' @export


name_search_powo <- function(df, name_column, powo_tax_stat = "any"){
name_search_powo <- function(df, name_column){
#name_df <- data.frame(name = name)
results <- rWCVP::wcvp_match_names(names_df = df, name_col = name_column)

# allow user to filter on accepted name only
if (powo_tax_stat == "any"){
results = results
} else {
if (powo_tax_stat == "accepted") {
results = dplyr::filter(results, wcvp_status == "Accepted")
}}
# # allow user to filter on accepted name only
# if (powo_tax_stat == "any"){
# results = results
# } else {
# if (powo_tax_stat == "accepted") {
# results = dplyr::filter(results, wcvp_status == "Accepted")
# }}
return(results)
}
Loading

0 comments on commit 93bd920

Please sign in to comment.