diff --git a/DESCRIPTION b/DESCRIPTION index 154034e..dd9434c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: cori.data.fcc Title: Process FCC data -Version: 0.1.0 +Version: 0.1.2 Authors@R: person(given="Olivier", family="Leroy", email="olivier.leroy@ruralinnovation.us", role = c("aut", "cre")) Description: Functions to get and process FCC data. @@ -11,20 +11,18 @@ RoxygenNote: 7.3.2 Depends: R (>= 2.10) LazyData: true -Suggests: - testthat (>= 3.0.0), - pkgdown, +Suggests: dplyr, DT, knitr, - rmarkdown + pkgdown, + rmarkdown, + testthat (>= 3.0.0) Config/testthat/edition: 3 -Imports: - curl, +Imports: + duckdb, jsonlite, - utils, DBI, - duckdb, stringi URL: https://ruralinnovation.github.io/cori.data.fcc/ Config/Needs/website: rmarkdown diff --git a/NAMESPACE b/NAMESPACE index 2f90b6c..800a7aa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,8 +9,5 @@ export(get_frn_nbm_bl) export(get_nbm_available) export(get_nbm_bl) export(get_nbm_release) -export(set_user_agent) -export(user_agent) import(DBI) -import(duckdb) importFrom(stringi,stri_pad_left) diff --git a/NEWS.md b/NEWS.md index 82a24f2..90bf9b2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,28 @@ +# cori.data.fcc 0.1.2 + +## Minor improvements + +* Use CORI-brewed `download_file` function; global user_agent no longer needed + + +# cori.data.fcc 0.1.1 + +## Minor improvements + +* correct typos in data stories. Thanks @camdenblatchly + +* update for the FCC's new API, whose requests now need a Referer header + +* bring back the changelog that you are reading + + # cori.data.fcc 0.1.0 ## Major Changes ### New datasets -* Add NBM Block: CORI opinionted version designed at the Census block level +* Add NBM Block: CORI opinionated version designed at the Census block level * Add NBM raws, past 4 releases @@ -14,7 +32,7 @@ * `get_nbm_bl()`allows you to get all block from one county -* `get_county_nbm_raws()` allows you to get raws NBM data for a specific county and for a release, by default the last one. 
+* `get_county_nbm_raw()` allows you to get raw NBM data for a specific county and for a release, by default the last one. ### Updated functions @@ -28,7 +46,7 @@ ## Major Changes -* Provides way to acess Form 477 +* Provides a way to access Form 477 * Provides data story on Form 477 @@ -39,8 +57,9 @@ * Organize reference of function by themes -# cori.data.fcc (first release) +# cori.data.fcc 0.0.0 -* First functions released. +* First release! + - FCC BDC download functions -* Improve functions with a user_agent defined in aaa.R #9 \ No newline at end of file +* Improve functions with a user_agent defined in aaa.R #9 diff --git a/R/aaa.R b/R/aaa.R deleted file mode 100644 index 41f423f..0000000 --- a/R/aaa.R +++ /dev/null @@ -1,40 +0,0 @@ -the <- new.env(parent = emptyenv()) -the$user_agent <- paste0("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)", - " AppleWebKit/537.36 (KHTML, like Gecko)", - " Chrome/112.0.0.0 Safari/537.36") - -#' Seting user agent for function -#' @export -#' -#'@examples -#'\dontrun{ -#' user_agent() -#'} - -user_agent <- function() { - the$user_agent -} - -#' Change User-Agent for a specific session -#' -#' Functions in this package use a specific User-Agent. -#' You can change it for a specific R session. 
-#' -#' @param user_agent a string representing an User-Agent -#' -#' @export -#' -#' @examples -#' \dontrun{ -#' set_user_agent( -#' paste0("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36", -#' " (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36") -#' ) -#' } - -set_user_agent <- function(user_agent) { - stopifnot(is.character(user_agent)) - old <- the$user_agent - the$user_agent <- user_agent - invisible(old) -} \ No newline at end of file diff --git a/R/dl_nbm.R b/R/dl_nbm.R index f567411..a42bf2f 100644 --- a/R/dl_nbm.R +++ b/R/dl_nbm.R @@ -7,7 +7,6 @@ #' @param release_date a string can be "December 31, 2023" or "June 30, 2023" #' @param data_type a string "Fixed Broadband" #' @param data_category a string "Nationwide" -#' @param user_agent a string set up by default #' @param ... additional parameters for download.file() #' #' @return Zipped csv @@ -23,8 +22,7 @@ dl_nbm <- function(path_to_dl = "~/data_swamp", release_date = "June 30, 2023", data_type = "Fixed Broadband", - data_category = "Nationwide", - user_agent = the$user_agent, ...) { + data_category = "Nationwide", ...) 
{ # clean my mess prev_timeout <- getOption("timeout") on.exit(options(timeout = prev_timeout), add = TRUE) @@ -52,8 +50,15 @@ dl_nbm <- function(path_to_dl = "~/data_swamp", next } - try(utils::download.file(url = paste0(base_url, one_release_to_dl$id[i], "/1"), - destfile = dest_file, - headers = c("User-Agent" = user_agent), ...)) + get_data_url <- paste0(base_url, one_release_to_dl$id[i], "/1") + + res <- download_file(get_data_url, dest_file) + + # Check res + if (!(dest_file %in% res)) { + message(paste0("Error in download result: ", res)) + stop(sprintf("Downloading %s failed", get_data_url)) + } + } } \ No newline at end of file diff --git a/R/download_file.R b/R/download_file.R new file mode 100644 index 0000000..3edc2f7 --- /dev/null +++ b/R/download_file.R @@ -0,0 +1,38 @@ +#' Download file function (replacement for download.file) +#' +#' @param remote_file_url URL to download file from +#' @param local_file_path Local path to save file to +#' @return path to local file +#' +#' @examples +#' \dontrun{ +#' system("mkdir -p ~/data_swamp") +#' retrieved_file <- download_file( +#' "https://archive.org/offshoot_assets/assets/ia-logo-2c2c2c.03bd7e88c8814d63d0fc..svg", +#' "~/data_swamp/archive.svg") +#' } +#' +#' +download_file <- function (remote_file_url, local_file_path) { + res <- NULL + res <- system( + sprintf( + paste0("curl '%s' --compressed ", + "-H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:131.0) Gecko/20100101 Firefox/131.0' ", + "-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8' ", + "-H 'Accept-Language: en-US,en;q=0.5' -H 'Accept-Encoding: gzip, deflate, br, zstd' ", + "-H 'Connection: keep-alive' -H 'Upgrade-Insecure-Requests: 1' ", + "-H 'Referer: https://broadbandmap.fcc.gov/data-download' ", + "-H 'Sec-Fetch-Dest: document' -H 'Sec-Fetch-Mode: navigate' -H 'Sec-Fetch-Site: none' -H 'Sec-Fetch-User: ?1' ", + "-H 'Sec-GPC: 1' -H 'Priority: u=0, i' -H 
'Pragma: no-cache' -H 'Cache-Control: no-cache' -H 'TE: trailers' ", + "-o %s"), + remote_file_url, local_file_path + ) + ) + + if (is.null(res) || res > 0) { + return(res) + } else { + return(invisible(local_file_path)) + } +} diff --git a/R/f477.R b/R/f477.R index 85a9349..f4772f3 100644 --- a/R/f477.R +++ b/R/f477.R @@ -14,18 +14,17 @@ #' #' @export #' @import DBI -#' @import duckdb #' #'@examples #'\dontrun{ -#'NC <- get_f477(state_abbr = "NC") +#' NC <- get_f477(state_abbr = "NC") #'} get_f477 <- function(state_abbr, frn = "all") { state_abbr <- state_abbr_lookup(state_abbr) - con <- DBI::dbConnect(duckdb()) + con <- DBI::dbConnect(duckdb::duckdb()) DBI::dbExecute(con, sprintf("SET temp_directory ='%s';", tempdir())) on.exit(DBI::dbDisconnect(con), add = TRUE) diff --git a/R/get_county_nbm_raw.R b/R/get_county_nbm_raw.R index a17891c..cd61658 100644 --- a/R/get_county_nbm_raw.R +++ b/R/get_county_nbm_raw.R @@ -18,18 +18,17 @@ #' #' @export #' @import DBI -#' @import duckdb #' #'@examples #'\dontrun{ -#'guilford_cty <- get_county_nbm_raw(geoid_co = "37081") +#' guilford_cty <- get_county_nbm_raw(geoid_co = "37081") #'} get_county_nbm_raw <- function(geoid_co, frn = "all", release = "2023-12-01") { # do I need a look up for county? 
- con <- DBI::dbConnect(duckdb()) + con <- DBI::dbConnect(duckdb::duckdb()) DBI::dbExecute(con, sprintf("SET temp_directory ='%s';", tempdir())) on.exit(DBI::dbDisconnect(con), add = TRUE) diff --git a/R/get_frn_nbm_bl.R b/R/get_frn_nbm_bl.R index 1c7b159..1e9fa50 100644 --- a/R/get_frn_nbm_bl.R +++ b/R/get_frn_nbm_bl.R @@ -18,18 +18,17 @@ #' #' @export #' @import DBI -#' @import duckdb #' #'@examples #'\dontrun{ -#' skymesh <- get_frn_nbm_bl("0027136753") +#' skymesh <- get_frn_nbm_bl("0027136753") #'} get_frn_nbm_bl <- function(frn) { if (nchar(frn) != 10L) stop("frn should be a 10-digit string") - con <- DBI::dbConnect(duckdb()) + con <- DBI::dbConnect(duckdb::duckdb()) DBI::dbExecute(con, sprintf("SET temp_directory ='%s';", tempdir())) on.exit(DBI::dbDisconnect(con), add = TRUE) diff --git a/R/get_nbm_available.R b/R/get_nbm_available.R index 148c292..a770f08 100644 --- a/R/get_nbm_available.R +++ b/R/get_nbm_available.R @@ -5,8 +5,7 @@ #' paste0("https://broadbandmap.fcc.gov/nbm/", #' "map/api/national_map_process/nbm_get_data_download/") #' ``` -#' @param get_data_url a string providing NBM filing API. -#' @param user_agent a string set up by default +#' @param get_root_url a string providing NBM filing API. #' #' @return A data frame. 
#' @export @@ -16,40 +15,32 @@ #' head(nbm_data) get_nbm_available <- function( - get_data_url = paste0("https://broadbandmap.fcc.gov/nbm/map/", - "api/national_map_process/nbm_get_data_download/"), - user_agent = the$user_agent) { + get_root_url = paste0("https://broadbandmap.fcc.gov/nbm/map/", + "api/national_map_process/nbm_get_data_download/") +) { + # get csv to dl only get a table with all link to be downloaded get_csv_to_dl <- function(release_file, release_nb) { - get_data_url <- paste0(get_data_url, + get_data_url <- paste0(get_root_url, release_file[release_nb, "process_uuid"]) - # h <- curl::new_handle() - # curl::handle_setheaders(h, "User-Agent" = user_agent) - # - # raw_dat <- curl::curl_fetch_memory(get_data_url) - # - # csv_to_dl <- jsonlite::fromJSON(rawToChar(raw_dat$content))$data - res <- system( - sprintf( - paste0("curl '%s' --compressed ", - "-H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:131.0) Gecko/20100101 Firefox/131.0' ", - "-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8' ", - "-H 'Accept-Language: en-US,en;q=0.5' -H 'Accept-Encoding: gzip, deflate, br, zstd' ", - "-H 'Connection: keep-alive' -H 'Upgrade-Insecure-Requests: 1' ", - "-H 'Sec-Fetch-Dest: document' -H 'Sec-Fetch-Mode: navigate' -H 'Sec-Fetch-Site: none' -H 'Sec-Fetch-User: ?1' ", - "-H 'Sec-GPC: 1' -H 'Priority: u=0, i' -H 'Pragma: no-cache' -H 'Cache-Control: no-cache' -H 'TE: trailers'" - ), - get_data_url - ), - intern = TRUE) + dest_file <- paste0(tempdir(), "/", release_file[release_nb, "process_uuid"], ".json") + + res <- download_file(get_data_url, dest_file) + + # Check res + if (!(dest_file %in% res)) { + message(paste0("Error in download result: ", res)) + stop(sprintf("Downloading %s failed", get_data_url)) + } + csv_to_dl <- jsonlite::fromJSON(res)[["data"]] csv_to_dl[["release"]] <- release_file[release_nb, "filing_subtype"] return(csv_to_dl) } - release <- 
cori.data.fcc::get_nbm_release() + release <- get_nbm_release() release @@ -63,4 +54,4 @@ get_nbm_available <- function( slim_all_data <- all_data[, col_to_keep] return(slim_all_data) -} \ No newline at end of file +} diff --git a/R/get_nbm_bl.R b/R/get_nbm_bl.R index a07bca1..281feb1 100644 --- a/R/get_nbm_bl.R +++ b/R/get_nbm_bl.R @@ -13,16 +13,17 @@ #' #' @export #' @import DBI -#' @import duckdb #' -#'@examples -#' nbm_bl <- get_nbm_bl(geoid_co = "47051") +#' @examples +#' \dontrun{ +#' nbm_bl <- get_nbm_bl(geoid_co = "47051") +#' } get_nbm_bl <- function(geoid_co) { if (nchar(geoid_co) != 5L) stop("geoid_co should be a 5-digit string") - con <- DBI::dbConnect(duckdb()) + con <- DBI::dbConnect(duckdb::duckdb()) DBI::dbExecute(con, sprintf("SET temp_directory ='%s';", tempdir())) on.exit(DBI::dbDisconnect(con), add = TRUE) diff --git a/R/get_nbm_release.R b/R/get_nbm_release.R index 51b290c..9eadc3f 100644 --- a/R/get_nbm_release.R +++ b/R/get_nbm_release.R @@ -1,7 +1,6 @@ #' Get a list of release available in FCC NBM #' #' @param filing_url a string providing NBM filing API. Default is "https://broadbandmap.fcc.gov/nbm/map/api/published/filing" -#' @param user_agent set a default user agent "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:128.0) Gecko/20100101 Firefox/128.0" #' #' @return A data frame. 
#' @export @@ -9,27 +8,17 @@ #' @examples #' nbm <- get_nbm_release() -get_nbm_release <- function(filing_url = "https://broadbandmap.fcc.gov/nbm/map/api/published/filing", - user_agent = the$user_agent) { - # h <- curl::new_handle() - # curl::handle_setheaders(h, - # "User-Agent" = user_agent) - # req <- curl::curl_fetch_memory(filing_url, handle = h) - # release <- jsonlite::fromJSON(rawToChar(req$content))$data +get_nbm_release <- function(filing_url = "https://broadbandmap.fcc.gov/nbm/map/api/published/filing") { - res <- system( - sprintf( - paste0("curl '%s' --compressed ", - "-H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:131.0) Gecko/20100101 Firefox/131.0' ", - "-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8' ", - "-H 'Accept-Language: en-US,en;q=0.5' -H 'Accept-Encoding: gzip, deflate, br, zstd' ", - "-H 'Connection: keep-alive' -H 'Upgrade-Insecure-Requests: 1' ", - "-H 'Sec-Fetch-Dest: document' -H 'Sec-Fetch-Mode: navigate' -H 'Sec-Fetch-Site: none' -H 'Sec-Fetch-User: ?1' ", - "-H 'Sec-GPC: 1' -H 'Priority: u=0, i' -H 'Pragma: no-cache' -H 'Cache-Control: no-cache' -H 'TE: trailers'" - ), - filing_url - ), - intern = TRUE) + dest_file <- paste0(tempdir(), "/filing.json") + + res <- download_file(filing_url, dest_file) + + # Check res + if (!(dest_file %in% res)) { + message(paste0("Error in download result: ", res)) + stop(sprintf("Downloading %s failed", filing_url)) + } release <- jsonlite::fromJSON(res)[["data"]] return(release) diff --git a/README.md b/README.md index 4ede10d..9a3ea97 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,13 @@ You can install the development version of `cori.data.fcc` from devtools::install_github("ruralinnovation/cori.data.fcc") ``` +::: {.callout-note} +We were encountering problems with `devtools::check()` related to specific dependencies. 
Make sure to have the latest version of these packages: +- `waldo` +- `duckdb` +::: + + ## Examples ``` r diff --git a/_pkgdown.yml b/_pkgdown.yml index de99d9a..abfff01 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -19,6 +19,9 @@ navbar: href: articles/NBM.html - text: "FCC's Form 477" href: articles/f477.html + Changelog: + text: News + href: news/index.html reference: - title: NBM @@ -40,8 +43,6 @@ reference: - fcc_dictionary - check_frn - fcc_provider - - user_agent - - set_user_agent diff --git a/man/dl_nbm.Rd b/man/dl_nbm.Rd index b2c6e48..dfdd7c2 100644 --- a/man/dl_nbm.Rd +++ b/man/dl_nbm.Rd @@ -9,7 +9,6 @@ dl_nbm( release_date = "June 30, 2023", data_type = "Fixed Broadband", data_category = "Nationwide", - user_agent = the$user_agent, ... ) } @@ -22,8 +21,6 @@ dl_nbm( \item{data_category}{a string "Nationwide"} -\item{user_agent}{a string set up by default} - \item{...}{additional parameters for download.file()} } \value{ diff --git a/man/get_county_nbm_raw.Rd b/man/get_county_nbm_raw.Rd index d69858f..bebd850 100644 --- a/man/get_county_nbm_raw.Rd +++ b/man/get_county_nbm_raw.Rd @@ -29,6 +29,6 @@ Source data: FCC Broadband Funding Map } \examples{ \dontrun{ -guilford_cty <- get_county_nbm_raw(geoid_co = "37081") + guilford_cty <- get_county_nbm_raw(geoid_co = "37081") } } diff --git a/man/get_f477.Rd b/man/get_f477.Rd index 365f04f..817da3c 100644 --- a/man/get_f477.Rd +++ b/man/get_f477.Rd @@ -25,6 +25,6 @@ Source data: FCC Form 477 } \examples{ \dontrun{ -NC <- get_f477(state_abbr = "NC") + NC <- get_f477(state_abbr = "NC") } } diff --git a/man/get_frn_nbm_bl.Rd b/man/get_frn_nbm_bl.Rd index 9a74ec9..552e889 100644 --- a/man/get_frn_nbm_bl.Rd +++ b/man/get_frn_nbm_bl.Rd @@ -30,6 +30,6 @@ Data Source: FCC Broadband Data Collection } \examples{ \dontrun{ -skymesh <- get_frn_nbm_bl("0027136753") + skymesh <- get_frn_nbm_bl("0027136753") } } diff --git a/man/get_nbm_available.Rd b/man/get_nbm_available.Rd index c0942fe..d01c24c 100644 --- 
a/man/get_nbm_available.Rd +++ b/man/get_nbm_available.Rd @@ -5,15 +5,12 @@ \title{Get a list of files availables in FCC servers} \usage{ get_nbm_available( - get_data_url = paste0("https://broadbandmap.fcc.gov/nbm/map/", - "api/national_map_process/nbm_get_data_download/"), - user_agent = the$user_agent + get_root_url = paste0("https://broadbandmap.fcc.gov/nbm/map/", + "api/national_map_process/nbm_get_data_download/") ) } \arguments{ -\item{get_data_url}{a string providing NBM filing API.} - -\item{user_agent}{a string set up by default} +\item{get_root_url}{a string providing NBM filing API.} } \value{ A data frame. diff --git a/man/get_nbm_bl.Rd b/man/get_nbm_bl.Rd index 11412a7..537c12c 100644 --- a/man/get_nbm_bl.Rd +++ b/man/get_nbm_bl.Rd @@ -22,5 +22,7 @@ Use \code{get_fcc_dictionary("nbm_block")} to get a description of the date. Data Source: FCC Broadband Data Collection } \examples{ -nbm_bl <- get_nbm_bl(geoid_co = "47051") +\dontrun{ + nbm_bl <- get_nbm_bl(geoid_co = "47051") +} } diff --git a/man/get_nbm_release.Rd b/man/get_nbm_release.Rd index f7015ac..821dcf5 100644 --- a/man/get_nbm_release.Rd +++ b/man/get_nbm_release.Rd @@ -5,14 +5,11 @@ \title{Get a list of release available in FCC NBM} \usage{ get_nbm_release( - filing_url = "https://broadbandmap.fcc.gov/nbm/map/api/published/filing", - user_agent = the$user_agent + filing_url = "https://broadbandmap.fcc.gov/nbm/map/api/published/filing" ) } \arguments{ \item{filing_url}{a string providing NBM filing API. Default is "https://broadbandmap.fcc.gov/nbm/map/api/published/filing"} - -\item{user_agent}{set a default user agent "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:128.0) Gecko/20100101 Firefox/128.0"} } \value{ A data frame. 
diff --git a/man/set_user_agent.Rd b/man/set_user_agent.Rd deleted file mode 100644 index 3da233e..0000000 --- a/man/set_user_agent.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aaa.R -\name{set_user_agent} -\alias{set_user_agent} -\title{Change User-Agent for a specific session} -\usage{ -set_user_agent(user_agent) -} -\arguments{ -\item{user_agent}{a string representing an User-Agent} -} -\description{ -Functions in this package use a specific User-Agent. -You can change it for a specific R session. -} -\examples{ -\dontrun{ -set_user_agent( - paste0("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36", - " (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36") -) -} -} diff --git a/man/user_agent.Rd b/man/user_agent.Rd deleted file mode 100644 index a201e52..0000000 --- a/man/user_agent.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aaa.R -\name{user_agent} -\alias{user_agent} -\title{Seting user agent for function} -\usage{ -user_agent() -} -\description{ -Seting user agent for function -} -\examples{ -\dontrun{ -user_agent() -} -} diff --git a/tests/testthat/test-aaa.R b/tests/testthat/test-aaa.R deleted file mode 100644 index 33b7acf..0000000 --- a/tests/testthat/test-aaa.R +++ /dev/null @@ -1,13 +0,0 @@ -test_that("the$user_agent should be string", { - expect_type(the$user_agent, "character") -}) - -test_that("error if set_user_agent", { - expect_error(set_user_agent(1)) -}) - -test_that("seting user agent should change it", { - set_user_agent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36") - expect_equal(the$user_agent, - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36") -}) \ No newline at end of file