Skip to content

Commit

Permalink
use download_file and fix devtools check
Browse files Browse the repository at this point in the history
  • Loading branch information
dancingfrog committed Dec 20, 2024
1 parent 5017f59 commit 3e0149b
Show file tree
Hide file tree
Showing 23 changed files with 115 additions and 200 deletions.
16 changes: 7 additions & 9 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: cori.data.fcc
Title: Process FCC data
Version: 0.1.1
Version: 0.1.2
Authors@R:
person(given="Olivier", family="Leroy", email="[email protected]", role = c("aut", "cre"))
Description: Functions to get and process FCC data.
Expand All @@ -11,20 +11,18 @@ RoxygenNote: 7.3.2
Depends:
R (>= 2.10)
LazyData: true
Suggests:
testthat (>= 3.0.0),
pkgdown,
Suggests:
dplyr,
DT,
knitr,
rmarkdown
pkgdown,
rmarkdown,
testthat (>= 3.0.0)
Config/testthat/edition: 3
Imports:
curl,
Imports:
duckdb,
jsonlite,
utils,
DBI,
duckdb,
stringi
URL: https://ruralinnovation.github.io/cori.data.fcc/
Config/Needs/website: rmarkdown
Expand Down
3 changes: 0 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,5 @@ export(get_frn_nbm_bl)
export(get_nbm_available)
export(get_nbm_bl)
export(get_nbm_release)
export(set_user_agent)
export(user_agent)
import(DBI)
import(duckdb)
importFrom(stringi,stri_pad_left)
16 changes: 12 additions & 4 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
# cori.data.fcc 0.1.1
# cori.data.fcc 0.1.2

## Minor improvements

* Use CORI-brewed `download_file` function; global user_agent no longer needed


# cori.data.fcc 0.1.1

## Minor improvements

Expand Down Expand Up @@ -50,8 +57,9 @@
* Organize reference of function by themes


# cori.data.fcc (first release)
# cori.data.fcc 0.0.0

* First functions released.
* First release!
- FCC BDC download functions

* Improve functions with a user_agent defined in aaa.R #9
* Improve functions with a user_agent defined in aaa.R #9
40 changes: 0 additions & 40 deletions R/aaa.R

This file was deleted.

28 changes: 9 additions & 19 deletions R/dl_nbm.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#' @param release_date a string can be "December 31, 2023" or "June 30, 2023"
#' @param data_type a string "Fixed Broadband"
#' @param data_category a string "Nationwide"
#' @param user_agent a string set up by default
#' @param ... additional parameters for download.file()
#'
#' @return Zipped csv
Expand All @@ -23,8 +22,7 @@
dl_nbm <- function(path_to_dl = "~/data_swamp",
release_date = "June 30, 2023",
data_type = "Fixed Broadband",
data_category = "Nationwide",
user_agent = the$user_agent, ...) {
data_category = "Nationwide", ...) {
# clean my mess
prev_timeout <- getOption("timeout")
on.exit(options(timeout = prev_timeout), add = TRUE)
Expand All @@ -51,24 +49,16 @@ dl_nbm <- function(path_to_dl = "~/data_swamp",
print(paste(dest_file, "already downloaded, skipping it"))
next
}

get_data_url <- paste0(base_url, one_release_to_dl$id[i], "/1")
res <- system(
sprintf(
paste0("curl '%s' --compressed ",
"-H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:131.0) Gecko/20100101 Firefox/131.0' ",
"-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8' ",
"-H 'Accept-Language: en-US,en;q=0.5' -H 'Accept-Encoding: gzip, deflate, br, zstd' ",
"-H 'Connection: keep-alive' -H 'Upgrade-Insecure-Requests: 1' ",
"-H 'Referer: https://broadbandmap.fcc.gov/data-download' ",
"-H 'Sec-Fetch-Dest: document' -H 'Sec-Fetch-Mode: navigate' -H 'Sec-Fetch-Site: none' -H 'Sec-Fetch-User: ?1' ",
"-H 'Sec-GPC: 1' -H 'Priority: u=0, i' -H 'Pragma: no-cache' -H 'Cache-Control: no-cache' -H 'TE: trailers' ",
"-o %s"),
get_data_url, dest_file
)
)
# unsure if an error in a system call consistently returns a non-zero status, but oh well
if (res != 0) {

res <- download_file(get_data_url, dest_file)

# Check res
if (!(dest_file %in% res)) {
message(paste0("Error in download result: ", res))
stop(sprintf("Downloading %s failed", get_data_url))
}

}
}
38 changes: 38 additions & 0 deletions R/download_file.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#' Download file function (replacement for download.file)
#'
#' Shells out to the `curl` command-line tool with a fixed set of
#' browser-like request headers and writes the response body to
#' `local_file_path`.
#'
#' @param remote_file_url URL to download file from (a single string)
#' @param local_file_path Local path to save file to (a single string). A
#'   leading `~` is expanded before the path is handed to `curl`.
#' @return On success, `local_file_path` (invisibly), exactly as it was
#'   supplied. On failure, the non-zero exit status returned by `system()`.
#'
#' @examples
#' \dontrun{
#' system("mkdir -p ~/data_swamp")
#' retrieved_file <- download_file(
#' "https://archive.org/offshoot_assets/assets/ia-logo-2c2c2c.03bd7e88c8814d63d0fc..svg",
#' "~/data_swamp/archive.svg")
#' }
#'
#'
download_file <- function(remote_file_url, local_file_path) {
  stopifnot(
    is.character(remote_file_url), length(remote_file_url) == 1L,
    is.character(local_file_path), length(local_file_path) == 1L
  )

  # Headers sent with every request, in the order curl receives them.
  request_headers <- c(
    paste0(
      "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:131.0) ",
      "Gecko/20100101 Firefox/131.0"
    ),
    paste0(
      "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,",
      "image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8"
    ),
    "Accept-Language: en-US,en;q=0.5",
    "Accept-Encoding: gzip, deflate, br, zstd",
    "Connection: keep-alive",
    "Upgrade-Insecure-Requests: 1",
    "Referer: https://broadbandmap.fcc.gov/data-download",
    "Sec-Fetch-Dest: document",
    "Sec-Fetch-Mode: navigate",
    "Sec-Fetch-Site: none",
    "Sec-Fetch-User: ?1",
    "Sec-GPC: 1",
    "Priority: u=0, i",
    "Pragma: no-cache",
    "Cache-Control: no-cache",
    "TE: trailers"
  )

  # Quote every argument for the shell so that URLs or paths containing
  # spaces, quotes or other metacharacters are passed to curl verbatim.
  # Quoting suppresses the shell's own `~` expansion, so expand it here.
  command <- paste(
    "curl", shQuote(remote_file_url), "--compressed",
    paste("-H", shQuote(request_headers), collapse = " "),
    "-o", shQuote(path.expand(local_file_path))
  )

  # NOTE(review): curl exits with 0 on HTTP error responses unless `--fail`
  # is passed; consider adding it if error pages must not be saved to disk.
  status <- system(command)

  if (status != 0L) {
    return(status)
  }

  # Return the path as supplied (not the expanded one): callers compare the
  # result against the path they passed in.
  invisible(local_file_path)
}
5 changes: 2 additions & 3 deletions R/f477.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,17 @@
#'
#' @export
#' @import DBI
#' @import duckdb
#'
#'@examples
#'\dontrun{
#'NC <- get_f477(state_abbr = "NC")
#' NC <- get_f477(state_abbr = "NC")
#'}

get_f477 <- function(state_abbr, frn = "all") {

state_abbr <- state_abbr_lookup(state_abbr)

con <- DBI::dbConnect(duckdb())
con <- DBI::dbConnect(duckdb::duckdb())
DBI::dbExecute(con,
sprintf("SET temp_directory ='%s';", tempdir()))
on.exit(DBI::dbDisconnect(con), add = TRUE)
Expand Down
5 changes: 2 additions & 3 deletions R/get_county_nbm_raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,17 @@
#'
#' @export
#' @import DBI
#' @import duckdb
#'
#'@examples
#'\dontrun{
#'guilford_cty <- get_county_nbm_raw(geoid_co = "37081")
#' guilford_cty <- get_county_nbm_raw(geoid_co = "37081")
#'}

get_county_nbm_raw <- function(geoid_co, frn = "all", release = "2023-12-01") {

# do I need a look up for county?

con <- DBI::dbConnect(duckdb())
con <- DBI::dbConnect(duckdb::duckdb())
DBI::dbExecute(con,
sprintf("SET temp_directory ='%s';", tempdir()))
on.exit(DBI::dbDisconnect(con), add = TRUE)
Expand Down
5 changes: 2 additions & 3 deletions R/get_frn_nbm_bl.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,17 @@
#'
#' @export
#' @import DBI
#' @import duckdb
#'
#'@examples
#'\dontrun{
#' skymesh <- get_frn_nbm_bl("0027136753")
#' skymesh <- get_frn_nbm_bl("0027136753")
#'}

get_frn_nbm_bl <- function(frn) {

if (nchar(frn) != 10L) stop("frn should be a 10-digit string")

con <- DBI::dbConnect(duckdb())
con <- DBI::dbConnect(duckdb::duckdb())
DBI::dbExecute(con,
sprintf("SET temp_directory ='%s';", tempdir()))
on.exit(DBI::dbDisconnect(con), add = TRUE)
Expand Down
37 changes: 13 additions & 24 deletions R/get_nbm_available.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#' "map/api/national_map_process/nbm_get_data_download/")
#' ```
#' @param get_root_url a string providing NBM filing API.
#' @param user_agent a string set up by default
#'
#' @return A data frame.
#' @export
Expand All @@ -17,41 +16,31 @@

get_nbm_available <- function(
get_root_url = paste0("https://broadbandmap.fcc.gov/nbm/map/",
"api/national_map_process/nbm_get_data_download/"),
user_agent = the$user_agent) {
"api/national_map_process/nbm_get_data_download/")
) {

# get_csv_to_dl only gets a table with all the links to be downloaded
get_csv_to_dl <- function(release_file, release_nb) {
get_data_url <- paste0(get_root_url,
release_file[release_nb, "process_uuid"])
# h <- curl::new_handle()
# curl::handle_setheaders(h, "User-Agent" = user_agent)
#
# raw_dat <- curl::curl_fetch_memory(get_data_url)
#
# csv_to_dl <- jsonlite::fromJSON(rawToChar(raw_dat$content))$data

res <- system(
sprintf(
paste0("curl '%s' --compressed ",
"-H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:131.0) Gecko/20100101 Firefox/131.0' ",
"-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8' ",
"-H 'Accept-Language: en-US,en;q=0.5' -H 'Accept-Encoding: gzip, deflate, br, zstd' ",
"-H 'Connection: keep-alive' -H 'Upgrade-Insecure-Requests: 1' ",
"-H 'Referer: https://broadbandmap.fcc.gov/data-download' ",
"-H 'Sec-Fetch-Dest: document' -H 'Sec-Fetch-Mode: navigate' -H 'Sec-Fetch-Site: none' -H 'Sec-Fetch-User: ?1' ",
"-H 'Sec-GPC: 1' -H 'Priority: u=0, i' -H 'Pragma: no-cache' -H 'Cache-Control: no-cache' -H 'TE: trailers'"
),
get_data_url
),
intern = TRUE)
dest_file <- paste0(tempdir(), "/", release_file[release_nb, "process_uuid"], ".json")

res <- download_file(get_data_url, dest_file)

# Check res
if (!(dest_file %in% res)) {
message(paste0("Error in download result: ", res))
stop(sprintf("Downloading %s failed", get_data_url))
}

csv_to_dl <- jsonlite::fromJSON(res)[["data"]]

csv_to_dl[["release"]] <- release_file[release_nb, "filing_subtype"]
return(csv_to_dl)
}

release <- cori.data.fcc::get_nbm_release()
release <- get_nbm_release()

release

Expand Down
9 changes: 5 additions & 4 deletions R/get_nbm_bl.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,17 @@
#'
#' @export
#' @import DBI
#' @import duckdb
#'
#'@examples
#' nbm_bl <- get_nbm_bl(geoid_co = "47051")
#' @examples
#' \dontrun{
#' nbm_bl <- get_nbm_bl(geoid_co = "47051")
#' }

get_nbm_bl <- function(geoid_co) {

if (nchar(geoid_co) != 5L) stop("geoid_co should be a 5-digit string")

con <- DBI::dbConnect(duckdb())
con <- DBI::dbConnect(duckdb::duckdb())
DBI::dbExecute(con,
sprintf("SET temp_directory ='%s';", tempdir()))
on.exit(DBI::dbDisconnect(con), add = TRUE)
Expand Down
31 changes: 10 additions & 21 deletions R/get_nbm_release.R
Original file line number Diff line number Diff line change
@@ -1,35 +1,24 @@
#' Get a list of releases available in FCC NBM
#'
#' @param filing_url a string providing NBM filing API. Default is "https://broadbandmap.fcc.gov/nbm/map/api/published/filing"
#' @param user_agent set a default user agent "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:128.0) Gecko/20100101 Firefox/128.0"
#'
#' @return A data frame.
#' @export
#'
#' @examples
#' nbm <- get_nbm_release()

get_nbm_release <- function(filing_url = "https://broadbandmap.fcc.gov/nbm/map/api/published/filing",
user_agent = the$user_agent) {
# h <- curl::new_handle()
# curl::handle_setheaders(h,
# "User-Agent" = user_agent)
# req <- curl::curl_fetch_memory(filing_url, handle = h)
# release <- jsonlite::fromJSON(rawToChar(req$content))$data
get_nbm_release <- function(filing_url = "https://broadbandmap.fcc.gov/nbm/map/api/published/filing") {

res <- system(
sprintf(
paste0("curl '%s' --compressed ",
"-H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:131.0) Gecko/20100101 Firefox/131.0' ",
"-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8' ",
"-H 'Accept-Language: en-US,en;q=0.5' -H 'Accept-Encoding: gzip, deflate, br, zstd' ",
"-H 'Connection: keep-alive' -H 'Upgrade-Insecure-Requests: 1' ",
"-H 'Sec-Fetch-Dest: document' -H 'Sec-Fetch-Mode: navigate' -H 'Sec-Fetch-Site: none' -H 'Sec-Fetch-User: ?1' ",
"-H 'Sec-GPC: 1' -H 'Priority: u=0, i' -H 'Pragma: no-cache' -H 'Cache-Control: no-cache' -H 'TE: trailers'"
),
filing_url
),
intern = TRUE)
dest_file <- paste0(tempdir(), "/filing.json")

res <- download_file(filing_url, dest_file)

# Check res
if (!(dest_file %in% res)) {
message(paste0("Error in download result: ", res))
stop(sprintf("Downloading %s failed", filing_url))
}
release <- jsonlite::fromJSON(res)[["data"]]

return(release)
Expand Down
Loading

0 comments on commit 3e0149b

Please sign in to comment.