From e821ff2833d8b2af5524d238d29572091ce7e216 Mon Sep 17 00:00:00 2001 From: Thomas Kluth Date: Fri, 18 May 2018 11:32:46 +0200 Subject: [PATCH] improve DB input --- src/000_run_pipeline.R | 3 ++- src/02_bikes_to_db.R | 40 +++++++++++++++++++++++++--------------- src/02_cars_to_db.R | 13 +++++++++---- 3 files changed, 36 insertions(+), 20 deletions(-) diff --git a/src/000_run_pipeline.R b/src/000_run_pipeline.R index 9b141b1..d0b67aa 100644 --- a/src/000_run_pipeline.R +++ b/src/000_run_pipeline.R @@ -13,4 +13,5 @@ source("src/00_install_R_packages.R", echo = TRUE) source("src/01_download_data.R", echo = TRUE) source("src/02_cars_to_db.R", echo = TRUE) source("src/02_bikes_to_db.R", echo = TRUE) -source("src/03_temporal_features.R", echo = TRUE) +# TODO do we need this table / probably convert to weather table? +# source("src/03_temporal_features.R", echo = TRUE) diff --git a/src/02_bikes_to_db.R b/src/02_bikes_to_db.R index f8aee03..18b4253 100644 --- a/src/02_bikes_to_db.R +++ b/src/02_bikes_to_db.R @@ -6,7 +6,7 @@ # load libraries #### # use 00_install_R_packages.R for installing missing packages -sapply(c("dplyr", "DBI", "RSQLite", "tidyr", "lubridate"), +sapply(c("dplyr", "DBI", "RSQLite", "tidyr", "chron", "lubridate"), require, character.only = TRUE) file <- "data/raw/Fahrradzaehlstellen-Stundenwerte.csv" @@ -22,8 +22,13 @@ bikes <- # wide to long format gather(location, count, -date, -hour, -weather, -temperature, -windspeed) %>% mutate(date = as.character(dmy(date))) %>% - mutate(hour = as.integer(substring(hour, 1, 2))) %>% - mutate(vehicle = "bike") #%>% + mutate(year = as.integer(year(date))) %>% + mutate(month = as.integer(month(date))) %>% + mutate(day = as.integer(day(date))) %>% + mutate(weekday = wday(date, label = T, abbr = T)) %>% + mutate(weekend = is.weekend(date)) %>% + mutate(hour = as.integer(substring(hour, 1, 2))) %>% + mutate(vehicle = "bike") # write 'bikes' to SQLite database dir.create("data/database", showWarnings = F) @@ -31,19 +36,24 @@ con <- dbConnect(SQLite(), dbname = "data/database/traffic_data.sqlite") dbWriteTable(con, "bikes", bikes, row.names = F, overwrite = T) dbExecute(con, "CREATE INDEX timestamp_bikes on bikes (date, hour)") +dbExecute(con, "CREATE INDEX year_month_day_bikes on bikes (year, month, day, hour)") +# TODO: make the weather data an own table # add the same weather to cars table -cars <- dbGetQuery(conn = con, "SELECT location, count, date, hour, vehicle FROM cars") - -weather_from_bikes <- - bikes %>% - select(date, hour, weather, windspeed, temperature) %>% - filter(weather != "") - -cars <- - cars %>% - inner_join(., weather_from_bikes, by = c("date", "hour")) - -dbWriteTable(con, "cars", cars, row.names = F, overwrite = T) +# cars <- dbGetQuery(conn = con, "SELECT location, count, date, hour, vehicle FROM cars") +# +# weather_from_bikes <- +# bikes %>% +# select(date, hour, weather, windspeed, temperature) %>% +# filter(weather != "") +# +# cars <- +# cars %>% +# inner_join(., weather_from_bikes, by = c("date", "hour")) +# +# dbWriteTable(con, "cars", cars, row.names = F, overwrite = T) + +# for better performance, DB is read-only in shiny-app +dbExecute(con, "PRAGMA synchronous=OFF; PRAGMA journal_mode=OFF;") dbDisconnect(con) diff --git a/src/02_cars_to_db.R b/src/02_cars_to_db.R index 0ffeb64..44b6dcf 100644 --- a/src/02_cars_to_db.R +++ b/src/02_cars_to_db.R @@ -6,7 +6,7 @@ # load libraries #### # use 00_install_R_packages.R for installing missing packages -sapply(c("dplyr", "assertthat", "lubridate", "tidyr", "DBI", "RSQLite"), +sapply(c("dplyr", "assertthat", "lubridate", "chron", "tidyr", "DBI", "RSQLite"), require, character.only = TRUE) process_df <- function(df) { @@ -38,9 +38,8 @@ process_df <- function(df) { # filter to only add relevant location to the database # as of now: Roxel and all locations where also bicycles are counted relevant_locations <- - c("24020", "24100", "24140", "24010", "24120", "24130", "24030", # Roxel - # locations where (closeby) also bicycles are counted, in the same order as http://www.stadt-muenster.de/verkehrsplanung/verkehr-in-zahlen/radverkehrszaehlungen.html - "01080", # Neutor + c(# locations where (closeby) also bicycles are counted, in the same order as http://www.stadt-muenster.de/verkehrsplanung/verkehr-in-zahlen/radverkehrszaehlungen.html + "01080", # Neutor "04050", # Wolbecker Straße / Servatiiplatz "03052", # Hüfferstraße "07030", # Hammer Straße @@ -61,6 +60,11 @@ process_df <- function(df) { df <- df %>% gather(hour, count, -location, -date) %>% + mutate(year = as.integer(year(date))) %>% + mutate(month = as.integer(month(date))) %>% + mutate(day = as.integer(day(date))) %>% + mutate(weekday = wday(date, label = T, abbr = T)) %>% + mutate(weekend = is.weekend(date)) %>% # 'hour' to integer format mutate(hour = substring(hour, 2)) %>% mutate(hour = as.integer(hour)) %>% @@ -92,5 +96,6 @@ for (raw_file in raw_files) { } dbExecute(con, "CREATE INDEX timestamp_cars on cars (date, hour)") +dbExecute(con, "CREATE INDEX year_month_day_cars on cars (year, month, day, hour)") dbExecute(con, "CREATE INDEX location_cars on cars (location)") dbDisconnect(con)