-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraped-data-processor.R
58 lines (49 loc) · 1.48 KB
/
scraped-data-processor.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#' ---
#' title: "scraped-data-processor"
#' author: "JJayes"
#' date: "29/04/2021"
#' output: html_document
#' ---
#'
## ----setup, include=FALSE-----------------------------------------------------------------------------------
# Echo each chunk's source code in the knitted output by default.
knitr::opts_chunk$set(echo = TRUE)
# tidyverse supplies the pipe, as_tibble(), the dplyr verbs (mutate, select)
# and the stringr helpers (str_replace, str_to_title) used below.
library(tidyverse)
# glue supplies glue() for string interpolation.
library(glue)
# janitor supplies clean_names() for normalising column names.
library(janitor)
# knitr::purl("code/scraped-data-processor.Rmd", documentation = 2)
#'
## -----------------------------------------------------------------------------------------------------------
# Load the latest scraped ads, convert the data frame to a tibble, and
# normalise the column names with janitor, all in a single pipeline.
df <- read.csv(file = "data/latest/ads_latest.csv") %>%
  as_tibble() %>%
  janitor::clean_names()
#'
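#' What do we need? Combine `make` and `model` into `make_model`, round `price` and `year`, and keep only the columns selected below.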
#'
## -----------------------------------------------------------------------------------------------------------
# Build a combined "<make> <model>" label, round price and year to whole
# numbers, then keep only the listed columns in the listed order.
df <- df %>%
  mutate(
    make_model = glue("{make} {model}"),
    across(c(price, year), round)
  ) %>%
  select(
    title, price, make_model,
    province, kilometers, colour,
    year, ad_url, text
  )
#'
#' Processing province: hyphens become spaces, plus signs become hyphens, and the result is title-cased.
#'
## -----------------------------------------------------------------------------------------------------------
# Clean the province strings for display:
#   1. every "-" becomes a space,
#   2. every "+" becomes a "-",
#   3. the result is converted to title case.
# Step 1 must run before step 2; otherwise the hyphens produced from "+"
# would themselves be turned into spaces.
# str_replace_all() is used because str_replace() only replaces the first
# match in each string, which would leave values containing more than one
# separator partly cleaned. fixed() matches the characters literally, so no
# regex escaping is needed.
df <- df %>%
  mutate(
    province = province %>%
      str_replace_all(fixed("-"), " ") %>%
      str_replace_all(fixed("+"), "-") %>%
      str_to_title()
  )
#'
#'
## -----------------------------------------------------------------------------------------------------------
# Save the cleaned ads. row.names is left at its default (TRUE), so the
# output file also contains a leading, unnamed row-index column.
write.csv(
  x = df,
  file = "data/latest/ads_latest_clean.csv"
)
#'