-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathzoe_midterm_work.Rmd
125 lines (96 loc) · 3.25 KB
/
zoe_midterm_work.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
---
title: "midterm_work"
author: "Zoe Yoo"
date: "10/8/2021"
output: html_document
---
```{r setup, include=FALSE}
# You can set some global options for knitting chunks
knitr::opts_chunk$set(echo = TRUE)
# Load some libraries
library(tidyverse)
library(sf)
library(spdep)
library(caret)
library(ckanr)
library(FNN)
library(grid)
library(gridExtra)
library(ggcorrplot)
library(kableExtra)
library(jtools) # for regression model plots
library(ggstance) # to support jtools plots
library(osmdata)
library(knitr)
library(tidycensus)
library(scales)
# Project root that holds the local data files (machine-specific absolute path).
root.dir <- "C:/Users/zoeny/OneDrive - PennO365/GrSchool/Year1/MUSA 508 Public Policy Analytics/MUSA508/MUSA_Midterm"
# Helper functions sourced from GitHub at knit time (requires network access);
# presumably provides mapTheme(), plotTheme(), q5() and qBr() used in later chunks.
source("https://raw.githubusercontent.com/urbanSpatial/Public-Policy-Analytics-Landing/master/functions.r")
# Five-colour green-to-orange palette for the quintile colour scale.
palette5 <- c("#25CB10", "#5AB60C", "#8FA108", "#C48C04", "#FA7800")
```
```{r read_data}
# Read the student data set, project it to ESRI:102254 and derive the
# structure age (in years, relative to 2021) from builtYear.
# st_read() already returns an sf object, so no st_as_sf() conversion is needed
# before re-projecting.
boulder.sf <-
  st_read(file.path(root.dir, "studentData.geojson")) %>%
  st_transform("ESRI:102254") %>%
  mutate(Age = 2021 - builtYear)
#parcels.sf <-
# st_read("https://opendata.arcgis.com/datasets/89ae49d4ddf246388ee5f5e952aa84db_0.geojson") %>%
# st_transform(st_crs(boulder.sf))
# Boundary layer from the ArcGIS open-data portal, re-projected to match
# the CRS of boulder.sf so both layers can be drawn together.
boulder.boundary <-
  st_read("https://opendata.arcgis.com/datasets/964b8f3b3dbe401bb28d49ac93d29dc4_0.geojson") %>%
  st_transform(st_crs(boulder.sf))
```
```{r}
# Map of sale price, coloured by quintile, drawn over the boundary layer.
ggplot() +
  geom_sf(data = boulder.boundary, fill = "grey40") +
  geom_sf(
    data = boulder.sf, aes(colour = q5(price)),
    show.legend = "point", size = 2
  ) +
  scale_colour_manual(
    values = palette5,
    # qBr() (from the sourced functions.r) expects the data frame followed by
    # the column name; the labels are the price quintile breaks of boulder.sf.
    labels = qBr(boulder.sf, "price"),
    name = "Quintile\nBreaks"
  ) +
  labs(title = "Sale price, Boulder") +
  mapTheme()
# Build an Overpass query for roads inside the Boulder County bounding box.
# "highway" is the OSM key that tags roads; leaving `value` unset matches
# every highway type.
q <- getbb("Boulder County") %>%
  opq() %>%
  add_osm_feature(key = "highway")
# Run the query and return the result as sf layers.
highway <- osmdata_sf(q)
# final map
# geom_sf() draws the line features; without a layer the plot is blank.
ggplot(data = highway$osm_lines) +
  geom_sf()
```
```{r preprocessing}
# Scatterplots of price against each continuous predictor, one facet per
# predictor, each with a fitted straight line.
st_drop_geometry(boulder.sf) %>%
  mutate(Age = 2021 - builtYear) %>%
  dplyr::select(price, TotalFinishedSF, Age) %>%
  # Keep only sales at or below $10M and ages under 500 years.
  filter(price <= 10000000, Age < 500) %>%
  # Long format: one row per (price, predictor name, predictor value).
  pivot_longer(-price, names_to = "Variable", values_to = "Value") %>%
  ggplot(aes(Value, price)) +
  geom_point(size = 0.5) +
  geom_smooth(method = "lm", se = FALSE, colour = "#FA7800") +
  facet_wrap(~Variable, ncol = 3, scales = "free") +
  labs(title = "Price as a function of continuous variables") +
  plotTheme()
```
```{r correlation_matrix, fig.width=12}
# Numeric columns only, restricted to rows with no missing values, so that
# cor() and cor_pmat() work on the same complete cases.
numericVars <-
  st_drop_geometry(boulder.sf) %>%
  dplyr::select(where(is.numeric)) %>%
  na.omit()

# Lower-triangle correlation plot; coefficients are rounded to one decimal.
ggcorrplot(
  round(cor(numericVars), 1),
  p.mat = cor_pmat(numericVars),
  colors = c("#25CB10", "white", "#FA7800"),
  type = "lower",
  # Leave cells blank where the correlation is not statistically significant.
  insig = "blank"
) +
  labs(title = "Correlation across numeric variables")
```
```{r multivariate_regression}
# Fit a linear model of price on every other selected column (`price ~ .`),
# using the attribute table of boulder.sf with the geometry dropped.
reg1 <- lm(price ~ ., data = st_drop_geometry(boulder.sf) %>%
dplyr::select(price, Age, TotalFinishedSF, designCodeDscr,
carStorageType, nbrBedRoom, bsmtType,
qualityCode, nbrRoomsNobath, nbrFullBaths,
mainfloorSF, AcDscr, playground.Buffer))
# Print the fitted model's summary.
summary(reg1)
# Plot the fitted model (plot_summs, presumably from jtools, which is loaded
# in the setup chunk for regression model plots).
plot_summs(reg1)
```
```{r}
```