# R_code_Project.R
# Exploratory analysis of pre- vs. post-period average commits and churn
# (histograms, paired t-tests, correlation tests, and linear models).
# ---- Setup and data preparation ----
# NOTE: the former `rm(list = ls())` was dropped on purpose. It wipes the
# caller's workspace without giving a truly clean session (attached packages
# and options survive). Run the script in a fresh R session instead.
library(car)
library(lme4)
library(tidyverse)

# Location of the scraped repository metrics. The first trailing command-line
# argument (e.g. `Rscript R_code_Project.R path/to/file.csv`) overrides the
# default, which is the original author's local copy.
default_csv_path <- "/Users/halleh/Downloads/ECS-260---Brook-s-Law-Analysis-main/ScrapedRepoDataTestAlternate.csv"
cli_args <- commandArgs(trailingOnly = TRUE)
csv_path <- if (length(cli_args) >= 1L) cli_args[[1L]] else default_csv_path

# Read the metrics and keep only rows without missing values.
data <- read.csv(csv_path)
data <- na.omit(data)

# Keep rows whose pre-period churn lies strictly between -1000 and 1000.
# NOTE(review): PostPeriodAvgChurn is not range-filtered here -- confirm that
# this asymmetry is intended.
data <- data[data$PrePeriodAvgChurn > -1000 & data$PrePeriodAvgChurn < 1000, ]

# Per-row change (post minus pre) as plain numeric vectors.
delta_churn <- as.numeric(
  data[["PostPeriodAvgChurn"]] - data[["PrePeriodAvgChurn"]]
)
delta_commits <- as.numeric(
  data[["PostPeriodAvgCommits"]] - data[["PrePeriodAvgCommits"]]
)
# ---- Distribution of the pre/post changes ----
# Histogram of the change in average commits. `breaks` is a suggested number
# of bins; `xlim` only restricts the visible x-range of the plot.
hist(
  delta_commits,
  breaks = 21,
  main = "Delta Commits",
  xlab = "Delta Commits", # label typo ("Delta COmmits") fixed
  xlim = c(-10, 10)
)
print("Summary of Delta Commits")
print(summary(delta_commits))

# Same view for the change in average churn.
hist(
  delta_churn,
  breaks = 201,
  main = "Delta Churns",
  xlab = "Delta Churns",
  xlim = c(-1000, 1000)
)
print("Summary of Delta Churns")
print(summary(delta_churn))
# ---- Pre- vs. post-period comparisons ----
# Each row supplies both a pre and a post value, so the t-tests are paired.
# cor.test() has no `paired` argument; the `paired = TRUE` that used to be
# passed was silently swallowed by `...` and is therefore no longer supplied.

# Average commits: paired t-test, then correlation test.
commits_ttest <- t.test(
  data$PrePeriodAvgCommits, data$PostPeriodAvgCommits,
  paired = TRUE
)
print("Summary of t-Test")
print(commits_ttest)

commits_cortest <- cor.test(data$PrePeriodAvgCommits, data$PostPeriodAvgCommits)
print("Summary of cor-Test")
print(commits_cortest)

# Average churn: paired t-test, then correlation test.
churn_ttest <- t.test(
  data$PrePeriodAvgChurn, data$PostPeriodAvgChurn,
  paired = TRUE
)
print("Summary of t-Test")
print(churn_ttest)

churn_cortest <- cor.test(data$PrePeriodAvgChurn, data$PostPeriodAvgChurn)
print("Summary of cor-Test")
print(churn_cortest)
#
# ---- Linear model: pre-period commits explained by post-period commits ----
# NOTE(review): this regresses the earlier (pre) value on the later (post)
# one -- confirm that the direction is intended.
model1 <- lm(PrePeriodAvgCommits ~ PostPeriodAvgCommits, data = data)
print(summary(model1))
# vif(model1)  # left disabled: car::vif() needs at least two predictors
plot(model1) # standard lm diagnostic plots
print(anova(model1))

# Correlation of pre-period commits with pre- and post-period churn.
# cor.test() has no `paired` argument; the `paired = TRUE` that used to be
# passed was silently ignored and has been removed.
commits_vs_pre_churn <- cor.test(
  data$PrePeriodAvgCommits, data$PrePeriodAvgChurn
)
print("Summary of cor-Test")
print(commits_vs_pre_churn)

commits_vs_post_churn <- cor.test(
  data$PrePeriodAvgCommits, data$PostPeriodAvgChurn
)
print("Summary of cor-Test")
print(commits_vs_post_churn)
# ---- Churn vs. pre-period commits, split by period ----
# Stack the two churn columns into long format: `Churn` holds the value and
# `Period` the index of the source column (1 = PrePeriodAvgChurn,
# 2 = PostPeriodAvgChurn).
# Explicit row names are supplied (presumably because PrePeriodAvgCommits is
# not unique per row, so names derived from idvar would clash). They are now
# sized from the data; the previous fixed `1:1000` only covered inputs of up
# to 500 rows.
data2 <- reshape(
  data = data,
  idvar = "PrePeriodAvgCommits",
  varying = c("PrePeriodAvgChurn", "PostPeriodAvgChurn"),
  v.names = "Churn",
  timevar = "Period",
  new.row.names = seq_len(2L * nrow(data)),
  direction = "long"
)

# Discard stacked rows with churn at or below -1000.
data2 <- data2[data2$Churn > -1000, ]

# Scatter plot with one linear fit per period. The plot is printed explicitly
# so it is also drawn when the script is run through source(), where
# top-level values are not auto-printed.
churn_plot <- ggplot(
  data2,
  aes(
    x = PrePeriodAvgCommits,
    y = Churn,
    color = as.character(Period)
  )
) +
  geom_point() +
  geom_smooth(method = "lm")
print(churn_plot)
# ---- Pre-period commits vs. change in churn ----
# NOTE(review): a paired t-test compares the means of two measurements of the
# same quantity; commits and churn deltas are different quantities, so confirm
# this test is really what is wanted.
commits_vs_delta_ttest <- t.test(
  data$PrePeriodAvgCommits, delta_churn,
  paired = TRUE
)
print("Summary of t-Test")
print(commits_vs_delta_ttest)

# cor.test() has no `paired` argument; the `paired = TRUE` that used to be
# passed was silently ignored and has been removed.
commits_vs_delta_cortest <- cor.test(data$PrePeriodAvgCommits, delta_churn)
print("Summary of cor-Test")
print(commits_vs_delta_cortest)
# ---- Linear models: pre-period commits explained by churn ----
# The formulas use bare column names resolved through `data =`. Writing
# `data$col` inside the formula bypasses the `data` argument, produces
# coefficient names such as `data$PostPeriodAvgChurn`, and breaks
# predict(..., newdata = ).

# Pre-period commits ~ post-period churn.
model2 <- lm(PrePeriodAvgCommits ~ PostPeriodAvgChurn, data = data)
print(summary(model2))
# vif(model2)  # left disabled: car::vif() needs at least two predictors
plot(model2) # standard lm diagnostic plots
print(anova(model2))

# Pre-period commits ~ pre-period churn.
model3 <- lm(PrePeriodAvgCommits ~ PrePeriodAvgChurn, data = data)
print(summary(model3))
# vif(model3)  # left disabled: car::vif() needs at least two predictors
plot(model3) # standard lm diagnostic plots
print(anova(model3))
#