# PCA-Iris Data
# Principal component analysis of the iris data set, followed by a
# multinomial logistic regression on the first two principal components.
# Iris Data
# Load the `iris` data frame into the workspace, then show its structure and
# a per-column summary.
data("iris")
str(iris)
# Printed explicitly (like `print(pc)` and `print(g)` further down) so the
# summary is also displayed when the script is run through source(), which
# does not auto-print top-level values by default.
print(summary(iris))
# Partition Data
# Seed the RNG so the split is reproducible, draw a label for every row of
# `iris` (1 with probability 0.8, 2 with probability 0.2), and split the rows
# on that label: label 1 -> training, label 2 -> testing.
set.seed(111)
ind <- sample(
  1:2,
  size = nrow(iris),
  replace = TRUE,
  prob = c(0.8, 0.2)
)
training <- iris[ind == 1, ]
testing <- iris[ind == 2, ]
# Scatter Plots & Correlations
# Panel plot of the training data without its 5th column (Species). Each
# point's fill colour is looked up from the species factor code, so the three
# colours map to the three species levels in order.
library(psych)
pairs.panels(
  training[-5],
  gap = 0,
  bg = c("red", "yellow", "blue")[as.integer(training$Species)],
  pch = 21
)
# PCA
# Principal component analysis of the training data without its 5th column
# (Species). `center = TRUE` and `scale. = TRUE` ask prcomp() to centre and
# scale each variable before the decomposition.
pc <- prcomp(
  training[, -5],
  center = TRUE,
  scale. = TRUE
)
# Every inspection step is printed explicitly so the output is also shown
# when the script is run through source(), which does not auto-print
# top-level values by default.
print(attributes(pc))
print(pc)
print(summary(pc))
# Line-type plot of the fitted prcomp object.
plot(pc, type = "lines")
# Orthogonality of PCs
# Same panel plot as for the raw training data, now drawn on the `x`
# component of the prcomp fit. Rows of `pc$x` line up with the rows of
# `training`, so the species factor can still drive the fill colour.
pairs.panels(
  pc[["x"]],
  gap = 0,
  bg = c("red", "yellow", "blue")[as.integer(training$Species)],
  pch = 21
)
# Bi-Plot
# Biplot of the PCA fit, grouped by species, with the ellipse
# (ellipse.prob = 0.68) and circle overlays switched on. The colour legend
# gets an empty title and is laid out horizontally above the plot.
library(ggbiplot)
g <- ggbiplot(
  pc,
  obs.scale = 1,
  var.scale = 1,
  groups = training$Species,
  ellipse = TRUE,
  circle = TRUE,
  ellipse.prob = 0.68
) +
  scale_color_discrete(name = "") +
  theme(
    legend.direction = "horizontal",
    legend.position = "top"
  )
print(g)
# Prediction with Principal Components
# Project both partitions onto the components fitted on the training data,
# then attach the Species column so each result is a data frame of PC columns
# plus the class label.
trg <- data.frame(predict(pc, training), training["Species"])
tst <- data.frame(predict(pc, testing), testing["Species"])
# Multinomial Logistic regression with 1st two PCs
library(nnet)
# Put "setosa" first among the factor levels so it is the reference class.
trg$Species <- relevel(trg$Species, ref = "setosa")
# Model Species from the first two principal components only.
mymodel <- multinom(Species ~ PC1 + PC2, data = trg)
# Printed explicitly so the model summary is also shown when the script is
# run through source(), which does not auto-print top-level values by default.
print(summary(mymodel))
# Misclassification error & Confusion matrix - training
# Predict the class of every training row and cross-tabulate predictions
# against the true species. The table dimensions are labelled so it is clear
# which axis holds predictions and which holds the true classes.
p <- predict(mymodel, trg)
tab <- table(Predicted = p, Actual = trg$Species)
# Both results are printed explicitly so they are also shown when the script
# is run through source(), which does not auto-print top-level values.
print(tab)
# Misclassification rate: one minus the share of counts on the diagonal.
print(1 - sum(diag(tab)) / sum(tab))
# Misclassification error & Confusion matrix - Testing
# Predict the class of every held-out row and cross-tabulate predictions
# against the true species. The table dimensions are labelled so it is clear
# which axis holds predictions and which holds the true classes.
p1 <- predict(mymodel, tst)
tab1 <- table(Predicted = p1, Actual = tst$Species)
# Both results are printed explicitly so they are also shown when the script
# is run through source(), which does not auto-print top-level values.
print(tab1)
# Misclassification rate: one minus the share of counts on the diagonal.
print(1 - sum(diag(tab1)) / sum(tab1))