forked from hhenoida/dataanalytics
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path4b1-AR-groceries.R
93 lines (80 loc) · 3.55 KB
/
4b1-AR-groceries.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# Association Rules - Groceries data set ####
# arules and arulesViz are add-on packages: install them once before loading.
library(arules)     # association rule mining and inspection helpers
library(arulesViz)  # plot methods for rules and itemsets
library(datasets)   # bundled with R; note: Groceries itself is provided by arules
data("Groceries")
Groceries

# Structure of the Groceries transactions object
str(Groceries)
Groceries
arules::LIST(Groceries[seq_len(6)])     # list view of the first six transactions
arules::inspect(Groceries[seq_len(5)])  # tabular view of the first five
length(Groceries)                       # number of transactions
# To view the last six transactions (parentheses needed, `:` binds tighter than `-`):
# LIST(Groceries[(length(Groceries) - 5):length(Groceries)])
# Find frequent itemsets ----
# Mine itemsets of 2 to 5 items with a minimum support of 0.01.
frequentItems <- eclat(
  Groceries,
  parameter = list(support = 0.01, minlen = 2, maxlen = 5)
)
arules::inspect(frequentItems)
frequentItems
inspect(frequentItems[10:15])
# Itemsets ordered by count: the 25 most frequent ...
inspect(sort(frequentItems, decreasing = TRUE, by = "count")[1:25])
# ... and the 25 least frequent.
inspect(sort(frequentItems, decreasing = FALSE, by = "count")[1:25])
# Support of an itemset: support(A & B) = n(A & B) / N
# Item frequency plots ----
# Top 15 items by absolute count, then top 10 by relative frequency.
itemFrequencyPlot(Groceries, type = "absolute", topN = 15)
itemFrequencyPlot(Groceries, type = "relative", topN = 10)
abline(h = 0.15)  # horizontal reference line at 0.15 on the current plot
# Create rules describing the relationships between items ----
# The parameter list sets the minimum thresholds a rule must meet.
rules <- apriori(
  Groceries,
  parameter = list(support = 0.005, confidence = 0.5, minlen = 2)
)
rules
inspect(rules[1:5])

# Order the rules by confidence, then by lift, and look at the top five of each
# to see which items have strong confidence and lift.
rulesc <- sort(rules, decreasing = TRUE, by = "confidence")
inspect(rulesc[1:5])
rulesl <- sort(rules, decreasing = TRUE, by = "lift")
inspect(rulesl[1:5])
# How to control the number of rules in the output?
# Tune maxlen, minlen, support and confidence.
rules2 <- apriori(
  Groceries,
  parameter = list(support = 0.001, confidence = 0.5, minlen = 2, maxlen = 6)
)
inspect(rules2[1:50])
# Legend for subset() conditions:
# lhs     - left-hand side, or antecedent
# rhs     - right-hand side, or consequent
# items   - the items that make up an itemset
# %in%    - matches any of the given items
# %ain%   - matches all of the given items
# %pin%   - matches partially
# default - no restrictions applied
# &       - combines conditions, e.g. additional restrictions on lift, confidence, etc.
# Find out what events were influenced by a given event ----

# Rules whose consequent (rhs) is "whole milk".
subset1 <- subset(rules2, subset = rhs %in% "whole milk")
# head() shows at most the first 10 rules; unlike subset1[1:10] it does not
# fail with an out-of-range index when fewer than 10 rules match.
inspect(head(subset1, n = 10))

# Rules whose consequent is "root vegetables".
subset1 <- subset(rules2, subset = rhs %in% "root vegetables")
inspect(head(subset1, n = 10))

# %ain%: all of the listed items must appear in the antecedent (lhs).
subset2a <- subset(rules2, subset = lhs %ain% c("baking powder", "soda"))
inspect(subset2a)

# %in%: any of the listed items may appear in the antecedent.
subset2b <- subset(rules2, subset = lhs %in% c("baking powder", "soda"))
inspect(head(subset2b, n = 5))

# subset() only filters; it has no ordering arguments, so the ordering by
# lift has to be done with an explicit sort() on the filtered rules.
subset3 <- sort(
  subset(rules2, subset = rhs %in% "bottled beer" & confidence > 0.7),
  by = "lift",
  decreasing = TRUE
)
inspect(subset3)

# Conditions on both sides: "bottled beer" in the lhs and "whole milk" in the rhs.
subset4 <- subset(
  rules2,
  subset = lhs %in% "bottled beer" & rhs %in% "whole milk"
)
inspect(head(subset4, n = 5))
# Visualizing the rules ----
# Default plot of the first ten rules currently held in subset1.
plot(subset1[seq_len(10)])
# Same ten rules, plotted on support and lift and shaded by confidence.
plot(
  subset1[seq_len(10)],
  shading = "confidence",
  measure = c("support", "lift")
)
#
# Subset conditions can also be imposed while the rules are being mined.
# Find what factors influenced an event 'X':
# only "whole milk" may appear on the rhs; every other item defaults to the lhs.
rules3 <- apriori(
  data = Groceries,
  parameter = list(support = 0.002, confidence = 0.8),
  appearance = list(rhs = "whole milk", default = "lhs"),
  control = list(verbose = FALSE)
)
inspect(rules3[1:5])
inspect(rules3)

# The reverse question: only "tropical fruit" and "herbs" may appear on the
# lhs; every other item defaults to the rhs.
rules4 <- apriori(
  data = Groceries,
  parameter = list(support = 0.001, confidence = 0.4),
  appearance = list(lhs = c("tropical fruit", "herbs"), default = "rhs"),
  control = list(verbose = FALSE)
)
inspect(rules4[1:5])
inspect(rules4)
# End of association rules (AR)
# Understand the measures of AR: support, confidence and lift.
# Apply the rules: find frequent items, sort rules, subset rules on conditions, plot the rules, interpret them, and adjust your strategy accordingly.