-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathR_DA.R
76 lines (55 loc) · 1.64 KB
/
R_DA.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Mean of one pollutant pooled across the selected monitor files.
#
# Each monitor id is read from "<directory>/NNN.csv", where NNN is the id
# zero-padded to three digits (e.g. id 7 -> "007.csv").
#
# Arguments:
#   pollutant  Column to average (the files in this script use "sulfate" and
#              "nitrate"); passed straight to `[`, so an unknown column name
#              raises the usual "undefined columns selected" error.
#   id         Vector of monitor ids to include. Defaults to 1:332.
#   directory  Folder holding the CSV files. Defaults to the working
#              directory, which is where the files were always read from.
#
# Returns:
#   A single number: the mean of every non-missing reading from all selected
#   monitors. NaN when there are no non-missing readings at all.
pollutantmean <- function(pollutant, id = 1:332, directory = ".") {
  readings <- lapply(id, function(monitor) {
    path <- file.path(directory, sprintf("%03d.csv", monitor))
    monitor_data <- read.csv(path)
    monitor_data[, pollutant]
  })
  # Pool the raw readings before averaging. Averaging the per-file means
  # would weight every monitor equally no matter how many readings it has,
  # which is not the mean of the pollutant across the monitors.
  # as.numeric() keeps the empty case (no ids) a numeric(0) so mean() gives NaN.
  mean(as.numeric(unlist(readings)), na.rm = TRUE)
}
# Count the complete observations in each selected monitor file.
#
# A row is complete when neither its "sulfate" nor its "nitrate" value is NA.
# Each monitor id is read from "<directory>/NNN.csv", where NNN is the id
# zero-padded to three digits.
#
# Arguments:
#   id         Vector of monitor ids to report on. Defaults to 1:332.
#   directory  Folder holding the CSV files. Defaults to the working
#              directory, which is where the files were always read from.
#
# Returns:
#   A data frame with one row per requested id, in the order given:
#     i     the monitor id (column name kept as `i` so existing callers that
#           index it by name keep working)
#     nobs  number of complete rows in that monitor's file
complete <- function(id = 1:332, directory = ".") {
  nobs <- vapply(id, function(monitor) {
    path <- file.path(directory, sprintf("%03d.csv", monitor))
    readings <- read.csv(path)
    # complete.cases() is vectorised and also copes with zero-row files,
    # unlike a 1:nrow() loop.
    sum(complete.cases(readings[, c("sulfate", "nitrate")]))
  }, integer(1))
  # Pair each count with its own id; using the loop variable here would
  # repeat the last id on every row.
  data.frame(i = id, nobs = nobs)
}
# Sulfate/nitrate correlation for every monitor with enough complete rows.
#
# A row is complete when neither its "sulfate" nor its "nitrate" value is NA.
# Monitors whose number of complete rows is strictly greater than `threshold`
# contribute one correlation each; the others are skipped.
# Each monitor id is read from "<directory>/NNN.csv", where NNN is the id
# zero-padded to three digits.
#
# Arguments:
#   threshold  Minimum number of complete rows a monitor must exceed.
#              Defaults to 0.
#   id         Vector of monitor ids to scan. Defaults to 1:332, the range
#              that used to be hard-coded.
#   directory  Folder holding the CSV files. Defaults to the working
#              directory, which is where the files were always read from.
#
# Returns:
#   A numeric vector of correlations, in monitor order; numeric(0) when no
#   monitor exceeds the threshold.
corr <- function(threshold = 0, id = 1:332, directory = ".") {
  correlations <- lapply(id, function(monitor) {
    path <- file.path(directory, sprintf("%03d.csv", monitor))
    readings <- read.csv(path)
    n_complete <- sum(complete.cases(readings[, c("sulfate", "nitrate")]))
    if (n_complete > threshold) {
      # Pass both columns as plain vectors so cor() returns a scalar rather
      # than a 1x1 matrix.
      cor(readings[["sulfate"]], readings[["nitrate"]], use = "complete.obs")
    } else {
      NULL
    }
  })
  # unlist() drops the NULLs of skipped monitors; as.numeric() keeps the
  # "nothing qualified" result a numeric(0) instead of NULL.
  as.numeric(unlist(correlations))
}
# Example runs; these read the monitor CSV files from the working directory.
pollutantmean("nitrate", 2:25)
# The data column is spelled "sulfate"; "sulphate" fails with
# "undefined columns selected".
pollutantmean("sulfate")
complete(10:20)
complete()
corr(threshold = 1000)
corr()