-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMatch by name
29 lines (22 loc) · 992 Bytes
/
Match by name
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Load the breach list as raw text lines; the lines are split into fields
# later by matchfirmwithsymbles().
breachdata <- readLines(
  con = "/Users/jjw6286/Downloads/CSR_databreach/Namesfirm.csv"
)
# Distinct combinations of the first three columns of `data`.
# NOTE(review): `data` is not created anywhere in this script -- presumably a
# firm/symbol table already loaded in the session; confirm before running
# this file standalone.
allsymble <- unique(data[, 1:3])
library(stringdist)
# Match one line of the breach list to the closest firm in the symbol table.
#
# input:     a single comma-separated record: ID, a date written as
#            month/day/year, then the firm name. Any further comma-separated
#            pieces are treated as part of the name and re-joined with spaces.
# allsymble: data frame whose first column holds the candidate firm names;
#            the whole best-matching row is copied into the result.
#
# Returns a one-row data frame with day, month, year, firmname, the columns
# of the matched allsymble row, dist (q-gram distance to that row's name)
# and ID.
matchfirmwithsymbles <- function(input, allsymble) {
  section <- unlist(strsplit(input, ","))
  ID <- section[1]

  date_parts <- unlist(strsplit(section[2], "/"))
  month <- date_parts[1]
  day <- date_parts[2]
  # Years are normally two digits and get a "20" prefix; a year that is
  # already four characters long is kept as is instead of becoming "202015".
  year <- if (isTRUE(nchar(date_parts[3]) == 4L)) {
    date_parts[3]
  } else {
    paste0("20", date_parts[3])
  }

  # Negative indexing drops ID and date. Unlike 3:length(section) it does not
  # count backwards when the record has fewer than three fields.
  firmname <- paste(section[-(1:2)], collapse = " ")

  listdist <- stringdist(firmname, allsymble[, 1], method = "qgram")
  # First candidate with the smallest distance; NA distances are ignored so
  # one unusable candidate name does not wipe out the match.
  best <- which.min(listdist)
  if (length(best) == 0L) {
    # No usable candidate at all: yield an all-NA match row.
    best <- NA_integer_
  }
  best_row <- allsymble[best, ]
  dist <- listdist[best]

  # check.names = FALSE keeps the symbol table's column names untouched.
  data.frame(
    day = day, month = month, year = year, firmname = firmname,
    best_row, dist = dist, ID = ID,
    check.names = FALSE
  )
}
# Run the matcher on every line after the first (presumably the CSV header).
# Negative indexing is used because 2:length(breachdata) counts backwards on
# a header-only or empty file and would feed NA / the header line into the
# matcher.
results <- lapply(breachdata[-1], matchfirmwithsymbles, allsymble)
library(data.table)
# Stack the one-row results into a single table.
results <- rbindlist(results)
# handclean
# NOTE(review): presumably the matches are reviewed by hand after export --
# confirm.
write.table(
  results,
  file = "/Users/jjw6286/Downloads/CSR_databreach/publicdatabreach.csv",
  sep = ",",
  row.names = FALSE
)