forked from imironica/Support-Tickets-Classification
-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathpreprocess.R
79 lines (60 loc) · 3.14 KB
/
preprocess.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
library(dplyr)
# ---- Load the raw exports ---------------------------------------------------
dfEmails <- read.csv("./datasets/Emails.csv")
dfIncidents <- read.csv("./datasets/incidents.csv")
dfRequests <- read.csv("./datasets/sc_req_item.csv")

# ---- Add the columns each source needs for the shared layout ----------------
# Incidents and requests are tagged with a constant ticket type.
dfIncidents$ticket_type <- "Incident"
dfRequests$ticket_type <- "Request"

# Emails get empty-string category columns.
dfEmails$category <- ""
dfEmails$sub_category1 <- ""
dfEmails$sub_category2 <- ""

# Requests get empty-string urgency / impact / priority columns
# (these are not part of the export written below).
dfRequests$urgency <- ""
dfRequests$impact <- ""
dfRequests$priority <- ""

# ---- Column mapping: source column names, in export order -------------------
columnsEmail <- c(
  "Summary",
  "Description",
  "Record.Type",
  "Business.service..task.",
  "category",
  "sub_category1",
  "sub_category2"
)
columnsIncident <- c(
  "Summary",
  "Description",
  "ticket_type",
  "Business.Service..Incident.",
  "Category",
  "Sub.Category",
  "Sub.Category.2"
)
columnsRequest <- c(
  "short_description",
  "description",
  "ticket_type",
  "business_service",
  "u_category",
  "u_sub_category",
  "u_sub_category_2"
)
# Names given to the selected columns; position i here renames position i of
# each source vector above.
columnsExport <- c(
  "title",
  "body",
  "ticket_type",
  "business_service",
  "category",
  "sub_category1",
  "sub_category2"
)

# ---- Select and rename in one step per source -------------------------------
dfEmailsExport <- setNames(dfEmails[, columnsEmail], columnsExport)
dfIncidentsExport <- setNames(dfIncidents[, columnsIncident], columnsExport)
dfRequestsExport <- setNames(dfRequests[, columnsRequest], columnsExport)

# ---- Write one preprocessed CSV per source (UTF-8, no row names) ------------
write.csv(
  dfEmailsExport,
  file = "preprocessed_emails.csv",
  fileEncoding = "UTF-8",
  row.names = FALSE
)
write.csv(
  dfRequestsExport,
  file = "preprocessed_requests.csv",
  fileEncoding = "UTF-8",
  row.names = FALSE
)
write.csv(
  dfIncidentsExport,
  file = "preprocessed_incidents.csv",
  fileEncoding = "UTF-8",
  row.names = FALSE
)
library(dplyr)
library(plotly)
# Stack the incident and request exports into one table for the charts below.
# NOTE(review): dfEmailsExport is not included here - confirm that is intended.
dfTickets <- bind_rows(dfIncidentsExport, dfRequestsExport)

# ticket_type: number of tickets per ticket type.
# n() counts rows per group directly; unlike length(body) it cannot silently
# fall back to the base function `body` if the column is ever missing.
ticket_types <- dfTickets %>%
  group_by(ticket_type) %>%
  summarise(TicketsType = n()) %>%
  ungroup()

# Bar chart of the counts. Bar traces have no `mode` attribute, so none is
# passed; the legend entry is "Total" because the values are raw counts.
plot_ly(ticket_types, x = ~ticket_type) %>%
  add_trace(
    y = ~TicketsType,
    name = "Total",
    showlegend = TRUE,
    type = "bar"
  ) %>%
  layout(
    title = "Number of tickets type",
    xaxis = list(
      title = "Type of tickets",
      showticklabels = TRUE,
      tickangle = 45,
      tickfont = list(size = 8)
    ),
    yaxis = list(title = "Total"),
    # NOTE(review): 'compare' is kept as in the original; check it against the
    # hovermode values accepted by the installed plotly version.
    hovermode = "compare"
  )
# category: number of tickets per category.
# n() counts rows per group directly; unlike length(body) it cannot silently
# fall back to the base function `body` if the column is ever missing.
categories <- dfTickets %>%
  group_by(category) %>%
  summarise(Category = n()) %>%
  ungroup()

# Bar chart of the counts. The original passed mode = 'legendgroup', which is
# not a mode value and does not apply to bar traces, so it is dropped.
plot_ly(categories, x = ~category) %>%
  add_trace(
    y = ~Category,
    name = "Total",
    showlegend = TRUE,
    type = "bar"
  ) %>%
  layout(
    title = "Number of categories",
    xaxis = list(
      title = "Categories",
      showticklabels = TRUE,
      tickangle = 45,
      tickfont = list(size = 9)
    ),
    yaxis = list(title = "Total"),
    # NOTE(review): 'compare' is kept as in the original; check it against the
    # hovermode values accepted by the installed plotly version.
    hovermode = "compare"
  )
# business_service: number of tickets per business service.
# n() counts rows per group directly; unlike length(body) it cannot silently
# fall back to the base function `body` if the column is ever missing.
business_services <- dfTickets %>%
  group_by(business_service) %>%
  summarise(BusinessService = n()) %>%
  ungroup()

# Bar chart of the counts. The original passed mode = 'legendgroup', which is
# not a mode value and does not apply to bar traces, so it is dropped.
plot_ly(business_services, x = ~business_service) %>%
  add_trace(
    y = ~BusinessService,
    name = "Total",
    showlegend = TRUE,
    type = "bar"
  ) %>%
  layout(
    title = "Number of business services",
    xaxis = list(
      title = "Business services",
      showticklabels = TRUE,
      tickangle = 45,
      tickfont = list(size = 9)
    ),
    yaxis = list(title = "Total"),
    # NOTE(review): 'compare' is kept as in the original; check it against the
    # hovermode values accepted by the installed plotly version.
    hovermode = "compare"
  )