-
Notifications
You must be signed in to change notification settings - Fork 0
/
eda_app.py
78 lines (67 loc) · 3.38 KB
/
eda_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import streamlit as st
import numpy as np
import joblib
import os
import pickle
# Markdown description of the cyberbullying tweets dataset (text is in
# Indonesian). run_eda_app() passes this string to st.markdown() inside the
# "Dataset Information" expander, so its content is user-facing.
Dataset_Info = """
Dataset Cyberbullying Tweets
Dataset ini berisi lebih dari 47.000 tweet Data telah diseimbangkan untuk memuat sekitar 8.000 instansi untuk setiap kelas.
Informasi Kategori:
- Usia: Rentang usia dari pengguna tweet.
- Etnis: Informasi tentang latar belakang etnis dari pengguna.
- Jenis Kelamin: Jenis kelamin dari pengguna tweet.
- Agama: Informasi tentang agama pengguna tweet.
Jenis Penindasan Maya Lainnya: Kategori-kategori khusus dari penindasan maya yang mungkin terjadi.
Bukan Penindasan Maya: Kategori untuk tweet yang tidak terkait dengan penindasan maya.
Source :
https://www.kaggle.com/datasets/andrewmvd/cyberbullying-classification
"""
# Plots produced from the raw (uncleaned) tweets.
# Maps the caption shown in the UI to the image file path; insertion order is
# the order in which the plots are displayed.
images_before = {
    "Cyberbullying Type Count": "images/Output per cyberbullying type.png",
    "Average Tweet Length": "images/average tweet length.png",
    "Word Cloud for not cyberbullying": "images/word cloud for non cyberbullying before.png",
    "Word Cloud for gender": "images/word cloud for gender before.png",
    "Word Cloud for religion": "images/word cloud for religion before.png",
    "Word Cloud for age": "images/word cloud for age before.png",
    "Word Cloud for ethnicity": "images/word cloud for ethnicity before.png",
    "Word Cloud for other cyberbullying": "images/word cloud for other cyberbullying before.png",
}
# Screenshots documenting the data-cleaning step.
# Maps the caption shown in the UI to the image file path; insertion order is
# the order in which the images are displayed.
image_cleaning = {
    "Process for cleaning data": "images/cleaning data.png",
    "Results from cleaned data": "images/result from cleaning data.png",
    # Caption previously read "Eesults ..." (typo in the user-facing label).
    "Results of the data table after data cleaning": "images/df cleaning data head.png",
}
# Plots produced from the cleaned tweets.
# Maps the caption shown in the UI to the image file path; insertion order is
# the order in which the plots are displayed.
images_after = {
    "Average Tweet Length": "images/average tweet length after.png",
    "Word Cloud for not cyberbullying": "images/word cloud for non cyberbullying after.png",
    "Word Cloud for gender": "images/word cloud for gender after.png",
    "Word Cloud for religion": "images/word cloud for religion after.png",
    "Word Cloud for age": "images/word cloud for age after.png",
    "Word Cloud for ethnicity": "images/word cloud for ethnicity after.png",
}
# st.cache_resource (rather than st.cache_data) is Streamlit's documented cache
# for ML models: the loaded object is stored once and returned as-is instead
# of being pickled and copied on every call.
@st.cache_resource
def load_model(model_file):
    """Load a serialized model from the ``models`` directory.

    Args:
        model_file: File name of the serialized model, relative to the
            ``models`` directory.

    Returns:
        The object deserialized by ``joblib.load``.

    Raises:
        FileNotFoundError: If ``models/<model_file>`` does not exist.
    """
    model_path = os.path.join("models", model_file)
    # NOTE: joblib.load unpickles the file and can execute arbitrary code;
    # only load model files that come from a trusted source.
    # The context manager closes the file handle, which the previous
    # ``joblib.load(open(...))`` form left open.
    with open(model_path, "rb") as model_fh:
        return joblib.load(model_fh)
def _render_image_expanders(images):
    """Show each ``caption -> image path`` pair inside its own expander."""
    for caption, image_path in images.items():
        with st.expander(caption):
            st.image(image_path, caption=caption)


def run_eda_app():
    """Render the EDA page.

    Shows the dataset description, then three image galleries in order:
    plots before cleaning, the cleaning process, and plots after cleaning.
    Each image is placed in an expander titled with its caption.
    """
    st.subheader("EDA section")
    with st.expander("Dataset Information"):
        st.markdown(Dataset_Info)

    # (section heading, caption -> image path mapping), in display order.
    galleries = [
        ("EDA preprocessing before cleaning", images_before),
        ("Process data cleaning", image_cleaning),
        ("EDA preprocessing after cleaning", images_after),
    ]
    for heading, images in galleries:
        st.subheader(heading)
        _render_image_expanders(images)