-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclean.py
20 lines (16 loc) · 1.07 KB
/
clean.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import pandas as pd
import numpy as np
import datetime as dt
import helpers as hp
import streamlit as st
# Cleaning pipeline: read dashboard.csv, keep a fixed set of columns, normalise
# values through the `helpers` module, derive three columns, and write the
# result to final_dataset.csv.

# Elementwise DataFrame transform. pandas 2.1 renamed DataFrame.applymap to
# DataFrame.map and deprecated the old name, so prefer `map` when this pandas
# version provides it and fall back to `applymap` on older releases.
_apply_elementwise = getattr(pd.DataFrame, "map", None) or pd.DataFrame.applymap

dashboard = pd.read_csv(r"dashboard.csv", encoding='unicode_escape')

# Columns kept from the source file, in output order. The positional slice
# 13:19 used below covers 'old' through 'turnover' in this list, so reordering
# or inserting entries here changes which columns get `hp.replace_values`.
cols = [
    'unit', 'name', 'email', 'date_of_birth', 'phone number', 'next of kin',
    'phone number.1', 'title', 'grade', 'started_at_ahc', 'qualification',
    'english_level', 'residence',
    'old', 'housing', 'med.ins', 'allowance', 'gender', 'turnover',
    'cause', 'ended_at_ahc', 'year',
]
dashboard = dashboard[cols]
dashboard.rename(columns=hp.rename_cols, inplace=True)

# Apply hp.replace_values to every cell of the columns at positions 13..18.
# The labels are looked up after the rename, so they are the renamed names.
_replace_labels = dashboard.columns[13:19].tolist()
dashboard.iloc[:, 13:19] = _apply_elementwise(
    dashboard[_replace_labels], hp.replace_values
)

# Convert every cell of the three date columns with hp.to_datetime.
# NOTE(review): on pandas >= 2.0, `.loc[:, cols] = ...` writes into the
# existing columns where it can, so these may remain object dtype rather than
# datetime64 - confirm the helpers below do not depend on the column dtype.
_date_cols = ["date_of_birth", "started_at_ahc", "ended_at_ahc"]
dashboard.loc[:, _date_cols] = _apply_elementwise(
    dashboard.loc[:, _date_cols], hp.to_datetime
)

# Derived columns. Order matters: the row-wise helper runs after "age" has
# been added, and the interval column is computed from "years_of_experience".
dashboard["age"] = hp.subtract_from_current_date(dashboard["date_of_birth"])
dashboard["years_of_experience"] = dashboard.apply(hp.years_of_experience, axis=1)
dashboard["interval_experience"] = dashboard["years_of_experience"].apply(hp.intervals)

dashboard.to_csv("final_dataset.csv", index=False)