-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathriver_insects.py
53 lines (37 loc) · 1.16 KB
/
river_insects.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
from river.datasets import synth
from river import evaluate
from river import metrics
from river import tree
from river import dummy
from river import preprocessing
import pandas as pd
import itertools
# Sanity check: take ten samples from the synthetic SEA stream and print the first one.
gen = synth.SEA()#synth.Agrawal(classification_function=0, seed=42)
dataset = iter(gen.take(10))
print(next(dataset), "dit is dataset")
# The CSV is read without a header row, so columns are addressed by integer position.
data = pd.read_csv("USP/INSECTS_abrupt_imbalanced.csv",header=None)
# Retrieve labels: column 33 is used as the target.
labels = data[33].to_list()
# Show how many rows fall under each distinct label value.
print(data[33].value_counts())
#labels = [True if label==1 else False for label in labels]
#data = data.drop(data.columns[[0, 1, 8]], axis=1)
print(data.head())
# NOTE(review): data_list is never read in the visible code — confirm whether it is still needed.
data_list = []
def itfunc(k, label_col=33):
    """Yield the first ``k`` rows of ``data`` as scaled ``(x, y)`` pairs.

    Each row is converted to a dict keyed by column position, passed to a
    ``MinMaxScaler`` that is updated one sample at a time, and yielded
    together with the matching entry of ``labels``.

    Args:
        k: Number of leading rows to stream.
        label_col: Position of the target column, which is excluded from
            the feature dict. Defaults to 33, the column ``labels`` is
            read from.

    Yields:
        Tuples ``(x, y)`` where ``x`` is the scaled feature dict and ``y``
        is ``labels[i]``.
    """
    scaler = preprocessing.MinMaxScaler()
    for i in range(k):
        # Leave the target column out: keeping it would hand the model the
        # answer as a feature (and push the label through the scaler).
        x = {
            idx: feature
            for idx, feature in enumerate(data.loc[i])
            if idx != label_col
        }
        # The scaler is updated with the sample before transforming it.
        scaler.learn_one(x)
        x = scaler.transform_one(x)
        print(x)
        y = labels[i]
        yield x, y
# Stream only the first 10 rows of the CSV through itfunc.
# NOTE(review): 10 samples is far below grace_period=100 — presumably a smoke test; confirm.
data_test = iter(itfunc(10))
model = tree.ExtremelyFastDecisionTreeClassifier(
grace_period=100,
delta=1e-5,
min_samples_reevaluate=100
)
metric = metrics.MCC()
# Run the model over the stream and print the resulting metric.
print(evaluate.progressive_val_score(data_test, model, metric))
# NOTE(review): verify that ExtremelyFastDecisionTreeClassifier actually exposes
# n_drifts_detected(); if it does not, this line raises AttributeError.
print(model.n_drifts_detected())