Skip to content

Commit

Permalink
.
Browse files Browse the repository at this point in the history
  • Loading branch information
YGZWQZD committed Sep 16, 2022
1 parent 785abd1 commit d70ad87
Show file tree
Hide file tree
Showing 54 changed files with 418 additions and 2,043 deletions.
Binary file modified Imgs/Overview.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
27 changes: 11 additions & 16 deletions LAMDA_SSL/Algorithm/Classification/Assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,19 @@
import LAMDA_SSL.Config.Assemble as config

class Assemble(InductiveEstimator,ClassifierMixin):
def __init__(self, base_estimater=config.base_estimater,T=config.T,alpha=config.alpha,
def __init__(self, base_estimator=config.base_estimator,T=config.T,alpha=config.alpha,
beta=config.beta,evaluation=config.evaluation,verbose=config.verbose,file=config.file):
# >> Parameter:
# >> - base_model: A base learner for ensemble learning.
# >> - base_estimator: A base learner for ensemble learning.
# >> - T: the number of base learners. It is also the number of iterations.
# >> - alpha: the weight of each sample when the sampling distribution is updated.
# >> - beta: used to initialize the sampling distribution of labeled data and unlabeled data.

self.base_estimater=base_estimater
self.base_estimator=base_estimator
self.T=T
self.alpha=alpha
self.beta=beta
self.KNN=KNeighborsClassifier(n_neighbors=1)
self.KNN=KNeighborsClassifier(n_neighbors=3)
self.evaluation = evaluation
self.verbose=verbose
self.file=file
Expand Down Expand Up @@ -53,10 +53,11 @@ def fit(self,X,y,unlabeled_X):
for i in range(u):
sample_weight[i+l]=(1-self.beta)/u
unlabeled_y=self.KNN.fit(X,y).predict(unlabeled_X)
classfier=copy.deepcopy(self.base_estimater)

classfier=copy.deepcopy(self.base_estimator)
X_all=np.concatenate((X,unlabeled_X))
y_all=np.concatenate((y,unlabeled_y))
classfier.fit(X_all,y_all,sample_weight=sample_weight)
classfier.fit(X_all,y_all,sample_weight=sample_weight*(l+u))

for i in range(self.T):
self.f.append(classfier)
Expand All @@ -65,11 +66,10 @@ def fit(self,X,y,unlabeled_X):
for _ in range(l+u):
if _y_all[_]!=y_all[_]:
epsilon+=sample_weight[_]
if epsilon>0.5:
break
w=np.log((1-epsilon)/epsilon)*0.5
self.w.append(w)

if epsilon>0.5:
break
probas=self.predict_proba(X_all)
logits = np.max(probas, axis=1)
unlabeled_y=self.predict(unlabeled_X)
Expand All @@ -85,9 +85,8 @@ def fit(self,X,y,unlabeled_X):
X_sample=X_all[idx_sample]
y_sample=y_all[idx_sample]
sample_weight_sample=sample_weight[idx_sample]
classfier=copy.deepcopy(self.base_estimater)
classfier=copy.deepcopy(self.base_estimator)
classfier.fit(X_sample,y_sample,sample_weight_sample)

return self

def evaluate(self,X,y=None):
Expand All @@ -98,7 +97,6 @@ def evaluate(self,X,y=None):
self.y_score = self.predict_proba(X)
self.y_pred=self.predict(X)


if self.evaluation is None:
return None
elif isinstance(self.evaluation,(list,tuple)):
Expand All @@ -125,7 +123,4 @@ def evaluate(self,X,y=None):
if self.verbose:
print(performance, file=self.file)
self.performance=performance
return performance



return performance
3 changes: 0 additions & 3 deletions LAMDA_SSL/Algorithm/Classification/Co_Training.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,6 @@ def predict_proba(self, X,X_2=None):
for i, (y1_i_dist, y2_i_dist) in enumerate(zip(y1_proba, y2_proba)):
y_proba[i][0] = (y1_i_dist[0] + y2_i_dist[0]) / 2
y_proba[i][1] = (y1_i_dist[1] + y2_i_dist[1]) / 2

_epsilon = 0.0001
assert all(abs(sum(y_dist) - 1) <= _epsilon for y_dist in y_proba)
return y_proba

def evaluate(self,X,y=None):
Expand Down
2 changes: 2 additions & 0 deletions LAMDA_SSL/Algorithm/Classification/GAT.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def start_fit(self):
self._network.train()

def init_optimizer(self):
self._optimizer = copy.deepcopy(self.optimizer)
if isinstance(self._optimizer,BaseOptimizer):
grouped_parameters=[
dict(params=self._network.conv1.parameters(), weight_decay=self.weight_decay),
Expand All @@ -95,6 +96,7 @@ def init_train_dataloader(self):
def init_train_dataset(self, X=None, y=None, unlabeled_X=None,
edge_index=None,train_mask=None,labeled_mask=None,
unlabeled_mask=None,val_mask=None,test_mask=None):
self._train_dataset = copy.deepcopy(self.train_dataset)
if isinstance(X,Dataset):
X=X.data
if not isinstance(X,Data):
Expand Down
2 changes: 2 additions & 0 deletions LAMDA_SSL/Algorithm/Classification/GCN.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def start_fit(self):
self._network.train()

def init_optimizer(self):
self._optimizer = copy.deepcopy(self.optimizer)
if isinstance(self._optimizer,BaseOptimizer):
grouped_parameters=[
dict(params=self._network.conv1.parameters(), weight_decay=self.weight_decay),
Expand All @@ -92,6 +93,7 @@ def init_train_dataloader(self):
def init_train_dataset(self, X=None, y=None, unlabeled_X=None,
edge_index=None,train_mask=None,labeled_mask=None,
unlabeled_mask=None,val_mask=None,test_mask=None):
self._train_dataset = copy.deepcopy(self.train_dataset)
if isinstance(X,Dataset):
X=X.data
if not isinstance(X,Data):
Expand Down
4 changes: 3 additions & 1 deletion LAMDA_SSL/Algorithm/Classification/ImprovedGAN.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ def fit_batch_loop(self,valid_X=None,valid_y=None):
self.valid_performance.update({"epoch_" + str(self._epoch) + "_it_" + str(self.it_epoch): self.performance})

def init_optimizer(self):
self._optimizer = copy.deepcopy(self.optimizer)
if isinstance(self._optimizer,(list,tuple)):
self._optimizerG=self._optimizer[0]
self._optimizerD = self._optimizer[1]
Expand Down Expand Up @@ -217,6 +218,7 @@ def init_optimizer(self):


def init_scheduler(self):
self._scheduler = copy.deepcopy(self.scheduler)
if isinstance(self._scheduler,(list,tuple)):
self._schedulerG=self._scheduler[0]
self._schedulerD = self._scheduler[1]
Expand Down Expand Up @@ -260,7 +262,7 @@ def get_loss_D(self,train_result_D):
logz_label, logz_unlabel, logz_fake = self.log_sum_exp(lb_logits), \
self.log_sum_exp(ulb_logits), \
self.log_sum_exp(fake_logits) # log ∑e^x_i
prob_label = torch.gather(lb_logits, 1, lb_y.unsqueeze(1)) # log e^x_label = x_label
prob_label = torch.gather(lb_logits, 1, lb_y.unsqueeze(1).long()) # log e^x_label = x_label
sup_loss = -torch.mean(prob_label) + torch.mean(logz_label)
unsup_loss = 0.5 * (-torch.mean(logz_unlabel) + torch.mean(F.softplus(logz_unlabel)) + # real_data: log Z/(1+Z)
torch.mean(F.softplus(logz_fake)))
Expand Down
1 change: 1 addition & 0 deletions LAMDA_SSL/Algorithm/Classification/LadderNetwork.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def start_fit(self):
self._network.train()

def init_optimizer(self):
self._optimizer = copy.deepcopy(self.optimizer)
if isinstance(self._optimizer,BaseOptimizer):
self._optimizer=self._optimizer.init_optimizer(params=self._network.parameters())

Expand Down
7 changes: 3 additions & 4 deletions LAMDA_SSL/Algorithm/Classification/LapSVM.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import copy
from torch.utils.data.dataset import Dataset
import LAMDA_SSL.Config.LapSVM as config
from scipy import sparse

class LapSVM(InductiveEstimator,ClassifierMixin):
# Binary
Expand Down Expand Up @@ -60,7 +61,6 @@ def fit(self,X,y,unlabeled_X):

self.X=np.vstack([X,unlabeled_X])
Y=np.diag(y)

if self.distance_function == 'knn':
if self.neighbor_mode=='connectivity':
W = kneighbors_graph(self.X, self.n_neighbor, mode='connectivity',include_self=False)
Expand All @@ -81,8 +81,7 @@ def fit(self,X,y,unlabeled_X):
W = self.distance_function(self.X, self.X)
else:
W=rbf_kernel(self.X,self.X,self.gamma_d)

L = np.diag(np.array(W.sum(0))) - W
L = sparse.csr_matrix(np.diag(np.array(W.sum(0))) - W)

if self.kernel_function == 'rbf':
K = rbf_kernel(self.X,self.X,self.gamma_k)
Expand All @@ -95,12 +94,12 @@ def fit(self,X,y,unlabeled_X):
K = self.kernel_function(self.X, self.X)
else:
K = rbf_kernel(self.X, self.X, self.gamma_k)

num_labeled=X.shape[0]
num_unlabeled=unlabeled_X.shape[0]
J = np.concatenate([np.identity(num_labeled), np.zeros(num_labeled * num_unlabeled).reshape(num_labeled, num_unlabeled)], axis=1)
alpha_star = np.linalg.inv(2 * self.gamma_A * np.identity(num_labeled + num_unlabeled) \
+ ((2 * self.gamma_I) / (num_labeled + num_unlabeled) ** 2) * L.dot(K)).dot(J.T).dot(Y)

Q = Y.dot(J).dot(K).dot(alpha_star)
Q = (Q+Q.T)/2

Expand Down
1 change: 1 addition & 0 deletions LAMDA_SSL/Algorithm/Classification/SDNE.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ def init_train_dataloader(self):
def init_train_dataset(self, X=None, y=None, unlabeled_X=None,
edge_index=None,train_mask=None,labeled_mask=None,
unlabeled_mask=None,val_mask=None,test_mask=None):
self._train_dataset = copy.deepcopy(self.train_dataset)
if isinstance(X,Dataset):
X=X.data
if not isinstance(X,Data):
Expand Down
5 changes: 1 addition & 4 deletions LAMDA_SSL/Algorithm/Classification/SSVAE.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def start_fit(self):
self.dim_in=self.dim_in if self.dim_in is not None else \
self._train_dataset.labeled_dataset.X.shape[1:]
if self.network is None:
self.network=VAE.SSVAE( dim_in=self.dim_in,num_classes=self.num_classes,dim_z=self.dim_z,
self.network=VAE.SSVAE(dim_in=self.dim_in,num_classes=self.num_classes,dim_z=self.dim_z,
dim_hidden_de=self.dim_hidden_de,activations_de=self.activations_de,
dim_hidden_en_y=self.dim_hidden_en_y, activations_en_y=self.activations_en_y,
dim_hidden_en_z=self.dim_hidden_en_z, activations_en_z=self.activations_en_z,
Expand All @@ -154,7 +154,6 @@ def train(self,lb_X=None,lb_y=None,ulb_X=None,lb_idx=None,ulb_idx=None,*args,**k
lb_X = lb_X[0] if isinstance(lb_X,(list,tuple)) else lb_X
lb_y=lb_y[0] if isinstance(lb_y,(list,tuple)) else lb_y
ulb_X=ulb_X[0]if isinstance(ulb_X,(list,tuple)) else ulb_X

lb_X=lb_X.view(lb_X.shape[0],-1).bernoulli()
ulb_X = ulb_X.view(ulb_X.shape[0], -1).bernoulli()
lb_q_y = self._network.encode_y(lb_X)
Expand Down Expand Up @@ -204,8 +203,6 @@ def get_loss(self,train_result,*args,**kwargs):
loss=sup_loss+unsup_loss+cls_loss
return loss



def optimize(self,loss,*args,**kwargs):
self._network.zero_grad()
loss.backward()
Expand Down
74 changes: 31 additions & 43 deletions LAMDA_SSL/Base/DeepModelMixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,39 +132,6 @@ def __init__(self, train_dataset=None,

self.parallel=parallel
self.verbose=verbose
self._optimizer=copy.deepcopy(self.optimizer)
self._network=copy.deepcopy(self.network)
self._scheduler=copy.deepcopy(self.scheduler)

self._train_sampler=copy.deepcopy(self.train_sampler)
self._labeled_sampler = copy.deepcopy(self.labeled_sampler)
self._unlabeled_sampler = copy.deepcopy(self.unlabeled_sampler)
self._valid_sampler = copy.deepcopy(self.valid_sampler)
self._test_sampler=copy.deepcopy(self.test_sampler)

self._train_batch_sampler=copy.deepcopy(self.train_batch_sampler)
self._labeled_batch_sampler = copy.deepcopy(self.labeled_batch_sampler)
self._unlabeled_batch_sampler = copy.deepcopy(self.unlabeled_batch_sampler)
self._valid_batch_sampler=copy.deepcopy(self.valid_batch_sampler)
self._test_batch_sampler=copy.deepcopy(self.test_batch_sampler)

self._train_dataset=copy.deepcopy(self.train_dataset)
self._labeled_dataset = copy.deepcopy(self.labeled_dataset)
self._unlabeled_dataset = copy.deepcopy(self.unlabeled_dataset)
self._valid_dataset = copy.deepcopy(self.valid_dataset)
self._test_dataset=copy.deepcopy(self.test_dataset)

self._train_dataloader=copy.deepcopy(self.train_dataloader)
self._labeled_dataloader = copy.deepcopy(self.labeled_dataloader)
self._unlabeled_dataloader = copy.deepcopy(self.unlabeled_dataloader)
self._valid_dataloader=copy.deepcopy(self.valid_dataloader)
self._test_dataloader = copy.deepcopy(self.test_dataloader)

self._augmentation=copy.deepcopy(self.augmentation)
self._evaluation=copy.deepcopy(self.evaluation)
self._parallel=copy.deepcopy(self.parallel)

self._epoch=0
self.it_epoch=0
self.it_total=0
self.loss=None
Expand All @@ -177,18 +144,11 @@ def __init__(self, train_dataset=None,
if isinstance(file,str):
file=open(file,"w")
self.file=file
self._estimate_dataloader=None
self._estimator_type=None
if self._network is not None:
self.init_model()
self.init_ema()
self.init_optimizer()
self.init_scheduler()
self.init_augmentation()
self.init_transform()


def init_model(self):
self._network = copy.deepcopy(self.network)
self._parallel = copy.deepcopy(self.parallel)
if self.device is None:
self.device='cpu'
if self.device is not 'cpu':
Expand All @@ -205,6 +165,7 @@ def init_ema(self):
self.ema=None

def init_optimizer(self):
self._optimizer=copy.deepcopy(self.optimizer)
if isinstance(self._optimizer,BaseOptimizer):
no_decay = ['bias', 'bn']
grouped_parameters = [
Expand All @@ -216,6 +177,7 @@ def init_optimizer(self):
self._optimizer=self._optimizer.init_optimizer(params=grouped_parameters)

def init_scheduler(self):
self._scheduler=copy.deepcopy(self.scheduler)
if isinstance(self._scheduler,BaseScheduler):
self._scheduler=self._scheduler.init_scheduler(optimizer=self._optimizer)

Expand All @@ -228,6 +190,7 @@ def init_epoch(self):
self.epoch=ceil(self.num_it_total/self.num_it_epoch)

def init_augmentation(self):
self._augmentation = copy.deepcopy(self.augmentation)
if self._augmentation is not None:
if isinstance(self._augmentation, dict):
self.weak_augmentation = self._augmentation['augmentation'] \
Expand All @@ -250,6 +213,7 @@ def init_transform(self):
self._train_dataset.add_unlabeled_transform(self.weak_augmentation, dim=1, x=0, y=0)

def init_train_dataset(self,X=None,y=None,unlabeled_X=None, *args, **kwargs):
self._train_dataset=copy.deepcopy(self.train_dataset)
if isinstance(X,TrainDataset):
self._train_dataset=X
elif isinstance(X,Dataset) and y is None:
Expand All @@ -258,6 +222,15 @@ def init_train_dataset(self,X=None,y=None,unlabeled_X=None, *args, **kwargs):
self._train_dataset.init_dataset(labeled_X=X, labeled_y=y,unlabeled_X=unlabeled_X)

def init_train_dataloader(self):
self._train_dataloader=copy.deepcopy(self.train_dataloader)
self._labeled_dataloader = copy.deepcopy(self.labeled_dataloader)
self._unlabeled_dataloader = copy.deepcopy(self.unlabeled_dataloader)
self._train_sampler=copy.deepcopy(self.train_sampler)
self._labeled_sampler = copy.deepcopy(self.labeled_sampler)
self._unlabeled_sampler = copy.deepcopy(self.unlabeled_sampler)
self._train_batch_sampler=copy.deepcopy(self.train_batch_sampler)
self._labeled_batch_sampler = copy.deepcopy(self.labeled_batch_sampler)
self._unlabeled_batch_sampler = copy.deepcopy(self.unlabeled_batch_sampler)
if self._train_dataloader is not None:
self._labeled_dataloader,self._unlabeled_dataloader=self._train_dataloader.init_dataloader(dataset=self._train_dataset,
sampler=self._train_sampler,
Expand Down Expand Up @@ -351,12 +324,21 @@ def end_fit(self, *args, **kwargs):
def fit(self,X=None,y=None,unlabeled_X=None,valid_X=None,valid_y=None):
self.init_train_dataset(X,y,unlabeled_X)
self.init_train_dataloader()
if self.network is not None:
self.init_model()
self.init_ema()
self.init_optimizer()
self.init_scheduler()
self.init_augmentation()
self.init_transform()
self.start_fit()
self.fit_epoch_loop(valid_X,valid_y)
self.end_fit()
return self

def init_estimate_dataset(self, X=None,valid=False):
self._valid_dataset = copy.deepcopy(self.valid_dataset)
self._test_dataset=copy.deepcopy(self.test_dataset)
if valid:
if isinstance(X,Dataset):
self._valid_dataset=X
Expand All @@ -369,6 +351,12 @@ def init_estimate_dataset(self, X=None,valid=False):
self._test_dataset=self._test_dataset.init_dataset(X=X)

def init_estimate_dataloader(self,valid=False):
self._valid_dataloader=copy.deepcopy(self.valid_dataloader)
self._test_dataloader = copy.deepcopy(self.test_dataloader)
self._valid_sampler = copy.deepcopy(self.valid_sampler)
self._test_sampler=copy.deepcopy(self.test_sampler)
self._valid_batch_sampler=copy.deepcopy(self.valid_batch_sampler)
self._test_batch_sampler=copy.deepcopy(self.test_batch_sampler)
if valid:
self._estimate_dataloader=self._valid_dataloader.init_dataloader(self._valid_dataset,
sampler=self._valid_sampler,
Expand Down Expand Up @@ -402,7 +390,7 @@ def predict_batch_loop(self):
idx=to_device(idx,self.device)
X=X[0] if isinstance(X,(list,tuple)) else X
X=to_device(X,self.device)
_est=self.estimate(X=X,idx=idx)
_est = self.estimate(X=X,idx=idx)
_est = _est[0] if isinstance(_est,(list,tuple)) else _est
self.y_est=torch.cat((self.y_est,_est),0)
self.end_predict_batch()
Expand Down
2 changes: 1 addition & 1 deletion LAMDA_SSL/Config/Assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from LAMDA_SSL.Evaluation.Classifier.AUC import AUC
from LAMDA_SSL.Evaluation.Classifier.Confusion_Matrix import Confusion_Matrix

base_estimater=SVC(C=1.0,kernel='rbf',probability=True,gamma='auto')
base_estimator=SVC(C=1.0,kernel='rbf',probability=True,gamma='auto')
T=100
alpha=1
beta=0.9
Expand Down
2 changes: 1 addition & 1 deletion LAMDA_SSL/Config/PseudoLabel.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,4 +113,4 @@
threshold=0.95
lambda_u=1
warmup=0.4
mu=1,
mu=1
Loading

0 comments on commit d70ad87

Please sign in to comment.