Skip to content

Commit

Permalink
update more scripts to accommodate integration of NCES2 and ROCES
Browse files Browse the repository at this point in the history
  • Loading branch information
Jean-KOUAGOU committed Jan 10, 2025
1 parent 5caefa4 commit 4b5fa54
Show file tree
Hide file tree
Showing 12 changed files with 23 additions and 37 deletions.
13 changes: 10 additions & 3 deletions examples/concept_learning_cv_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
import platform
import pandas as pd
from ontolearn.knowledge_base import KnowledgeBase
from ontolearn.concept_learner import CELOE, OCEL, EvoLearner, NCES, CLIP
from ontolearn.concept_learner import CELOE, EvoLearner, NCES, CLIP
from ontolearn.refinement_operators import ExpressRefinement, ModifiedCELOERefinement
from ontolearn.learners import Drill, TDL
from ontolearn.learners import Drill, TDL, OCEL
from ontolearn.learning_problem import PosNegLPStandard
from ontolearn.metrics import F1
from owlapy.owl_individual import OWLNamedIndividual, IRI
Expand Down Expand Up @@ -75,9 +75,11 @@ def dl_concept_learning(args):
with open(args.lps) as json_file:
settings = json.load(json_file)
kb = KnowledgeBase(path=args.kb)

ocel = OCEL(knowledge_base=kb,
quality_func=F1(),
max_runtime=args.max_runtime)

celoe = CELOE(knowledge_base=kb,
quality_func=F1(),
max_runtime=args.max_runtime)
Expand All @@ -93,6 +95,7 @@ def dl_concept_learning(args):
nces = NCES(knowledge_base_path=args.kb,
quality_func=F1(),
path_of_embeddings=args.path_of_nces_embeddings,
path_of_trained_models=args.path_of_nces_trained_models,
learner_names=["LSTM", "GRU", "SetTransformer"],
num_predictions=100,
verbose=0)
Expand Down Expand Up @@ -146,7 +149,9 @@ def dl_concept_learning(args):
neg={OWLNamedIndividual(i) for i in train_neg})

test_lp = PosNegLPStandard(pos={OWLNamedIndividual(i) for i in test_pos},

neg={OWLNamedIndividual(i) for i in test_neg})

print("OCEL starts..", end="\t")
start_time = time.time()
pred_ocel = ocel.fit(train_lp).best_hypotheses()
Expand All @@ -168,6 +173,7 @@ def dl_concept_learning(args):
print(f"OCEL Test Quality: {test_f1_ocel:.3f}", end="\t")
print(f"OCEL Runtime: {rt_ocel:.3f}")


print("CELOE starts..", end="\t")
start_time = time.time()
pred_celoe = celoe.fit(train_lp).best_hypotheses()
Expand All @@ -184,7 +190,7 @@ def dl_concept_learning(args):
# Reporting
data.setdefault("Train-F1-CELOE", []).append(train_f1_celoe)
data.setdefault("Test-F1-CELOE", []).append(test_f1_celoe)
data.setdefault("RT-CELOE", []).append(rt_ocel)
data.setdefault("RT-CELOE", []).append(rt_celoe)
print(f"CELOE Train Quality: {train_f1_celoe:.3f}", end="\t")
print(f"CELOE Test Quality: {test_f1_celoe:.3f}", end="\t")
print(f"CELOE Runtime: {rt_celoe:.3f}")
Expand Down Expand Up @@ -318,6 +324,7 @@ def dl_concept_learning(args):
help="Knowledge base")
parser.add_argument("--path_drill_embeddings", type=str, default=None)
parser.add_argument("--path_of_nces_embeddings", type=str, default=None)
parser.add_argument("--path_of_nces_trained_models", type=str, default=None)
parser.add_argument("--path_of_clip_embeddings", type=str, default=None)
parser.add_argument("--report", type=str, default="report.csv")
parser.add_argument("--random_seed", type=int, default=1)
Expand Down
2 changes: 2 additions & 0 deletions ontolearn/base_nces.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ def __init__(self, knowledge_base_path, nces2_or_roces, quality_func, num_predic
self.vocab = {vocab[i]: i for i in range(len(vocab))}
if quality_func is None:
self.quality_func = F1()
else:
self.quality_func = quality_func
self.num_predictions = num_predictions
self.auto_train = auto_train
self.proj_dim = proj_dim
Expand Down
4 changes: 1 addition & 3 deletions ontolearn/clip_architectures.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@
# -----------------------------------------------------------------------------

import torch, torch.nn as nn
import random
from typing import List
from ontolearn.nces_modules import *
from ontolearn.nces_modules import ISAB, PMA

class LengthLearner_LSTM(nn.Module):
"""LSTM architecture"""
Expand Down
7 changes: 1 addition & 6 deletions ontolearn/clip_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,10 @@
import copy
import torch
from tqdm import trange
from collections import defaultdict
import os
import json
from torch.optim.lr_scheduler import ExponentialLR
from torch.nn import functional as F
from torch.nn.utils import clip_grad_value_
from torch.nn.utils.rnn import pad_sequence
from sklearn.metrics import f1_score, accuracy_score
import time

Expand Down Expand Up @@ -68,17 +65,15 @@ def get_optimizer(self, length_predictor, optimizer='Adam'): # pragma: no cover
def show_num_learnable_params(self):
print("*"*20+"Trainable model size"+"*"*20)
size = sum([p.numel() for p in self.clip.length_predictor.parameters()])
size_ = 0
print("Length Predictor: ", size)
print("*"*20+"Trainable model size"+"*"*20)
print()
return size

def train(self, train_dataloader, save_model=True, optimizer='Adam', record_runtime=True):
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if isinstance(self.clip.length_predictor, list):
self.clip.length_predictor = copy.deepcopy(self.clip.length_predictor[0])
model_size = self.show_num_learnable_params()
self.show_num_learnable_params()
if device.type == "cpu":
print("Training on CPU, it may take long...")
else:
Expand Down
13 changes: 7 additions & 6 deletions ontolearn/concept_learner.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,15 +599,16 @@ def __init__(self,
self.output_size = output_size
self.num_examples = num_examples
self.path_of_embeddings = path_of_embeddings
assert os.path.isfile(self.path_of_embeddings), '!!! Wrong path for CLIP embeddings'
self.instance_embeddings = pd.read_csv(path_of_embeddings, index_col=0)
self.input_size = self.instance_embeddings.shape[1]
if self.path_of_embeddings:
assert os.path.isfile(self.path_of_embeddings), '!!! Wrong path for CLIP embeddings'
self.instance_embeddings = pd.read_csv(path_of_embeddings, index_col=0)
self.input_size = self.instance_embeddings.shape[1]
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.length_predictor = self.get_length_predictor()

def get_length_predictor(self):
def load_model(predictor_name, load_pretrained):
if predictor_name is None:
if predictor_name is None or not self.path_of_embeddings:
return []
if predictor_name == 'SetTransformer':
model = LengthLearner_SetTransformer(self.input_size, self.output_size, proj_dim=256, num_heads=4,
Expand Down Expand Up @@ -724,7 +725,7 @@ def fit(self, *args, **kwargs):
else:
self._max_runtime = self.max_runtime

if (self.pretrained_predictor_name is not None) and (self.length_predictor is not None):
if (self.pretrained_predictor_name is not None) and self.length_predictor[0] != []:
x_pos, x_neg = self.pos_neg_to_tensor(list(self._learning_problem.kb_pos)[:self.num_examples],
list(self._learning_problem.kb_neg)[:self.num_examples])
max_length = self.predict_length(self.length_predictor, x_pos, x_neg)
Expand Down Expand Up @@ -870,7 +871,7 @@ def get_synthesizer(self, path=None):
self.vocab = vocab
self.inv_vocab = inv_vocab
except Exception as e:
print(e+'\n')
print(e,'\n')
raise FileNotFoundError(f"{path} does not contain at least one of `vocab.json, inv_vocab.npy or embedding_config.json`")
elif self.load_pretrained and self.path_of_trained_models and glob.glob(self.path_of_trained_models + "/*.pt"):
# Read pretrained model's vocabulary and config files
Expand Down
5 changes: 1 addition & 4 deletions ontolearn/nces_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@
import torch
from torch.utils.data import DataLoader
from tqdm import trange
from collections import defaultdict
import os, random
import os
import json
from torch.optim.lr_scheduler import ExponentialLR
from torch.nn import functional as F
Expand Down Expand Up @@ -161,8 +160,6 @@ def map_to_token(self, idx_array):


def train_step(self, batch, model, emb_model, optimizer, device, triples_dataloader=None):
soft_acc, hard_acc = [], []
train_losses = []
if emb_model:
try:
triples_batch = next(triples_dataloader)
Expand Down
5 changes: 0 additions & 5 deletions tests/test_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,6 @@
from ontolearn.refinement_operators import ExpressRefinement
from ontolearn.knowledge_base import KnowledgeBase
from owlapy.parser import DLSyntaxParser
import sys
from ontolearn.metrics import F1
import time
import random
import unittest
import os
import warnings
warnings.filterwarnings("ignore")
Expand Down
3 changes: 1 addition & 2 deletions tests/test_clip_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@
import os
import json
import random
import unittest
import warnings
warnings.filterwarnings("ignore")
import os

def seed_everything():
seed = 42
os.environ['PYTHONHASHSEED'] = str(seed)
Expand Down
2 changes: 0 additions & 2 deletions tests/test_nces.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
from ontolearn.concept_learner import NCES
from ontolearn.knowledge_base import KnowledgeBase
from owlapy.parser import DLSyntaxParser
from ontolearn.metrics import F1
from ontolearn.learning_problem import PosNegLPStandard
import subprocess
import random
import unittest
import os
Expand Down
2 changes: 0 additions & 2 deletions tests/test_nces2.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
from ontolearn.concept_learner import NCES2
from ontolearn.knowledge_base import KnowledgeBase
from owlapy.parser import DLSyntaxParser
from ontolearn.metrics import F1
from ontolearn.learning_problem import PosNegLPStandard
import subprocess
import random
import unittest
import os
Expand Down
2 changes: 0 additions & 2 deletions tests/test_nces_trainer.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
from ontolearn.concept_learner import NCES
import time
import random
import unittest
import os
import json
import numpy as np
import torch
import pathlib
Expand Down
2 changes: 0 additions & 2 deletions tests/test_roces.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
from ontolearn.concept_learner import ROCES
from ontolearn.knowledge_base import KnowledgeBase
from owlapy.parser import DLSyntaxParser
from ontolearn.metrics import F1
from ontolearn.learning_problem import PosNegLPStandard
import subprocess
import random
import unittest
import os
Expand Down

0 comments on commit 4b5fa54

Please sign in to comment.