update more scripts to accommodate integration of NCES2 and ROCES

dice-group · Jan 10, 2025 · 4b5fa54 · 4b5fa54
1 parent 5caefa4
commit 4b5fa54
Show file tree

Hide file tree

Showing 12 changed files with 23 additions and 37 deletions.
diff --git a/examples/concept_learning_cv_evaluation.py b/examples/concept_learning_cv_evaluation.py
@@ -13,9 +13,9 @@
 import platform
 import pandas as pd
 from ontolearn.knowledge_base import KnowledgeBase
-from ontolearn.concept_learner import CELOE, OCEL, EvoLearner, NCES, CLIP
+from ontolearn.concept_learner import CELOE, EvoLearner, NCES, CLIP
 from ontolearn.refinement_operators import ExpressRefinement, ModifiedCELOERefinement
-from ontolearn.learners import Drill, TDL
+from ontolearn.learners import Drill, TDL, OCEL
 from ontolearn.learning_problem import PosNegLPStandard
 from ontolearn.metrics import F1
 from owlapy.owl_individual import OWLNamedIndividual, IRI
@@ -75,9 +75,11 @@ def dl_concept_learning(args):
     with open(args.lps) as json_file:
         settings = json.load(json_file)
     kb = KnowledgeBase(path=args.kb)
+
     ocel = OCEL(knowledge_base=kb,
                 quality_func=F1(),
                 max_runtime=args.max_runtime)
+
     celoe = CELOE(knowledge_base=kb,
                   quality_func=F1(),
                   max_runtime=args.max_runtime)
@@ -93,6 +95,7 @@ def dl_concept_learning(args):
     nces = NCES(knowledge_base_path=args.kb,
                 quality_func=F1(),
                 path_of_embeddings=args.path_of_nces_embeddings,
+                path_of_trained_models=args.path_of_nces_trained_models,
                 learner_names=["LSTM", "GRU", "SetTransformer"],
                 num_predictions=100,
                 verbose=0)
@@ -146,7 +149,9 @@ def dl_concept_learning(args):
                                         neg={OWLNamedIndividual(i) for i in train_neg})
 
             test_lp = PosNegLPStandard(pos={OWLNamedIndividual(i) for i in test_pos},
+
                                        neg={OWLNamedIndividual(i) for i in test_neg})
+
             print("OCEL starts..", end="\t")
             start_time = time.time()
             pred_ocel = ocel.fit(train_lp).best_hypotheses()
@@ -168,6 +173,7 @@ def dl_concept_learning(args):
             print(f"OCEL Test Quality: {test_f1_ocel:.3f}", end="\t")
             print(f"OCEL Runtime: {rt_ocel:.3f}")
 
+
             print("CELOE starts..", end="\t")
             start_time = time.time()
             pred_celoe = celoe.fit(train_lp).best_hypotheses()
@@ -184,7 +190,7 @@ def dl_concept_learning(args):
             # Reporting
             data.setdefault("Train-F1-CELOE", []).append(train_f1_celoe)
             data.setdefault("Test-F1-CELOE", []).append(test_f1_celoe)
-            data.setdefault("RT-CELOE", []).append(rt_ocel)
+            data.setdefault("RT-CELOE", []).append(rt_celoe)
             print(f"CELOE Train Quality: {train_f1_celoe:.3f}", end="\t")
             print(f"CELOE Test Quality: {test_f1_celoe:.3f}", end="\t")
             print(f"CELOE Runtime: {rt_celoe:.3f}")
@@ -318,6 +324,7 @@ def dl_concept_learning(args):
                         help="Knowledge base")
     parser.add_argument("--path_drill_embeddings", type=str, default=None)
     parser.add_argument("--path_of_nces_embeddings", type=str, default=None)
+    parser.add_argument("--path_of_nces_trained_models", type=str, default=None)
     parser.add_argument("--path_of_clip_embeddings", type=str, default=None)
     parser.add_argument("--report", type=str, default="report.csv")
     parser.add_argument("--random_seed", type=int, default=1)

diff --git a/ontolearn/base_nces.py b/ontolearn/base_nces.py
@@ -61,6 +61,8 @@ def __init__(self, knowledge_base_path, nces2_or_roces, quality_func, num_predic
         self.vocab = {vocab[i]: i for i in range(len(vocab))}
         if quality_func is None:
             self.quality_func = F1()
+        else:
+            self.quality_func = quality_func
         self.num_predictions = num_predictions
         self.auto_train = auto_train
         self.proj_dim = proj_dim

diff --git a/ontolearn/clip_architectures.py b/ontolearn/clip_architectures.py
@@ -23,9 +23,7 @@
 # -----------------------------------------------------------------------------
 
 import torch, torch.nn as nn
-import random
-from typing import List
-from ontolearn.nces_modules import *    
+from ontolearn.nces_modules import ISAB, PMA
 
 class LengthLearner_LSTM(nn.Module):
     """LSTM architecture"""

diff --git a/ontolearn/clip_trainer.py b/ontolearn/clip_trainer.py
@@ -26,13 +26,10 @@
 import copy
 import torch
 from tqdm import trange
-from collections import defaultdict
 import os
 import json
 from torch.optim.lr_scheduler import ExponentialLR
-from torch.nn import functional as F
 from torch.nn.utils import clip_grad_value_
-from torch.nn.utils.rnn import pad_sequence
 from sklearn.metrics import f1_score, accuracy_score
 import time
 
@@ -68,17 +65,15 @@ def get_optimizer(self, length_predictor, optimizer='Adam'):  # pragma: no cover
     def show_num_learnable_params(self):
         print("*"*20+"Trainable model size"+"*"*20)
         size = sum([p.numel() for p in self.clip.length_predictor.parameters()])
-        size_ = 0
         print("Length Predictor: ", size)
         print("*"*20+"Trainable model size"+"*"*20)
         print()
-        return size
 
     def train(self, train_dataloader, save_model=True, optimizer='Adam', record_runtime=True):
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         if isinstance(self.clip.length_predictor, list):
             self.clip.length_predictor = copy.deepcopy(self.clip.length_predictor[0])
-        model_size = self.show_num_learnable_params()
+        self.show_num_learnable_params()
         if device.type == "cpu":
             print("Training on CPU, it may take long...")
         else:

diff --git a/ontolearn/concept_learner.py b/ontolearn/concept_learner.py
@@ -599,15 +599,16 @@ def __init__(self,
         self.output_size = output_size
         self.num_examples = num_examples
         self.path_of_embeddings = path_of_embeddings
-        assert os.path.isfile(self.path_of_embeddings), '!!! Wrong path for CLIP embeddings'
-        self.instance_embeddings = pd.read_csv(path_of_embeddings, index_col=0)
-        self.input_size = self.instance_embeddings.shape[1]
+        if self.path_of_embeddings:
+            assert os.path.isfile(self.path_of_embeddings), '!!! Wrong path for CLIP embeddings'
+            self.instance_embeddings = pd.read_csv(path_of_embeddings, index_col=0)
+            self.input_size = self.instance_embeddings.shape[1]
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.length_predictor = self.get_length_predictor()
 
     def get_length_predictor(self):
         def load_model(predictor_name, load_pretrained):
-            if predictor_name is None:
+            if predictor_name is None or not self.path_of_embeddings:
                 return []
             if predictor_name == 'SetTransformer':
                 model = LengthLearner_SetTransformer(self.input_size, self.output_size, proj_dim=256, num_heads=4,
@@ -724,7 +725,7 @@ def fit(self, *args, **kwargs):
         else:
             self._max_runtime = self.max_runtime
 
-        if (self.pretrained_predictor_name is not None) and (self.length_predictor is not None):
+        if (self.pretrained_predictor_name is not None) and self.length_predictor[0] != []:
             x_pos, x_neg = self.pos_neg_to_tensor(list(self._learning_problem.kb_pos)[:self.num_examples],
                                                   list(self._learning_problem.kb_neg)[:self.num_examples])
             max_length = self.predict_length(self.length_predictor, x_pos, x_neg)
@@ -870,7 +871,7 @@ def get_synthesizer(self, path=None):
                 self.vocab = vocab
                 self.inv_vocab = inv_vocab
             except Exception as e:
-                print(e+'\n')
+                print(e,'\n')
                 raise FileNotFoundError(f"{path} does not contain at least one of `vocab.json, inv_vocab.npy or embedding_config.json`")
         elif self.load_pretrained and self.path_of_trained_models and glob.glob(self.path_of_trained_models + "/*.pt"):
             # Read pretrained model's vocabulary and config files

diff --git a/ontolearn/nces_trainer.py b/ontolearn/nces_trainer.py
@@ -28,8 +28,7 @@
 import torch
 from torch.utils.data import DataLoader
 from tqdm import trange
-from collections import defaultdict
-import os, random
+import os
 import json
 from torch.optim.lr_scheduler import ExponentialLR
 from torch.nn import functional as F
@@ -161,8 +160,6 @@ def map_to_token(self, idx_array):
 
 
     def train_step(self, batch, model, emb_model, optimizer, device, triples_dataloader=None):
-        soft_acc, hard_acc = [], []
-        train_losses = []
         if emb_model:
             try:
                 triples_batch = next(triples_dataloader)

diff --git a/tests/test_clip.py b/tests/test_clip.py
@@ -2,11 +2,6 @@
 from ontolearn.refinement_operators import ExpressRefinement
 from ontolearn.knowledge_base import KnowledgeBase
 from owlapy.parser import DLSyntaxParser
-import sys
-from ontolearn.metrics import F1
-import time
-import random
-import unittest
 import os
 import warnings
 warnings.filterwarnings("ignore")

diff --git a/tests/test_clip_trainer.py b/tests/test_clip_trainer.py
@@ -7,10 +7,9 @@
 import os
 import json
 import random
-import unittest
 import warnings
 warnings.filterwarnings("ignore")
-import os
+
 def seed_everything():
     seed = 42
     os.environ['PYTHONHASHSEED'] = str(seed)

diff --git a/tests/test_nces.py b/tests/test_nces.py
@@ -1,9 +1,7 @@
 from ontolearn.concept_learner import NCES
 from ontolearn.knowledge_base import KnowledgeBase
 from owlapy.parser import DLSyntaxParser
-from ontolearn.metrics import F1
 from ontolearn.learning_problem import PosNegLPStandard
-import subprocess
 import random
 import unittest
 import os

diff --git a/tests/test_nces2.py b/tests/test_nces2.py
@@ -1,9 +1,7 @@
 from ontolearn.concept_learner import NCES2
 from ontolearn.knowledge_base import KnowledgeBase
 from owlapy.parser import DLSyntaxParser
-from ontolearn.metrics import F1
 from ontolearn.learning_problem import PosNegLPStandard
-import subprocess
 import random
 import unittest
 import os

diff --git a/tests/test_nces_trainer.py b/tests/test_nces_trainer.py
@@ -1,9 +1,7 @@
 from ontolearn.concept_learner import NCES
-import time
 import random
 import unittest
 import os
-import json
 import numpy as np
 import torch
 import pathlib

diff --git a/tests/test_roces.py b/tests/test_roces.py
@@ -1,9 +1,7 @@
 from ontolearn.concept_learner import ROCES
 from ontolearn.knowledge_base import KnowledgeBase
 from owlapy.parser import DLSyntaxParser
-from ontolearn.metrics import F1
 from ontolearn.learning_problem import PosNegLPStandard
-import subprocess
 import random
 import unittest
 import os