defined test data

epigen · Oct 21, 2019 · c2515b0
1 parent e280eaf
commit c2515b0
Show file tree

Hide file tree

Showing 2 changed files with 5 additions and 6 deletions.
diff --git a/Analysis_Scripts/Analysis_CollectOutputs.R b/Analysis_Scripts/Analysis_CollectOutputs.R
@@ -13,9 +13,9 @@ message(paste("Usage: ", usage))
 opt_parser = OptionParser(usage = usage);
 args <- parse_args(opt_parser, positional_arguments = 2)
 input.folder <- args$args[1]
-# input.folder <- "~/projects_shared/pathway_learning/results_analysis/46_03_TCR_opt/"
+# input.folder <- "opt/"
 output.folder <- args$args[2]
-# output.folder <- "~/projects_shared/pathway_learning/results_analysis/46_03_TCR_opt/"
+# output.folder <- "."
 if(!dir.exists(input.folder)) message("Missing folder of trained KPNNs :", input.folder, " please provide an existing folder")
 print(paste("Processing trained KPNNs from folder:", input.folder))
 dir.create(output.folder,recursive = TRUE)

diff --git a/KPNN_Function.py b/KPNN_Function.py
@@ -357,10 +357,11 @@ def indexInList(l2, l1):
 
     # Go through each group, identify indices for this group
     for idx, x in enumerate(test_groups):
-        if not idx in train_idx: # if the test set is defined, do not include those barcodes in the further steps
+        if not idx in test_idx: # if the test set is defined, do not include those barcodes in the further steps
             test_groups_list[x].append(idx)
 
     # Split indices into test and train set
+    print("OUTPUTS:")
     print(outputs)
     test_idx = [] if not test_def else test_idx # if the test_idx is already defined then we keep this
     val_idx = []
@@ -384,12 +385,12 @@ def indexInList(l2, l1):
         train_idx = train_idx + train_idx_x
 
 
-print(test_idx[1:5])
 # Final assertions for the split
 assert len(test_idx) + len(val_idx) + len(train_idx) == len(barcodes), "Error assigning test, training, and validation set"
 assert len(set(test_idx) & set(val_idx)) == 0, "Error assigning test, training, and validation set"
 assert len(set(test_idx) & set(train_idx)) == 0, "Error assigning test, training, and validation set"
 assert len(set(train_idx) & set(val_idx)) == 0, "Error assigning test, training, and validation set"
+# print("Final: " + "test: " + str(len(test_idx)) + " val: " + str(len(val_idx)) + " train: " + str(len(train_idx)) + " of:  " + str(len(barcodes)))
 
 # assign test and training set
 y_train = fullY[:,train_idx]
@@ -400,8 +401,6 @@ def indexInList(l2, l1):
 x_val = fullData[:,val_idx]
 x_test = fullData[:,test_idx]
 
-print(test_idx[1:5])
-
 # print result of draws)
 print("Training Ys \t(total " + str(y_train.shape[1]) + "): \t" + "(== 1) - ".join(outputs) + "(== 1): \t" + " - ".join([str(y_train[i,:].sum()) for i in range(y_train.shape[0])]))
 print("Validation Ys \t(total " + str(y_test.shape[1]) + "): \t" + "(== 1) - ".join(outputs) + "(== 1): \t" + " - ".join([str(y_test[i,:].sum()) for i in range(y_test.shape[0])]))