From 79a716a018f14dfc4ee2b0b231efe398e4d2d924 Mon Sep 17 00:00:00 2001
From: fuentesazahara <afuentri@alumni.uv.es>
Date: Fri, 18 Aug 2023 13:14:11 +0200
Subject: [PATCH 1/2] fixed possible bug on calc_splizvd.py line 257

I am running this pipeline on the sicilian_called_splice_juncs.tsv file that the SICILIAN pipeline produces (https://github.com/salzman-lab/SICILIAN). When I run it without the "light" mode, and because the "missing_domains" column is present in my input table, the script raises an error on line 261: there are no columns named posStart_group or posEnd_group, since nothing earlier in this script creates them. However, two columns named posA_group and posB_group are created upstream in this script from the junction start and end positions. I am not certain that this change is correct, but I think the code is probably meant to refer to these columns, which already exist.
---
 bin/calc_splizvd.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/calc_splizvd.py b/bin/calc_splizvd.py
index bd0be1f..3a7945f 100755
--- a/bin/calc_splizvd.py
+++ b/bin/calc_splizvd.py
@@ -254,7 +254,7 @@ def main():
   df["max_rank_don"] = df["posB_group"].map(df.groupby("posB_group")["rank_don"].max())
 
   # add domain columns
-  letters = ["Start", "End"]
+  letters = ["A", "B"]
   for let in letters:
 
     if domain_breakdown:

From 2925e9832e3398c35834aa2aaecc99958313e11f Mon Sep 17 00:00:00 2001
From: Azahara Maria Fuentes Trillo <azahara.fuentes@hnode02.cm.cluster>
Date: Mon, 21 Aug 2023 14:41:39 +0200
Subject: [PATCH 2/2] made changes more general

---
 bin/calc_splizvd.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/bin/calc_splizvd.py b/bin/calc_splizvd.py
index 3a7945f..efa19c6 100755
--- a/bin/calc_splizvd.py
+++ b/bin/calc_splizvd.py
@@ -231,17 +231,17 @@ def main():
     split_dict = {True : ["ann"], False : ["unann"]}
 
   # remove constitutive splicing
-  df["posA_group"] = df["juncStart"].astype(str) + df["gene"]
-  df["posB_group"] = df["juncEnd"].astype(str) + df["gene"]
+  df["posStart_group"] = df["juncStart"].astype(str) + df["gene"]
+  df["posEnd_group"] = df["juncEnd"].astype(str) + df["gene"]
 
-  df["rank_acc"] = df.groupby("posA_group")["juncEnd"].rank(method="dense")
-  df["rank_don"] = df.groupby("posB_group")["juncStart"].rank(method="dense")
+  df["rank_acc"] = df.groupby("posStart_group")["juncEnd"].rank(method="dense")
+  df["rank_don"] = df.groupby("posEnd_group")["juncStart"].rank(method="dense")
   # remove "almost consistutive splicing"
   if args.rank_quant > 0:
-    let_dict2 = {"A" : "acc", "B" : "don"}
+    let_dict2 = {"Start" : "acc", "End" : "don"}
     
     # threshold ranks for each donor and acceptor
-    for let in ["A","B"]:
+    for let in ["Start","End"]:
       df["bottom_{}_quant".format(let_dict2[let])] = df["pos{}_group".format(let)].map(df.groupby("pos{}_group".format(let))["rank_{}".format(let_dict2[let])].quantile(q=args.rank_quant))
       df["top_{}_quant".format(let_dict2[let])] = df["pos{}_group".format(let)].map(df.groupby("pos{}_group".format(let))["rank_{}".format(let_dict2[let])].quantile(q=1 - args.rank_quant))
       df["rank_{}".format(let_dict2[let])] = df[["bottom_{}_quant".format(let_dict2[let]),"rank_{}".format(let_dict2[let])]].max(axis=1)
@@ -250,11 +250,11 @@ def main():
       # start ranks at 1 (in case 1 is removed by quantiling)
       df["rank_{}".format(let_dict2[let])] = df["rank_{}".format(let_dict2[let])] - df["bottom_{}_quant".format(let_dict2[let])] + 1
 
-  df["max_rank_acc"] = df["posA_group"].map(df.groupby("posA_group")["rank_acc"].max())
-  df["max_rank_don"] = df["posB_group"].map(df.groupby("posB_group")["rank_don"].max())
+  df["max_rank_acc"] = df["posStart_group"].map(df.groupby("posStart_group")["rank_acc"].max())
+  df["max_rank_don"] = df["posEnd_group"].map(df.groupby("posEnd_group")["rank_don"].max())
 
   # add domain columns
-  letters = ["A", "B"]
+  letters = ["Start", "End"]
   for let in letters:
 
     if domain_breakdown: