Skip to content

Commit

Permalink
Another fix for #51: do not read 'NA' splice junctions as 'NaN' values!
Browse files Browse the repository at this point in the history
  • Loading branch information
lucventurini committed Feb 3, 2020
1 parent 9f86ebe commit ebe42b8
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions scripts/portcullis/portcullis/rule_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
from performance import Performance


# Strings that should be parsed as missing values when the junction tables
# are loaded. The literal 'NA' is deliberately left out of this list so that
# it can be kept as ordinary text instead of being converted to NaN.
# NOTE(review): pandas appends `na_values` to its built-in defaults unless
# `keep_default_na=False` is also given to `read_csv` -- confirm at call sites.
na_vals = [
    '',
    '#N/A', '#N/A N/A', '#NA',
    '-1.#IND', '-1.#QNAN',
    '-NaN', '-nan',
    '1.#IND', '1.#QNAN',
    'N/A', 'NULL', 'NaN',
    'n/a', 'nan', 'null',
]


def replace_op(op):
if op == "eq":
return "=="
Expand Down Expand Up @@ -131,7 +134,7 @@ def calcPerformance(passed, failed, invert=False):
def create_training_sets(args):
# Load portcullis junctions into dataframe
print("Loading input junctions ... ", end="", flush=True)
original = pd.read_csv(args.input, sep='\t', header=0, index_col=0)
original = pd.read_csv(args.input, sep='\t', header=0, index_col=0, na_values=na_vals)
fieldnames = [key for key in dict(original.dtypes)]
print("done.", len(original), "junctions loaded.")

Expand Down Expand Up @@ -335,7 +338,7 @@ def filter_one(args):
# Load portcullis junctions into dataframe
if args.verbose:
print("Loading input junctions ... ", end="", flush=True)
original = pd.read_csv(args.input, sep='\t', header=0, index_col=0)
original = pd.read_csv(args.input, sep='\t', header=0, index_col=0, na_values=na_vals)
if args.verbose:
print("done.")

Expand Down

0 comments on commit ebe42b8

Please sign in to comment.