Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adds automatic adapt parameter selection via median error rate #260

Merged
merged 4 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 30 additions & 9 deletions freyja/_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,15 @@ def print_barcode_version(ctx, param, value):
help='Pathogen of interest.' +
'Not used if using --barcodes option.',
show_default=True)
@click.option('--autoadapt', default=False,
is_flag=True,
help='use error profile to set adapt',
show_default=True)
def demix(variants, depths, output, eps, barcodes, meta,
covcut, confirmedonly, depthcutoff, lineageyml,
adapt, a_eps, region_of_interest,
relaxedmrca, relaxedthresh, solver, pathogen):
relaxedmrca, relaxedthresh, solver, pathogen,
autoadapt):
"""
Generate relative lineage abundances from VARIANTS and DEPTHS
"""
Expand All @@ -110,9 +115,9 @@ def demix(variants, depths, output, eps, barcodes, meta,
# drop intra-lineage diversity naming (keeps separate barcodes)
indexSimplified = [dfi.split('_')[0] for dfi in df_barcodes.index]
df_barcodes = df_barcodes.loc[indexSimplified, :]
df_depth = pd.read_csv(depths, sep='\t', header=None, index_col=1)

if depthcutoff != 0:
df_depth = pd.read_csv(depths, sep='\t', header=None, index_col=1)
df_barcodes = collapse_barcodes(df_barcodes, df_depth, depthcutoff,
lineageyml, locDir, output,
relaxedmrca, relaxedthresh,
Expand All @@ -121,11 +126,20 @@ def demix(variants, depths, output, eps, barcodes, meta,
mapDict = buildLineageMap(meta)
print('building mix/depth matrices')
# assemble data from (possibly) mixed samples
mix, depths_, cov = build_mix_and_depth_arrays(variants, depths, muts,
covcut)
print('demixing')
if autoadapt:
mix, depths_, cov, adapt = build_mix_and_depth_arrays(variants,
depths,
muts,
covcut,
autoadapt)
else:
mix, depths_, cov, _ = build_mix_and_depth_arrays(variants,
depths,
muts,
covcut,
autoadapt)
df_barcodes, mix, depths_ = reindex_dfs(df_barcodes, mix, depths_)

print('demixing')
try:
sample_strains, abundances, error = solve_demixing_problem(df_barcodes,
mix,
Expand Down Expand Up @@ -507,10 +521,14 @@ def variants(bamfile, ref, variants, depths, refname, minq, annot, varthresh):
help='Pathogen of interest.' +
'Not used if using --barcodes option.',
show_default=True)
@click.option('--autoadapt', default=False,
is_flag=True,
help='use error profile to set adapt',
show_default=True)
def boot(variants, depths, output_base, eps, barcodes, meta,
nb, nt, boxplot, confirmedonly, lineageyml, depthcutoff,
rawboots, relaxedmrca, relaxedthresh, bootseed,
solver, pathogen):
solver, pathogen, autoadapt):
"""
Perform bootstrapping method for freyja using VARIANTS and DEPTHS
"""
Expand Down Expand Up @@ -546,8 +564,11 @@ def boot(variants, depths, output_base, eps, barcodes, meta,
print('building mix/depth matrices')
# assemble data from (possibly) mixed samples
covcut = 10 # set value, coverage estimate not returned to user from boot
mix, depths_, cov = build_mix_and_depth_arrays(variants, depths, muts,
covcut)
mix, depths_, cov, adapt = build_mix_and_depth_arrays(variants,
depths,
muts,
covcut,
autoadapt)
print('demixing')
df_barcodes, mix, depths_ = reindex_dfs(df_barcodes, mix, depths_)
lin_df, constell_df = perform_bootstrap(df_barcodes, mix, depths_,
Expand Down
22 changes: 19 additions & 3 deletions freyja/sample_deconv.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,31 @@ def buildLineageMap(locDir):
return mapDict


def build_mix_and_depth_arrays(fn, depthFn, muts, covcut):
def get_error_rate(muts, df0, df_depths0):
from freyja.convert_paths2barcodes import sortFun
sites = set([sortFun(m) for m in muts])
df_depths0 = df_depths0[~df_depths0.index.isin(sites)]
df0 = df0[~df0['POS'].isin(sites)]
df0 = df0[df0['ALT_FREQ'] > 0]
error_rate = df0['ALT_FREQ'].median()
return error_rate


def build_mix_and_depth_arrays(fn, depthFn, muts, covcut, autoadapt):
input_is_vcf = fn.lower().endswith('vcf')
if input_is_vcf:
df = read_snv_frequencies_vcf(fn, depthFn, muts)
else:
df = read_snv_frequencies_ivar(fn, depthFn, muts)

df = df[['REF', 'POS', 'ALT', 'ALT_FREQ', 'ALT_DP']]
# only works for substitutions, but that's what we get from usher tree
df_depth = pd.read_csv(depthFn, sep='\t', header=None, index_col=1)
df_depth = pd.read_csv(depthFn, sep='\t', header=None, index_col=1)[[3]]

if autoadapt:
adapt = get_error_rate(muts, df, df_depth)
else:
adapt = 0

df['mutName'] = df['REF'] + df['POS'].astype(str) + df['ALT']
df = df.drop_duplicates(subset='mutName')
Expand All @@ -77,7 +93,7 @@ def build_mix_and_depth_arrays(fn, depthFn, muts, covcut):
depths = pd.Series({kI: df_depth.loc[int(re.findall(r'\d+', kI)[0]), 3]
.astype(float) for kI in muts}, name=fn)
coverage = 100.*np.sum(df_depth.loc[:, 3] >= covcut)/df_depth.shape[0]
return mix, depths, coverage
return mix, depths, coverage, adapt


def read_snv_frequencies_ivar(fn, depthFn, muts):
Expand Down
6 changes: 4 additions & 2 deletions freyja/tests/test_deconv.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ def test_build_mix_and_depth_plus_reindex(self):
varFn = 'freyja/data/mixture.tsv'
depthFn = 'freyja/data/mixture.depth'
covcut = 10
mix, depths, cov = build_mix_and_depth_arrays(varFn, depthFn, muts,
covcut)
autoadapt = False
mix, depths, cov, adapt = build_mix_and_depth_arrays(varFn, depthFn,
muts, covcut,
autoadapt)
# just making sure the files we've read in are ready for use
self.assertTrue(ptypes.is_float_dtype(mix))
self.assertTrue(ptypes.is_float_dtype(depths))
Expand Down
Loading