-
Notifications
You must be signed in to change notification settings - Fork 69
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1658 from veg/develop
2.5.55
- Loading branch information
Showing
34 changed files
with
1,833 additions
and
729 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,11 +32,13 @@ Version 2.2 changes the grid search to LHC, and adds an initial search phase to | |
for branch-site variation in synonymous substitution rates. Version 3.1 adds HMM auto-correlation option for SRV, and binds SRV distributions for multiple branch sets. | ||
Version 4.0 adds support for multiple hits (MH), ancestral state reconstruction saved to JSON, and profiling of branch-site level support for selection / multiple hits. | ||
Version 4.2 adds calculation of MH-attributable fractions of substitutions. | ||
Version 4.5 adds an "error absorption" component [experimental] | ||
", | ||
terms.io.version : "4.2", | ||
terms.io.version : "4.5", | ||
terms.io.reference : "*Gene-wide identification of episodic selection*, Mol Biol Evol. 32(5):1365-71, *Synonymous Site-to-Site Substitution Rate Variation Dramatically Inflates False Positive Rates of Selection Analyses: Ignore at Your Own Peril*, Mol Biol Evol. 37(8):2430-2439", | ||
terms.io.authors : "Sergei L Kosakovsky Pond", | ||
terms.io.contact : "[email protected]", | ||
terms.settings: {}, | ||
terms.io.requirements : "in-frame codon alignment and a phylogenetic tree (optionally annotated with {})" | ||
}; | ||
|
||
|
@@ -128,10 +130,12 @@ busted.do_srv = io.SelectAnOption ({"Yes" : "Allow synonymous substitution rates | |
"Synonymous rate variation" | ||
); | ||
|
||
selection.io.json_store_setting (busted.json, "srv", busted.do_srv); | ||
|
||
|
||
if (busted.do_srv == "Branch-site") { | ||
busted.do_bs_srv = TRUE; | ||
busted.do_srv = TRUE; | ||
(busted.json[terms.json.analysis])[terms.settings] = "Branch-site synonymous rate variation"; | ||
} else { | ||
if (busted.do_srv == "Yes") { | ||
busted.do_bs_srv = FALSE; | ||
|
@@ -160,11 +164,27 @@ busted.multi_hit = io.SelectAnOption ({ | |
{"None", "[Default] Use standard models which permit only single nucleotide changes to occur instantly"} | ||
}, "Include support for multiple nucleotide substitutions"); | ||
|
||
|
||
selection.io.json_store_setting (busted.json, "multiple-hit", busted.multi_hit); | ||
|
||
if (busted.do_srv) { | ||
KeywordArgument ("syn-rates", "The number alpha rate classes to include in the model [1-10, default 3]", busted.synonymous_rate_classes); | ||
busted.synonymous_rate_classes = io.PromptUser ("The number alpha rate classes to include in the model [1-10, default 3]", busted.synonymous_rate_classes, 1, 10, TRUE); | ||
} | ||
|
||
KeywordArgument ("error-sink", "[Advanced experimental setting] Include a rate class to capture misalignment artifacts", "No"); | ||
|
||
busted.error_sink = io.SelectAnOption ({ | ||
{"No", "Standard dN/dS model (all rates are used for inference)"} | ||
{"Yes", "The dN/dS model has an additional class (high dN/dS, low proportion), which 'absorbs' alignment errors"} | ||
}, "[Advanced experimental setting] Include a rate class to capture misalignment artifacts") == "Yes"; | ||
|
||
selection.io.json_store_setting (busted.json, "error-sink", busted.error_sink); | ||
|
||
if (busted.error_sink) { | ||
busted.rate_classes += 1; | ||
} | ||
|
||
KeywordArgument ("grid-size", "The number of points in the initial distributional guess for likelihood fitting", 250); | ||
busted.initial_grid.N = io.PromptUser ("The number of points in the initial distributional guess for likelihood fitting", 250, 1, 10000, TRUE); | ||
KeywordArgument ("starting-points", "The number of initial random guesses to seed rate values optimization", 1); | ||
|
@@ -302,6 +322,7 @@ if (busted.do_srv) { | |
busted.model_generator = "models.codon.BS_REL_SRV.ModelDescription"; | ||
busted.rate_class_arguments = {{busted.synonymous_rate_classes__,busted.rate_classes__}}; | ||
assert (busted.multi_hit == "None", "Multiple hit and branch site SRV combination is currently not supported"); | ||
assert (busted.error_sink == FALSE, "Error sink and branch site SRV combination is currently not supported"); | ||
} else { | ||
|
||
lfunction busted.model.with.GDD (type, code, rates) { | ||
|
@@ -432,6 +453,12 @@ for (busted.i = 1; busted.i < busted.rate_classes; busted.i += 1) { | |
parameters.SetRange (model.generic.GetGlobalParameter (busted.background.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,busted.i)), terms.range01); | ||
} | ||
|
||
if (busted.error_sink) { | ||
parameters.SetRange (model.generic.GetGlobalParameter (busted.test.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,1)), terms.range_high); | ||
parameters.SetRange (model.generic.GetGlobalParameter (busted.background.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,1)), terms.range_high); | ||
parameters.SetRange (model.generic.GetGlobalParameter (busted.test.bsrel_model , terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), 1)),terms.range_small_fraction); | ||
parameters.SetRange (model.generic.GetGlobalParameter (busted.background.bsrel_model , terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), 1)),terms.range_small_fraction); | ||
} | ||
|
||
parameters.SetRange (model.generic.GetGlobalParameter (busted.test.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,busted.rate_classes)), terms.range_gte1); | ||
|
||
|
@@ -647,15 +674,16 @@ io.ReportProgressMessageMD("BUSTED", "main", "* For *test* branches, the followi | |
|
||
selection.io.stopTimer (busted.json [terms.json.timers], "Unconstrained BUSTED model fitting"); | ||
|
||
busted.inferred_test_distribution = parameters.GetStickBreakingDistribution (busted.distribution) % 0; | ||
busted.inferred_test_distribution_raw = parameters.GetStickBreakingDistribution (busted.distribution); | ||
busted.inferred_test_distribution = busted.inferred_test_distribution_raw % 0; | ||
|
||
busted.has_selection_support = busted.inferred_test_distribution[Rows(busted.inferred_test_distribution)-1][-1]; | ||
busted.has_selection_support = busted.inferred_test_distribution_raw[Rows(busted.inferred_test_distribution_raw)-1][-1]; | ||
busted.has_selection_support = busted.has_selection_support[0] * busted.has_selection_support[1] > 1e-8; | ||
|
||
busted.distribution_for_json = {busted.FG : utility.Map (utility.Range (busted.rate_classes, 0, 1), | ||
"_index_", | ||
"{terms.json.omega_ratio : busted.inferred_test_distribution [_index_][0], | ||
terms.json.proportion : busted.inferred_test_distribution [_index_][1]}") | ||
"{terms.json.omega_ratio : busted.inferred_test_distribution_raw [_index_][0], | ||
terms.json.proportion : busted.inferred_test_distribution_raw [_index_][1]}") | ||
}; | ||
|
||
|
||
|
@@ -670,16 +698,26 @@ busted.EFV_ids = estimators.LFObjectGetEFV (busted.full_model[terms.likelihood_f | |
|
||
busted.report_multi_hit (busted.full_model, busted.distribution_for_json, "MultiHit", "alt-mh", busted.branch_length_string, busted.model_parameters); | ||
|
||
selection.io.report_dnds (busted.inferred_test_distribution); | ||
if (busted.error_sink) { | ||
selection.io.report_dnds_with_sink (busted.inferred_test_distribution, busted.inferred_test_distribution_raw[0][0], busted.inferred_test_distribution_raw[0][1]); | ||
} else { | ||
selection.io.report_dnds (busted.inferred_test_distribution); | ||
} | ||
|
||
if (busted.has_background) { | ||
io.ReportProgressMessageMD("BUSTED", "main", "* For *background* branches, the following rate distribution for branch-site combinations was inferred"); | ||
busted.inferred_background_distribution = parameters.GetStickBreakingDistribution (busted.background_distribution) % 0; | ||
selection.io.report_dnds (busted.inferred_background_distribution); | ||
busted.inferred_background_distribution_raw = parameters.GetStickBreakingDistribution (busted.background_distribution); | ||
busted.inferred_background_distribution = busted.inferred_background_distribution_raw % 0; | ||
if (busted.error_sink) { | ||
selection.io.report_dnds_with_sink (busted.inferred_background_distribution, busted.inferred_background_distribution_raw[0][0], busted.inferred_background_distribution_raw[0][1]); | ||
} else { | ||
selection.io.report_dnds (busted.inferred_background_distribution); | ||
} | ||
|
||
busted.distribution_for_json [busted.BG] = utility.Map (utility.Range (busted.rate_classes, 0, 1), | ||
"_index_", | ||
"{terms.json.omega_ratio : busted.inferred_background_distribution [_index_][0], | ||
terms.json.proportion : busted.inferred_background_distribution [_index_][1]}"); | ||
"{terms.json.omega_ratio : busted.inferred_background_distribution_raw [_index_][0], | ||
terms.json.proportion : busted.inferred_background_distribution_raw [_index_][1]}"); | ||
} | ||
|
||
if (busted.do_srv) { | ||
|
@@ -731,7 +769,7 @@ if (busted.do_srv) { | |
|
||
busted.stashLF = estimators.TakeLFStateSnapshot (busted.full_model[terms.likelihood_function]); | ||
|
||
if (busted.has_selection_support) { | ||
if (busted.has_selection_support || busted.error_sink) { | ||
|
||
utility.ToggleEnvVariable ("KEEP_OPTIMAL_ORDER", TRUE); | ||
|
||
|
@@ -999,7 +1037,11 @@ selection.io.json_store_lf (busted.json, | |
busted.display_orders[busted.unconstrained]); | ||
|
||
|
||
busted.run_test = busted.inferred_test_distribution [busted.rate_classes-1][0] > 1 && busted.inferred_test_distribution [busted.rate_classes-1][1] > 0; | ||
if (busted.error_sink) { | ||
busted.run_test = busted.inferred_test_distribution_raw [busted.rate_classes-1][0] > 1 && busted.inferred_test_distribution_raw [busted.rate_classes-1][1] > 0; | ||
} else { | ||
busted.run_test = busted.inferred_test_distribution [busted.rate_classes-1][0] > 1 && busted.inferred_test_distribution [busted.rate_classes-1][1] > 0; | ||
} | ||
|
||
for (_key_, _value_; in; busted.filter_specification) { | ||
selection.io.json_store_branch_attribute(busted.json, busted.unconstrained, terms.branch_length, 0, | ||
|
@@ -1048,23 +1090,31 @@ if (!busted.run_test) { | |
selection.io.extract_branch_info((busted.null_results[terms.branch_length])[_key_], "selection.io.branch.length"));'); | ||
|
||
io.ReportProgressMessageMD("BUSTED", "test", "* For *test* branches under the null (no dN/dS > 1 model), the following rate distribution for branch-site combinations was inferred"); | ||
busted.inferred_test_distribution = parameters.GetStickBreakingDistribution (busted.distribution) % 0; | ||
busted.inferred_test_distribution_raw = parameters.GetStickBreakingDistribution (busted.distribution); | ||
busted.inferred_test_distribution = busted.inferred_test_distribution_raw % 0; | ||
|
||
busted.distribution_for_json = {busted.FG : utility.Map (utility.Range (busted.rate_classes, 0, 1), | ||
"_index_", | ||
"{terms.json.omega_ratio : busted.inferred_test_distribution [_index_][0], | ||
terms.json.proportion : busted.inferred_test_distribution [_index_][1]}")}; | ||
"{terms.json.omega_ratio : busted.inferred_test_distribution_raw [_index_][0], | ||
terms.json.proportion : busted.inferred_test_distribution_raw [_index_][1]}")}; | ||
|
||
busted.report_multi_hit (busted.null_results, busted.distribution_for_json, "MultiHit", "null-mh",busted.branch_length_string, busted.model_parameters); | ||
selection.io.report_dnds (parameters.GetStickBreakingDistribution (busted.distribution) % 0); | ||
busted.null_distro_raw = parameters.GetStickBreakingDistribution (busted.distribution); | ||
|
||
if (busted.error_sink) { | ||
selection.io.report_dnds_with_sink (busted.null_distro_raw % 0, busted.null_distro_raw[0][0], busted.null_distro_raw[0][1]); | ||
} else { | ||
selection.io.report_dnds (busted.null_distro_raw % 0); | ||
} | ||
|
||
|
||
if (busted.has_background) { | ||
busted.inferred_background_distribution = parameters.GetStickBreakingDistribution (busted.background_distribution) % 0; | ||
busted.inferred_background_distribution_raw = parameters.GetStickBreakingDistribution (busted.background_distribution); | ||
//selection.io.report_dnds (busted.inferred_background_distribution); | ||
busted.distribution_for_json [busted.BG] = utility.Map (utility.Range (busted.rate_classes, 0, 1), | ||
"_index_", | ||
"{terms.json.omega_ratio : busted.inferred_background_distribution [_index_][0], | ||
terms.json.proportion : busted.inferred_background_distribution [_index_][1]}"); | ||
"{terms.json.omega_ratio : busted.inferred_background_distribution_raw [_index_][0], | ||
terms.json.proportion : busted.inferred_background_distribution_raw [_index_][1]}"); | ||
} | ||
|
||
if (busted.do_srv) { | ||
|
@@ -1252,10 +1302,18 @@ function busted.init_grid_setup (omega_distro) { | |
} | ||
}["_MATRIX_ELEMENT_VALUE_^(busted.rate_classes-_index_[0]-1)"]; | ||
busted.initial_grid_presets [_name_] = 0; | ||
busted.initial_ranges [_name_] = { | ||
terms.lower_bound : 0, | ||
terms.upper_bound : 1 | ||
}; | ||
|
||
if (busted.error_sink && _index_[0] == 0) { | ||
busted.initial_ranges [_name_] = { | ||
terms.lower_bound : 100, | ||
terms.upper_bound : 10000 | ||
}; | ||
} else { | ||
busted.initial_ranges [_name_] = { | ||
terms.lower_bound : 0, | ||
terms.upper_bound : 1 | ||
}; | ||
} | ||
} else { | ||
busted.initial_grid [_name_] = { | ||
{ | ||
|
@@ -1280,10 +1338,17 @@ function busted.init_grid_setup (omega_distro) { | |
} | ||
}; | ||
busted.initial_grid_presets [_name_] = 3; | ||
busted.initial_ranges [_name_] = { | ||
terms.lower_bound : 0, | ||
terms.upper_bound : 1 | ||
}; | ||
if (busted.error_sink && _index_[0] == 0) { | ||
busted.initial_ranges [_name_] = { | ||
terms.lower_bound : 0, | ||
terms.upper_bound : 0.01, | ||
}; | ||
} else { | ||
busted.initial_ranges [_name_] = { | ||
terms.lower_bound : 0, | ||
terms.upper_bound : 1 | ||
}; | ||
} | ||
' | ||
); | ||
|
||
|
Oops, something went wrong.