From 982b18651e021ce622952bf9947602d749dd47da Mon Sep 17 00:00:00 2001 From: Sergei Date: Thu, 26 Aug 2021 17:08:20 -0400 Subject: [PATCH 1/6] Add stop codon tolerance option for coding data --- res/TemplateBatchFiles/libv3/all-terms.bf | 1 + .../libv3/tasks/alignments.bf | 55 +++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/res/TemplateBatchFiles/libv3/all-terms.bf b/res/TemplateBatchFiles/libv3/all-terms.bf index a295a0b9b..ccfd485ea 100644 --- a/res/TemplateBatchFiles/libv3/all-terms.bf +++ b/res/TemplateBatchFiles/libv3/all-terms.bf @@ -30,6 +30,7 @@ namespace terms{ data_type = "datatype"; devnull = "/dev/null"; _namespace = "namespace"; + warning = "warning"; category = "category"; mixture = "mixture"; diff --git a/res/TemplateBatchFiles/libv3/tasks/alignments.bf b/res/TemplateBatchFiles/libv3/tasks/alignments.bf index 608fd918a..364ba2533 100644 --- a/res/TemplateBatchFiles/libv3/tasks/alignments.bf +++ b/res/TemplateBatchFiles/libv3/tasks/alignments.bf @@ -335,6 +335,17 @@ function alignments.PromptForGeneticCodeAndAlignment(dataset_name, datafilter_na return alignments.LoadCodonDataFile(dataset_name, datafilter_name, alignments.ReadCodonDataSet(dataset_name)); } +/** + * Prompts user for genetic code and alignment; do not create error condition on stop codons + * @name alignments.PromptForGeneticCodeAndAlignmentWarnStops + * @param {String} dataset_name - the name of the dataset you wish to use + * @param {String} datafilter_name - the name of the dataset filter you wish to use + * @returns {Dictionary} r - metadata pertaining to the dataset + */ +function alignments.PromptForGeneticCodeAndAlignmentWarnStops (dataset_name, datafilter_name) { + return alignments.LoadCodonDataFileWarnStops(dataset_name, datafilter_name, alignments.ReadCodonDataSet(dataset_name)); +} + /** * Loads genetic code and alignment from file path * @name alignments.LoadGeneticCodeAndAlignment @@ -393,6 +404,50 @@ function alignments.LoadCodonDataFile(dataset_name, datafilter_name, data_info) return data_info; } +/** + * Creates codon dataset filter from data set; do not treat stop codons as an error condition + * @name alignments.LoadCodonDataFileWarnStops + * @param {String} datafilter_name - the name of the dataset filter you wish to use + * @param {String} dataset_name - the name of the existing dataset + * @param {Dictionary} data_info - DataSet metadata information + * @returns {Dictionary} updated data_info that includes the number of sites, dataset, and datafilter name + */ +function alignments.LoadCodonDataFileWarnStops(dataset_name, datafilter_name, data_info) { + + + DataSetFilter ^ datafilter_name = CreateFilter( ^ dataset_name, 3, , , data_info[terms.stop_codons]); + + if (^"`datafilter_name`.sites"*3 != ^"`dataset_name`.sites") { + // generate a more diagnostic error here + data_info[^"terms.warning"] = {}; + + for (alignments.LoadCodonDataFile.i = 0; alignments.LoadCodonDataFile.i < ^"`dataset_name`.species"; alignments.LoadCodonDataFile.i += 1) { + DataSetFilter ^ datafilter_name = CreateFilter( ^ dataset_name, 3, , "" + alignments.LoadCodonDataFile.i , data_info[terms.stop_codons]); + if (^"`datafilter_name`.sites"*3 != ^"`dataset_name`.sites") { + alignments.LoadCodonDataFile.name = alignments.GetIthSequenceOriginalName (dataset_name, alignments.LoadCodonDataFile.i); + + alignments.LoadCodonDataFile.site_map = ^"`datafilter_name`.site_map"; + + alignments.LoadCodonDataFile.annotation_string = utility.PopulateDict (0, ^"`dataset_name`.sites", + '(alignments.LoadCodonDataFile.name[terms.data.sequence])[_idx]', + '_idx'); + + + utility.ForEach (alignments.LoadCodonDataFile.site_map, "_value_", + ' + `&alignments.LoadCodonDataFile.annotation_string`[_value_] = `&alignments.LoadCodonDataFile.annotation_string`[_value_] && 0; + '); + + (data_info[^"terms.warning"])[alignments.LoadCodonDataFile.name[terms.id]] = Join ("",alignments.LoadCodonDataFile.annotation_string); + } + } + } + data_info[terms.data.sites] = ^ "`datafilter_name`.sites"; + data_info[terms.data.dataset] = dataset_name; + data_info[terms.data.datafilter] = datafilter_name; + return data_info; +} + /** * Creates nucleotide dataset filter from file * @name alignments.ReadNucleotideAlignment From 44efdb264d33c03fbc928ac39b131754fafc7dcd Mon Sep 17 00:00:00 2001 From: Sergei Date: Thu, 26 Aug 2021 17:17:50 -0400 Subject: [PATCH 2/6] Add stop codon tolerance option for coding data --- res/TemplateBatchFiles/libv3/tasks/alignments.bf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/res/TemplateBatchFiles/libv3/tasks/alignments.bf b/res/TemplateBatchFiles/libv3/tasks/alignments.bf index 364ba2533..909b40216 100644 --- a/res/TemplateBatchFiles/libv3/tasks/alignments.bf +++ b/res/TemplateBatchFiles/libv3/tasks/alignments.bf @@ -422,7 +422,7 @@ function alignments.LoadCodonDataFileWarnStops(dataset_name, datafilter_name, da data_info[^"terms.warning"] = {}; for (alignments.LoadCodonDataFile.i = 0; alignments.LoadCodonDataFile.i < ^"`dataset_name`.species"; alignments.LoadCodonDataFile.i += 1) { - DataSetFilter ^ datafilter_name = CreateFilter( ^ dataset_name, 3, , "" + alignments.LoadCodonDataFile.i , data_info[terms.stop_codons]); + DataSetFilter ^datafilter_name = CreateFilter( ^ dataset_name, 3, , "" + alignments.LoadCodonDataFile.i , data_info[terms.stop_codons]); if (^"`datafilter_name`.sites"*3 != ^"`dataset_name`.sites") { alignments.LoadCodonDataFile.name = alignments.GetIthSequenceOriginalName (dataset_name, alignments.LoadCodonDataFile.i); @@ -441,6 +441,7 @@ function alignments.LoadCodonDataFileWarnStops(dataset_name, datafilter_name, da (data_info[^"terms.warning"])[alignments.LoadCodonDataFile.name[terms.id]] = Join ("",alignments.LoadCodonDataFile.annotation_string); } } + DataSetFilter ^ datafilter_name = CreateFilter( ^ dataset_name, 3, , , data_info[terms.stop_codons]); } data_info[terms.data.sites] = ^ "`datafilter_name`.sites"; data_info[terms.data.dataset] = dataset_name; From 347ebb6147ad6540e424ec53561dcdaad22727ab Mon Sep 17 00:00:00 2001 From: Sergei Date: Wed, 8 Sep 2021 19:30:54 -0400 Subject: [PATCH 3/6] FEL fixes --- res/TemplateBatchFiles/SelectionAnalyses/FEL.bf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf b/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf index 81a29ab4f..be9a6ab73 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf @@ -474,7 +474,7 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod is = fel.handle_a_site (lf, sims[i], partition_index, pattern_info, model_mapping, TRUE); null_LRT[i] = is; } - return {utility.getGlobalValue("terms.alternative") : alternative, utility.getGlobalValue("terms.Null"): Null, utility.getGlobalValue("terms.simulated"): null_LRT}; + return {utility.getGlobalValue("terms.alternative") : alternative, utility.getGlobalValue("terms.Null"): Null, utility.getGlobalValue("terms.simulated"): null_LRT, utility.getGlobalValue("terms.confidence_interval"): ci}; } } From 96b492e835c06975ec7afebc27ee53b8123cf7a7 Mon Sep 17 00:00:00 2001 From: Sergei Date: Fri, 10 Sep 2021 12:59:51 -0400 Subject: [PATCH 4/6] FEL --resample will store LRT distributions --- .../SelectionAnalyses/FEL.bf | 40 +++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf b/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf index be9a6ab73..61c8a8c83 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf @@ -581,18 +581,21 @@ lfunction fel.store_results (node, result, arguments) { } }; } - + has_lrt = FALSE; if (None != result) { // not a constant site lrt = math.DoLRT ((result[utility.getGlobalValue("terms.Null")])[utility.getGlobalValue("terms.fit.log_likelihood")], (result[utility.getGlobalValue("terms.alternative")])[utility.getGlobalValue("terms.fit.log_likelihood")], 1); - - if (result / ^"terms.simulated") { + + has_lrt = result / ^"terms.simulated"; + + if (has_lrt) { pv = +((result[^"terms.simulated"])["_MATRIX_ELEMENT_VALUE_>=" + lrt [utility.getGlobalValue("terms.LRT")]]); lrt [utility.getGlobalValue("terms.p_value")] = (pv+1)/(1+^"fel.resample"); } + result_row [0] = estimators.GetGlobalMLE (result[utility.getGlobalValue("terms.alternative")], ^"fel.site_alpha"); result_row [1] = estimators.GetGlobalMLE (result[utility.getGlobalValue("terms.alternative")], ^"fel.site_beta"); result_row [2] = estimators.GetGlobalMLE (result[utility.getGlobalValue("terms.Null")], ^"fel.site_beta"); @@ -621,13 +624,18 @@ lfunction fel.store_results (node, result, arguments) { utility.EnsureKey (^"fel.site_results", partition_index); + if (has_lrt) { + utility.EnsureKey (^"fel.site_LRT", partition_index); + } + + for (_fel_result_; in; pattern_info[utility.getGlobalValue("terms.data.sites")]) { + ((^"fel.site_results")[partition_index])[_fel_result_] = result_row; + fel.report.echo (_fel_result_, partition_index, result_row); + if (has_lrt) { + ((^"fel.site_LRT")[partition_index])[_fel_result_] = result[^"terms.simulated"]; + } + } - utility.ForEach (pattern_info[utility.getGlobalValue("terms.data.sites")], "_fel_result_", - ' - (fel.site_results[`&partition_index`])[_fel_result_] = `&result_row`; - fel.report.echo (_fel_result_, `&partition_index`, `&result_row`); - ' - ); //assert (0); @@ -635,6 +643,7 @@ lfunction fel.store_results (node, result, arguments) { //---------------------------------------------------------------------------------------- fel.site_results = {}; +fel.site_LRT = {}; for (fel.partition_index = 0; fel.partition_index < fel.partition_count; fel.partition_index += 1) { fel.report.header_done = FALSE; @@ -735,24 +744,23 @@ for (fel.partition_index = 0; fel.partition_index < fel.partition_count; fel.par io.ClearProgressBar(); mpi.QueueComplete (fel.queue); + fel.partition_matrix = {Abs (fel.site_results[fel.partition_index]), Rows (fel.table_headers)}; - - utility.ForEachPair (fel.site_results[fel.partition_index], "_key_", "_value_", - ' + for (_key_, _value_; in; fel.site_results[fel.partition_index]) { for (fel.index = 0; fel.index < Rows (fel.table_headers); fel.index += 1) { fel.partition_matrix [0+_key_][fel.index] = _value_[fel.index]; } - ' - ); - + } fel.site_results[fel.partition_index] = fel.partition_matrix; - } fel.json [terms.json.MLE ] = {terms.json.headers : fel.table_headers, terms.json.content : fel.site_results }; +if (fel.resample) { + (fel.json [terms.json.MLE ])[terms.LRT] = fel.site_LRT; +} io.ReportProgressMessageMD ("fel", "results", "** Found _" + fel.report.counts[0] + "_ sites under pervasive positive diversifying and _" + fel.report.counts[1] + "_ sites under negative selection at p <= " + fel.pvalue + "**"); From ff6ed726da1759d96802adc9da1f4ae772915daf Mon Sep 17 00:00:00 2001 From: Sergei Date: Tue, 21 Sep 2021 16:53:41 -0400 Subject: [PATCH 5/6] FEL fix --- .../SelectionAnalyses/FEL.bf | 83 +++++++++++-------- res/TemplateBatchFiles/libv3/IOFunctions.bf | 2 +- 2 files changed, 51 insertions(+), 34 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf b/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf index 61c8a8c83..68a3dc748 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf @@ -134,7 +134,7 @@ if (^"fel.ci") { {"alpha", "Synonymous substitution rate at a site"} {"beta", "Non-synonymous substitution rate at a site"} {"alpha=beta", "The rate estimate under the neutral model"} - {"LRT", "Likelihood ration test statistic for beta = alpha, versus beta &neq; alpha"} + {"LRT", "Likelihood ratio test statistic for beta = alpha, versus beta &neq; alpha"} {"p-value", "Asymptotic p-value for evidence of selection, i.e. beta &neq; alpha"} {"Total branch length", "The total length of branches contributing to inference at this site, and used to scale dN-dS"} {"dN/dS LB", "95% profile likelihood CI lower bound for dN/dS (if available)"} @@ -148,7 +148,7 @@ if (^"fel.ci") { fel.table_headers = {{"alpha", "Synonymous substitution rate at a site"} {"beta", "Non-synonymous substitution rate at a site"} {"alpha=beta", "The rate estimate under the neutral model"} - {"LRT", "Likelihood ration test statistic for beta = alpha, versus beta &neq; alpha"} + {"LRT", "Likelihood ratio test statistic for beta = alpha, versus beta &neq; alpha"} {"p-value", "Asymptotic p-value for evidence of selection, i.e. beta &neq; alpha"} {"Total branch length", "The total length of branches contributing to inference at this site, and used to scale dN-dS"}}; fel.table_screen_output = {{"Codon", "Partition", "alpha", "beta", "LRT", "Selection detected?"}}; @@ -406,7 +406,6 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod ^"fel.beta_scaler_test" = 1; ^"fel.beta_scaler_nuisance" = 1; - Optimize (results, ^lf , { @@ -417,24 +416,27 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod ); - ci = None; if (^"fel.ci") { - if (^"fel.srv") { - lf_stash = estimators.TakeLFStateSnapshot (lf); - global omega_ratio_for_ci; - if (^"fel.alpha_scaler" == 0.0) { // has no syn_variation - ^"fel.alpha_scaler" = 1e-8; - } - omega_ratio_for_ci :< 10000; - omega_ratio_for_ci :>0; - omega_ratio_for_ci = ^"fel.beta_scaler_test" / ^"fel.alpha_scaler"; + if (!sim_mode) { + if (^"fel.srv") { + lf_stash = estimators.TakeLFStateSnapshot (lf); + global omega_ratio_for_ci; + if (^"fel.alpha_scaler" == 0.0) { // has no syn_variation + ^"fel.alpha_scaler" = 1e-8; + } + omega_ratio_for_ci :< 10000; + omega_ratio_for_ci :>0; + omega_ratio_for_ci = ^"fel.beta_scaler_test" / ^"fel.alpha_scaler"; - ^"fel.beta_scaler_test" := omega_ratio_for_ci * ^"fel.alpha_scaler"; - ci = parameters.GetProfileCI (&omega_ratio_for_ci, lf, 0.95); - estimators.RestoreLFStateFromSnapshot (lf, lf_stash); - } else { - ci = parameters.GetProfileCI ("fel.beta_scaler_test", lf, 0.95); + ^"fel.beta_scaler_test" := omega_ratio_for_ci * ^"fel.alpha_scaler"; + ci = parameters.GetProfileCI (&omega_ratio_for_ci, lf, 0.95); + estimators.RestoreLFStateFromSnapshot (lf, lf_stash); + } else { + ci = parameters.GetProfileCI ("fel.beta_scaler_test", lf, 0.95); + } } + } else { + ci = None; } if (sim_mode) { @@ -456,8 +458,10 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod Null [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0]; } - + if (!sim_mode) { + tree_name = (lfInfo["Trees"])[0]; + sum = (BranchLength (^tree_name, -1)*^"fel.selected_branches_index")[0]; if (^"fel.resample") { N = ^"fel.resample"; sims = {}; @@ -477,9 +481,11 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod return {utility.getGlobalValue("terms.alternative") : alternative, utility.getGlobalValue("terms.Null"): Null, utility.getGlobalValue("terms.simulated"): null_LRT, utility.getGlobalValue("terms.confidence_interval"): ci}; } } - + + return {utility.getGlobalValue("terms.alternative") : alternative, utility.getGlobalValue("terms.Null"): Null, + utility.getGlobalValue("terms.math.sum") : sum, utility.getGlobalValue("terms.confidence_interval"): ci}; } @@ -603,17 +609,19 @@ lfunction fel.store_results (node, result, arguments) { result_row [4] = lrt [utility.getGlobalValue("terms.p_value")]; - sum = 0; + /*alternative_lengths = ((result[utility.getGlobalValue("terms.alternative")])[utility.getGlobalValue("terms.branch_length")])[0]; + + tname = utility.getGlobalValue("terms.tree_attributes.test"); + for (_node_; in; ^"fel.case_respecting_node_names") { + _node_class_ = ((^"fel.selected_branches")[partition_index])[_node_]; + if (_node_class_ == tname) { + sum += ((alternative_lengths)[_node_])[^"terms.fit.MLE"]; + } + }*/ - utility.ForEach (^"fel.case_respecting_node_names", "_node_", - '_node_class_ = ((^"fel.selected_branches")[`&partition_index`])[_node_]; - if (_node_class_ == utility.getGlobalValue("terms.tree_attributes.test")) { - `&sum` += ((`&alternative_lengths`)[_node_])[utility.getGlobalValue("terms.fit.MLE")]; - } - ');*/ - - result_row [5] = sum; + + result_row [5] = result[^"terms.math.sum"]; if (None != result [^"terms.confidence_interval"]) { result_row [6] = (result [^"terms.confidence_interval"])[^"terms.lower_bound"]; @@ -659,17 +667,22 @@ for (fel.partition_index = 0; fel.partition_index < fel.partition_count; fel.par // beta = beta_scaler_test * branch_length or beta_nuisance_test * branch_length SetParameter (DEFER_CONSTRAINT_APPLICATION, 1, 0); + fel.selected_branches_index = {1,utility.Array1D (fel.case_respecting_node_names)+1}; + fel.i = 0; for (_node_; in; fel.case_respecting_node_names) { _node_class_ = (fel.selected_branches[fel.partition_index])[_node_]; if (_node_class_ == terms.tree_attributes.test) { + fel.selected_branches_index [fel.i] = 1; _beta_scaler = fel.scalers[1]; } else { _beta_scaler = fel.scalers[2]; } fel.apply_proportional_site_constraint ("fel.site_tree", _node_, fel.alpha, fel.beta, fel.scalers[0], _beta_scaler, (( fel.final_partitioned_mg_results[terms.branch_length])[fel.partition_index])[_node_]); + fel.i += 1; } - + + /* utility.ForEach (fel.case_respecting_node_names, "_node_", '_node_class_ = (fel.selected_branches[fel.partition_index])[_node_]; @@ -704,7 +717,7 @@ for (fel.partition_index = 0; fel.partition_index < fel.partition_count; fel.par fel.queue = mpi.CreateQueue ({"LikelihoodFunctions": {{"fel.site_likelihood"}}, "Models" : {{"fel.site.mg_rev"}}, "Headers" : {{"libv3/all-terms.bf","libv3/tasks/alignments.bf"}}, - "Variables" : {{"fel.srv","fel.resample","fel.ci"}} + "Variables" : {{"fel.srv","fel.resample","fel.ci","fel.selected_branches_index"}} }); @@ -767,8 +780,12 @@ io.ReportProgressMessageMD ("fel", "results", "** Found _" + fel.report.counts[0 selection.io.stopTimer (fel.json [terms.json.timers], "Total time"); selection.io.stopTimer (fel.json [terms.json.timers], "FEL analysis"); -if (fel.resamples) { - fel.json [terms.simulated] = fel.resamples; +if (fel.resample) { + fel.json [terms.simulated] = fel.resample; +} + +if (fel.ci) { + fel.json [terms.confidence_interval] = TRUE; } GetString (_hpv,HYPHY_VERSION,0); diff --git a/res/TemplateBatchFiles/libv3/IOFunctions.bf b/res/TemplateBatchFiles/libv3/IOFunctions.bf index c86962bf9..e780d658b 100644 --- a/res/TemplateBatchFiles/libv3/IOFunctions.bf +++ b/res/TemplateBatchFiles/libv3/IOFunctions.bf @@ -148,7 +148,7 @@ lfunction io.SpoolJSON(json, file) { * @param file */ lfunction io.ParseJSON(file_path) { - fscanf(file_path, "Raw", test); + fscanf(file_path, REWIND, "Raw", test); parsed_test = Eval(test); return parsed_test; } From 9ee9471837df12f68d001a885facad0fc971b2ea Mon Sep 17 00:00:00 2001 From: Steven Weaver Date: Sun, 26 Sep 2021 09:16:24 -0400 Subject: [PATCH 6/6] fixes #1407: honor output argument FUBAR --- res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf b/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf index 3af3ff8a6..df9267d3b 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf @@ -139,7 +139,6 @@ selection.io.startTimer (fubar.json [terms.json.timers], "Overall", 0); fubar.path.base = (fubar.json [terms.json.input])[terms.json.file]; KeywordArgument ("cache", "Save FUBAR cache to [default is alignment+.FUBAR.cache]", fubar.path.base + ".FUBAR.cache"); -KeywordArgument ("output", "Save FUBAR results (JSON) to [default is alignment+.FUBAR.json]", fubar.codon_data_info[terms.data.file] + ".FUBAR.json"); /*------------------------------------------------------------------------------ @@ -149,6 +148,9 @@ KeywordArgument ("output", "Save FUBAR results (JSON) to [default is alignment fubar.path.cache = io.PromptUserForString ("Save FUBAR cache to"); fubar.cache = io.LoadCacheFromFile (fubar.path.cache); +KeywordArgument ("output", "Save FUBAR results (JSON) to [default is alignment+.FUBAR.json]", fubar.codon_data_info[terms.data.file] + ".FUBAR.json"); +fubar.codon_data_info [terms.json.json] = io.PromptUserForFilePath ("Save the resulting JSON file to"); + fubar.table_output_options = {terms.table_options.header : TRUE, terms.table_options.minimum_column_width: 16, terms.table_options.align : "center"}; console.log ( "> FUBAR will write cache and result files to _`fubar.path.base`.FUBAR.cache_ and _`fubar.path.base`.FUBAR.json_, respectively \n\n");